diff options
| author | Roland Reichwein <mail@reichwein.it> | 2020-03-14 15:24:23 +0100 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2020-03-14 15:24:23 +0100 | 
| commit | 15a56fcb5f29b8507298144a835a819de652e788 (patch) | |
| tree | 1527d3d5996389f00f9f0afa2a4fa1122a794830 | |
| parent | 6fcfe0a5cf8f53658e50e346076768eec229e695 (diff) | |
Prepare DFA
| -rw-r--r-- | Makefile | 5 | ||||
| -rw-r--r-- | cpp.cpp | 9 | ||||
| -rw-r--r-- | cpp.h | 1 | ||||
| -rw-r--r-- | dfa.cpp | 1 | ||||
| -rw-r--r-- | dfa.h | 10 | ||||
| -rw-r--r-- | lexer.cpp | 283 | ||||
| -rw-r--r-- | lexer.h | 41 | ||||
| -rw-r--r-- | test-dfa.cpp | 17 | ||||
| -rw-r--r-- | test-lexer.cpp | 110 | 
9 files changed, 33 insertions, 444 deletions
| @@ -47,9 +47,12 @@ SRC=\      cpp.cpp \      grammer.cpp \      lexer.cpp \ +    test-lexer.cpp \      minicc.cpp \ +    test-minicc.cpp \      cppbnf.cpp \ -    test-lexer.cpp \ +    dfa.cpp \ +    test-dfa.cpp \      googletest/src/gtest-all.cpp \      googlemock/src/gmock-all.cpp @@ -186,15 +186,6 @@ std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<Tree   return result;  } -// TODO: remove in favor of tokens_from_pptokens() -void CPP::PreprocessorTokensToTokens(std::vector<Token>& tokens) -{ - for (auto& i : tokens) { -  if (i.type == "preprocessing-op-or-punc") -   i.type = i.value; - } -} -  // Phase 7.b: Grammar Analysis  std::pair<index_t, std::vector<Gram::TreeNode>> analysis(std::vector<Token>)  { @@ -13,7 +13,6 @@ CPP();  ~CPP();  std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& Tree); -static void PreprocessorTokensToTokens(std::vector<Token>& tokens); // obsolete  // phases of translation, according to standard  void source_charset_map(); // phase 1 @@ -0,0 +1 @@ +#include "dfa.h" @@ -0,0 +1,10 @@ +#pragma once + +namespace { +}; + +// deterministic finite automaton +class DFA { +public: + DFA(){} +}; @@ -2,294 +2,13 @@  using namespace Lex; -void Tree::clear() { - nodes.clear(); - root = 0; - last = 0; - node_num = 0; -} - -// Type of lexical token -std::string Tree::GetType() { - if (node_num > 0 && nodes[root].child_names.size() == 1) -  return nodes[root].child_names[0]; - return ""; -} - -bool Tree::Valid(const std::string& Top) const { - // A token is non empty - if (node_num == 0) -  return false; - - // Start symbol on top - auto rootNode{nodes.find(root)}; - if (rootNode == nodes.end()) -  throw std::runtime_error("Node not found: "s + std::to_string(root)); -  - if (rootNode->second.name != Top) -  return false; - - // All nodes filled (implies all leaves are terminal) - for (const auto& [index, node]: nodes) { -  if (node.childs.size() < node.child_names.size()) -   return false; // node not filled - } - - return true; -} - -bool Tree::AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - node_num ++; - root = node_num; - last = node_num; - std::string node_name(1, char(c)); - - auto reverseRule{reverseBNF.find(node_name)}; - if (reverseRule == reverseBNF.end()) -  throw std::runtime_error("Reverse rule not found for "s + node_name); - - auto rule{bnf.find(node_name)}; - if (rule != bnf.end()) { // multiple variants! -  throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s); - } - nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name}); - return true; -} - -std::vector<TreeNode> Tree::getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - std::vector<TreeNode> result; // default: empty - - auto& root_name {nodes[root].name}; - auto bnfParents {reverseBNF.find(root_name)}; - if (bnfParents == reverseBNF.end()) -  return result; - - for (const auto& parent_node_name : bnfParents->second) { -  auto lists {bnf.at(parent_node_name)}; -  for (const auto& list : lists) { -   if (list.size() > 0 && list[0] == root_name) { -    TreeNode node{0, std::vector<index_t>{root}, list, parent_node_name}; -    result.push_back(node); -   } -  } - } - - return result; -} - -index_t Tree::GetLast() { - index_t result {root}; - - while(result != 0 && nodes[result].childs.size() >= 2) { -  result = nodes[result].childs[nodes[result].childs.size() - 1]; - } - - return result; -} - -void Tree::AddRootNode(const TreeNode& newRootNode) { - node_num++; - nodes[node_num] = newRootNode; - root = node_num; - last = node_num; -} - -void Tree::RemoveRootNode() { - root = nodes[root].childs[0]; - nodes.erase(node_num); - node_num--; - last = GetLast(); -} - -// Path from leaf to root -std::vector<std::string> Tree::GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - std::vector<std::string> result; - - while (a != b) { -  auto parents {reverseBNF.find(a)}; -  if (parents == reverseBNF.end()) -   return {}; - -  bool hit{false}; -  for (const auto& parent : parents->second) { -   for (const auto& list : bnf.at(parent)) { -    if (list.size() > 0 && list[0] == a) { -     if (!hit) { -      result.push_back(a); -      a = parent; -      hit = true; -     } else -      throw std::runtime_error("Double match for "s + parent + "/"s + a); -    } -   } -  } - } - if (a == b) { -  result.push_back(a); - } - return result; -} - -index_t Tree::AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) -{ - TreeNode& parent {nodes[parent_index]}; - node_num++; - index_t index = node_num; - parent.childs.push_back(index); - std::vector<std::string> child_names; - auto rule {bnf.find(name)}; - if (rule != bnf.end()) { -  for (auto& list : rule->second) { -   if (list.size() > 0 && list[0] == child_name) -    child_names = list; -  } - } - nodes.emplace(index, TreeNode{parent_index, {}, child_names, name}); - //root stays - last = GetLast(); - - return index; -} - -void Tree::AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - for (int i = path.size() - 1; i >= 0; i--) { -  std::string child_name; -  if (i > 0) -   child_name = path[i - 1]; -  current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF); - } -} - -// try to add character to tree -bool Tree::Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - if (nodes.empty()) { // first node -  return AddFirstNode(c, bnf, reverseBNF); - } else { // at least one character is already present -  // Traverse tree until partially filled node found -  // or new node can be added -  index_t current_index{last}; - -  while (current_index != 0) { -   TreeNode& node {nodes[current_index]}; -   if (node.childs.size() < node.child_names.size()) { // partially filled node -    std::vector<std::string> list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF); -    if (list.size() > 0) { -     AddPath(list, current_index, bnf, reverseBNF); -     return true; -    } else { -     return false; // The path a->b is not available via bnf -    } -   } -   current_index = node.parent; -  } - -  // Add node at root - -  std::vector<TreeNode> parent_nodes = getParentTreeNode(bnf, reverseBNF); -  if (parent_nodes.size() == 0) -   throw std::runtime_error("Couldn't add new parent node."); - -  for (const auto &i : parent_nodes) { -   AddRootNode(i); -   if (Add(c, bnf, reverseBNF)) -    return true; -   RemoveRootNode(); -  } - - } - return false; -} - -// add path to start symbol -void Tree::Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - if (nodes.empty()) // only handle non-empty trees -  return; - - while (true) { -  std::string& old_root_name { nodes[root].name }; // current root node name - -  auto parents {reverseBNF.find(old_root_name)}; -  if (parents != reverseBNF.end()) { // parents in bnf available -   bool hit{false}; -   for (auto& parent : parents->second) { -    for (const auto& list : bnf.at(parent)) { -     if (list.size() == 1 && list[0] == old_root_name) { -      if (!hit) { -       const std::string& new_root_name {parent}; -       // Add new TreeNode in the direction to root: -       // New root with 1 child (old root) -       nodes.emplace(++node_num, -                    TreeNode{0, // parent -                             std::vector<index_t>{root}, // child indices -                             std::vector<std::string>{old_root_name}, // child names -                             new_root_name // name -                    }); -       nodes[root].parent = node_num; -       root = node_num; -       // this->last stays -       hit = true; -      } else -       throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); -     } -    } -   } -   if (!hit) -    break; -  } else -   break; - } -} - -void Lexer::FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result) -{ - tree.Resolve(bnf, ReverseBNF); - if (tree.Valid(Top)) { -  result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}}); -  token.clear(); - } - tree.clear(); -} - -Lexer::Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} +Lexer::Lexer(const BNF& bnf, const std::string& Top)  {  }  std::vector<Token> Lexer::Lex(const std::string& s)  { - location = {1, 0}; -   std::vector<Token> result; - std::string token; - - std::string Whitespace{"\t \n\r"}; - Tree tree; - - for (size_t pos{0}; pos < s.size(); pos++) { -  char c{s[pos]}; -  if (c == '\n') { -   location.column = 0; -   location.line++; -  } else if (std::isprint(c)) { -   location.column++; -  } - -  //std::cout << "Char: |" << c << "|" << std::endl; -  if (Whitespace.find(c) != std::string::npos) { // found whitespace character -   // evaluate token up to now and skip whitespace -   FinalizeTree(tree, token, result); -  } else { // no whitespace: try to add to tree -   if (!tree.Add(c, bnf, ReverseBNF)) { -    FinalizeTree(tree, token, result); -    if (!tree.Add(c, bnf, ReverseBNF)) -     throw std::runtime_error("Parse error"); -   } - -   token.push_back(c); -  } - } - - // Final evaluation of last token - FinalizeTree(tree, token, result);   return result;  } @@ -5,50 +5,9 @@  namespace Lex { -struct TreeNode { - index_t parent{}; - std::vector<index_t> childs; // fill char by char - std::vector<std::string> child_names; // fill always - std::string name; -}; - -class Tree { -private: - std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1 - index_t node_num{}; - index_t root{}; - index_t last{}; - -public: - void clear(); - std::string GetType(); - bool Valid(const std::string& Top) const; - bool AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); - std::vector<TreeNode> getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); - index_t GetLast(); - void AddRootNode(const TreeNode& newRootNode); - void RemoveRootNode(); - std::vector<std::string> GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); - index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); - void AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); - bool Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); - void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); -}; -  class Lexer  { -private: - const BNF &bnf; - const std::string& Top; - - Location location{1, 0}; - - std::map<std::string, std::set<std::string>> ReverseBNF; - - // to be called on token end - void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result); -  public:   Lexer(const BNF& bnf, const std::string& Top);   std::vector<Token> Lex(const std::string& s); diff --git a/test-dfa.cpp b/test-dfa.cpp new file mode 100644 index 0000000..07166f0 --- /dev/null +++ b/test-dfa.cpp @@ -0,0 +1,17 @@ +#include "debug.h" +#include "dfa.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +class DFATest: public ::testing::Test { +protected: + DFATest(){ +  debug = false; + } + ~DFATest() override {} +}; + +TEST_F(DFATest, BNF) { +} + diff --git a/test-lexer.cpp b/test-lexer.cpp index 735f670..a94e550 100644 --- a/test-lexer.cpp +++ b/test-lexer.cpp @@ -28,115 +28,5 @@ protected:  };  TEST_F(Test, BNF) { - std::string LexTop{"preprocessing-token"}; - BNF LexBNF{ -  {"preprocessing-token", {{"identifier"}, -                           {"preprocessing-op-or-punc"}, -                           {"pp-number"}}}, - -  {"identifier", {{"identifier-nondigit"}, -                  {"identifier", "identifier-nondigit"}, -                  {"identifier", "digit"}}}, -  {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }}, -  {"identifier-nondigit", -    {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"}, -     {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"}, -     {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, -     {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}}, -  {"preprocessing-op-or-punc", {{";"}, -                                {"="}}}, -  {"pp-number", {{"digit"}, -                 {"pp-number", "digit"}}} - }; - - std::string Top{"program"}; - BNF bnf{ -  {"program", {{"statement-list"}}}, -  {"statement-list", { -                      {"statement"}, -                      {"statement-list", "statement"}, -                     }}, -  {"statement", {{"assignment", ";"}}}, -  {"assignment", {{"identifier", "=", "identifier"}, -                  {"identifier", "=", "pp-number"}}} - }; - - // implicit? - //std::set<std::string> Terminals{"identifier", "=", ";"}; - - std::string Code{"a = bc ; c = 123 ; esd = Ff ; "};//1 = XYZ ; "}; - std::vector<Token> tokens_reference{ -  {"identifier", "a", { 1, 1} }, -  {"preprocessing-op-or-punc", "=", { 1, 3}}, -  {"identifier", "bc", { 1, 5}}, -  {"preprocessing-op-or-punc", ";", { 1, 8}}, -  {"identifier", "c", { 1, 10}}, -  {"preprocessing-op-or-punc", "=", { 1, 12}}, -  {"pp-number", "123", { 1, 14}}, -  {"preprocessing-op-or-punc", ";", { 1, 18}}, -  {"identifier", "esd", { 1, 20}}, -  {"preprocessing-op-or-punc", "=", { 1, 24}}, -  {"identifier", "Ff", { 1, 26}}, -  {"preprocessing-op-or-punc", ";", { 1, 29}}, -#if 0 -  {"pp-number", "1", { 1, 31}}, -  {"preprocessing-op-or-punc", "=", { 1, 33}}, -  {"identifier", "XYZ", { 1, 35}}, -  {"preprocessing-op-or-punc", ";", { 1, 39}}, -#endif - }; - - Lex::Lexer lexer(LexBNF, LexTop); - auto tokens = lexer.Lex(Code); - - ASSERT_EQ(tokens, tokens_reference); -#if 0 // TODO: use Debug() interface - std::cout << "=== Tokens =================================" << std::endl; - for (const auto& i: tokens) { -  std::cout << i.type << ": " << i.value << std::endl; - } -#endif - - CPP::PreprocessorTokensToTokens(tokens); - Gram::Compiler compiler(bnf, Top); - auto Tree = compiler.compile(tokens); - - compiler.DumpTree(); - - //---------------------------------------------------------------- -  - std::string Code2{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ "}; - std::vector<Token> tokens_reference2{ -  {"identifier", "a", { 1, 1} }, -  {"preprocessing-op-or-punc", "=", { 1, 3}}, -  {"identifier", "bc", { 1, 5}}, -  {"preprocessing-op-or-punc", ";", { 1, 8}}, -  {"identifier", "c", { 1, 10}}, -  {"preprocessing-op-or-punc", "=", { 1, 12}}, -  {"pp-number", "123", { 1, 14}}, -  {"preprocessing-op-or-punc", ";", { 1, 18}}, -  {"identifier", "esd", { 1, 20}}, -  {"preprocessing-op-or-punc", "=", { 1, 24}}, -  {"identifier", "Ff", { 1, 26}}, -  {"preprocessing-op-or-punc", ";", { 1, 29}}, -  {"pp-number", "1", { 1, 31}}, -  {"preprocessing-op-or-punc", "=", { 1, 33}}, -  {"identifier", "XYZ", { 1, 35}}, -  //{"preprocessing-op-or-punc", ";", { 1, 39}}, - }; - - tokens = lexer.Lex(Code2); - ASSERT_EQ(tokens, tokens_reference2); - CPP::PreprocessorTokensToTokens(tokens); - try { -  Tree = compiler.compile(tokens); -  FAIL() << "compile() was expected to throw."s; - } catch (const std::runtime_error& ex) { -  ASSERT_EQ(std::string{ex.what()}, "Compile error: Invalid program."); - }  } -int main(int argc, char* argv[]) { - ::testing::InitGoogleMock(&argc, argv); - return RUN_ALL_TESTS(); -} | 
