diff options
| author | Roland Reichwein <mail@reichwein.it> | 2020-01-18 23:13:56 +0100 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2020-01-18 23:13:56 +0100 | 
| commit | b01052f96c107603a30663c47308cadf7c30d94d (patch) | |
| tree | 5002e4136bde94b7674898f37d76cd0abb01ffa3 | |
| parent | 766b70f8b85197de38a9fd629641ca9f70cc2340 (diff) | |
Fix lex bnf
| -rw-r--r-- | minicc.cpp | 160 | 
1 files changed, 123 insertions, 37 deletions
| @@ -33,16 +33,16 @@ std::vector<PathElement> GetPath(std::string Token, BNF ReverseBNF, std::string  }  auto Reverse(BNF bnf){ - std::map<std::string, std::vector<std::string>> result; + std::map<std::string, std::set<std::string>> result;   for (const auto& [from, to] : bnf) {    for (const auto& list : to) {     for (const auto& element : list) {      auto i{result.find(element)};      if (i != result.end()) // already present -     i->second.push_back(from); +     i->second.insert(from);      else // new element -     result.emplace(element, std::vector{1, from}); +     result.emplace(element, std::set{from});     }    }   } @@ -68,10 +68,11 @@ private:  public:   bool Valid(const std::string& Top) const { -  // Start symbol on top +  // A token is non empty    if (node_num == 0)     return false; +  // Start symbol on top    auto rootNode{nodes.find(root)};    if (rootNode == nodes.end())     throw std::runtime_error("Node not found: "s + std::to_string(root)); @@ -88,24 +89,98 @@ public:    return true;   } - // try to add node - bool Add(char c, const BNF& bnf, const std::map<std::string, std::vector<std::string>>& reverseBNF) { + bool AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { +  node_num ++; +  root = node_num; +  last = node_num; +  std::string node_name(1, char(c)); + +  auto reverseRule{reverseBNF.find(node_name)}; +  if (reverseRule == reverseBNF.end()) +   throw std::runtime_error("Reverse rule not found: "s + node_name); + +  std::vector<std::string> children; // default: no children for terminal +  auto rule{bnf.find(node_name)}; +  if (rule != bnf.end()) { // multiple variants! +   bool hit{false}; +   for (const auto& i : rule->second) { +    if (i.size() > 0 && i[0] == node_name) { +     if (hit) +      throw std::runtime_error("Multiple matching rules found for "s + node_name); +     children = i; +     hit = true; +    } +   } +   if (!hit) +    throw std::runtime_error("No matching rule found for "s + node_name); +  } + +  nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, children, node_name}); +  return true; + } + + // try to add character to tree + bool Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {    if (nodes.empty()) { // first node -   node_num ++; -   root = node_num; -   last = node_num; -   std::string node_name{1, c}; -   auto rule{reverseBNF.find(node_name)}; -   if (rule == reverseBNF.end()) -    throw std::runtime_error("Rule not found: "s + node_name); -   nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name}); -   return true; -  } else { -   // TODO +   return AddFirstNode(c, bnf, reverseBNF); +  } else { // at least one character is already present +   // Traverse tree until partially filled node found +   // or new node can be added +   index_t current_index{last}; + +   while (current_index != 0) { +    TreeNode& node {nodes[current_index]}; +    if (node.childs.size() < node.child_names.size()) { // partially filled node +     throw std::runtime_error("TODO: partially filled node"); +    } +    current_index = node.parent; +   } +   throw std::runtime_error("TODO: need to add node"); +    }    return false;   } + // add path to start symbol + void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { +  if (nodes.empty()) // only handle non-empty trees +   return; + +  while (true) { +   std::string& old_root_name { nodes[root].name }; // current root node name + +   auto parents {reverseBNF.find(old_root_name)}; +   if (parents != reverseBNF.end()) { // parents in bnf available +    bool hit{false}; +    for (auto& parent : parents->second) { +     for (const auto& list : bnf.at(parent)) { +      if (list.size() == 1 && list[0] == old_root_name) { +       if (!hit) { +        const std::string& new_root_name {parent}; +        // Add new TreeNode in the direction to root: +        // New root with 1 child (old root) +        nodes.emplace(++node_num, +                     TreeNode{0, // parent +                              std::vector<index_t>{root}, // child indices +                              std::vector<std::string>{old_root_name}, // child names +                              new_root_name // name +                     }); +        nodes[root].parent = node_num; +        root = node_num; +        // this->last stays +        hit = true; +       } else +        throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); +      } +     } +    } +    if (!hit) +     break; +   } else +    break; +  } + } +  };  class Lexer @@ -115,17 +190,18 @@ private:   const BNF &bnf;   const std::string& Top; - std::map<std::string, std::vector<std::string>> ReverseBNF; + std::map<std::string, std::set<std::string>> ReverseBNF;   // to be called on token end - void CheckCandidates(std::deque<Tree>& candidates, std::string& token, std::vector<std::string>& result) + void FinalizeCandidates(std::deque<Tree>& candidates, std::string& token, std::vector<std::string>& result)   {    if (candidates.empty()) { // skip     if (!token.empty())      throw std::runtime_error("Expected empty token, got "s + token);    } else { // check candidates     bool valid{false}; -   for (const auto& ct : candidates) { +   for (auto& ct : candidates) { +    ct.Resolve(bnf, ReverseBNF);      if (ct.Valid(Top)) {       if (valid)        throw std::runtime_error("Found ambiguous token "s + token); @@ -141,6 +217,16 @@ private:    }   } + void AddCandidate(char c, std::deque<Tree>& candidates) { +  // new candidate: starting with c +  auto element {ReverseBNF.find(std::string(1, c))}; +  if (element != ReverseBNF.end()) { +   Tree newTree; +   if (newTree.Add(c, bnf, ReverseBNF)) +    candidates.push_back(newTree); +  } + } +  public:   Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}   { @@ -156,9 +242,10 @@ public:    for (size_t pos{0}; pos < s.size(); pos++) {     char c{s[pos]}; +   std::cout << "Char: |" << c << "|" << std::endl;     if (Whitespace.find(c) != std::string::npos) { // found whitespace character      // evaluate token up to now and skip whitespace -    CheckCandidates(candidates, token, result); +    FinalizeCandidates(candidates, token, result);     } else { // no whitespace: try to add to tree      std::deque<index_t> EraseList;      int i = 0; @@ -169,26 +256,21 @@ public:       }      } -    if (candidates.size() - EraseList.size() > 0) { // Added to some candidates: Continue growing token -     token.push_back(c); +    if (token.empty()) { +     AddCandidate(c, candidates); +    } else if (candidates.size() - EraseList.size() > 0) { // added to some candidates: Erase invalidated ones       for (const auto& i : EraseList)        candidates.erase(candidates.begin() + i);      } else { // no candidates left: new tree -     CheckCandidates(candidates, token, result); -    } -     -    // new candidate: starting with c -    auto element {ReverseBNF.find(std::string(1, c))}; -    if (element != ReverseBNF.end()) { -     Tree newTree; -     if (newTree.Add(c, bnf, ReverseBNF)) -      candidates.push_back(newTree); +     FinalizeCandidates(candidates, token, result); +     AddCandidate(c, candidates);      } +    token.push_back(c);     }    }    // Final evaluation of last token -  CheckCandidates(candidates, token, result); +  FinalizeCandidates(candidates, token, result);    return result;   } @@ -232,9 +314,12 @@ TEST_F(Test, BNF) {    {"identifier", {{"identifier-nondigit"},                    {"identifier", "identifier-nondigit"},                    {"identifier", "digit"}}}, -  {"digit", {{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }}}, -  {"identifier-nondigit", {{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", -                            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "_"}}}, +  {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }}, +  {"identifier-nondigit", +    {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"}, +     {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"}, +     {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, +     {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}},    {"preprocessing-op-or-punc", {{";"},                                  {"="}}},    {"pp-number", {{"digit"}, @@ -256,10 +341,11 @@ TEST_F(Test, BNF) {   Lexer lexer(LexBNF, LexTop);   auto tokens = lexer.Lex(Code); -  +#if 1   for (const auto& i: tokens) {    std::cout << i << std::endl;   } +#endif   //auto Program = Compile(tokens, Top, bnf, Terminals);  } | 
