diff options
| author | Roland Reichwein <mail@reichwein.it> | 2020-03-26 14:50:20 +0100 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2020-03-26 14:50:20 +0100 | 
| commit | 94b1f232ea51767fc30c9ed6b3e5912d776bda30 (patch) | |
| tree | bd8076524c65337856d439c21687ce07e06f2eb5 | |
| parent | 0e1d2ab5ca0f0fde5d2dd83b95230a8d93b58e7b (diff) | |
Remove old bottom-up algorithm
| -rw-r--r-- | grammer.cpp | 347 | ||||
| -rw-r--r-- | grammer.h | 19 | 
2 files changed, 3 insertions, 363 deletions
| diff --git a/grammer.cpp b/grammer.cpp index 4bcdd4d..97930b1 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -17,8 +17,6 @@ void Compiler::clear()  {   nodes.clear();   root_node_id = 0; - - tokens_used = 0;  }  std::string Compiler::GetTypeOfNode(index_t node_id) const @@ -115,9 +113,6 @@ void Compiler::DumpTree()      for (int i = 0; i < child_ids.size(); i++) {       todo.insert(todo.begin() + i, std::pair<int32_t, size_t>{child_ids[i], indent + 1});      } -    if (node.alternatives.size()) { -     line += ", "s + std::to_string(node.alternatives.size()) + " alternatives available"s; -    }     }     Debug(line);    } @@ -125,302 +120,9 @@ void Compiler::DumpTree()   Debug("==============================================");  } -bool Compiler::AllTokensUsed() const -{ - return tokens_used == tokens.size(); -} -  bool Compiler::treeIsComplete() const  { - return nodes.size() > 0 && rootIsStartSymbol() && AllTokensUsed(); -} - -std::vector<std::string>& Compiler::getNodeExpectedChilds(index_t node_id) -{ - std::string& type = nodes[node_id].type; - index_t& variant = nodes[node_id].variant; - return bnf[type][variant]; -} - -// returns true if all childs are filled, recursively. Else false with to_fill as hint to node to fill -bool Compiler::subTreeIsComplete(index_t node_id, index_t& to_fill) -{ - // recurse first to get first subtree to_fill - for (const auto& i : nodes[node_id].child_ids) { -  if (!ChildIdIsToken(i)) { // consider subtrees -   if (!subTreeIsComplete(i, to_fill)) -    return false; // found incomplete subtree -> return it! -  } - } - - if (nodes[node_id].child_ids.size() < getNodeExpectedChilds(node_id).size()) { -  to_fill = node_id; -  return false; - } - - return true; -} - -size_t Compiler::CommonPrefix(const std::vector<Token>& tokens, const std::vector<std::string>& types) -{ - auto [tokens_it, types_it] = std::mismatch(tokens.begin(), tokens.end(), types.begin(), types.end(), [](const Token& token, const std::string& s){ return token.type == s; }); - return types_it - types.begin(); // a distance, 0 on no match -} - -void Compiler::AddFirstNode() -{ - root_node_id = 0; - - const std::string& child_type = tokens[0].type; - auto it = reversedFirst.find(child_type); - if (it == reversedFirst.end()) -  throw std::runtime_error("Illegal first token: "s + child_type + " ("s + tokens[0].value + ")"s); - - std::set<std::string>& alternatives_set {it->second}; - - std::string node_type; - index_t node_variant; - std::deque<std::pair<std::string, index_t>> alternatives; // only for valid elements from alternatives_set - std::vector<int32_t> child_ids; - - for (const auto& type : alternatives_set) { -  const auto& variants{bnf[type]}; -  for (int i = 0; i < variants.size(); i++) { -   const std::vector<std::string> & variant{variants[i]}; -   size_t common{}; -   if ((common = CommonPrefix(tokens, variant)) > 0) { // match a common prefix -    if (node_type == "") { // no match yet -     node_type = type; -     node_variant = i; -     for (int token_id = 0; token_id < common; token_id++) -      child_ids.push_back(ChildIdFromTokenId(token_id)); -    } else -     alternatives.emplace_back(type, i); -   } -  } - } - - if (node_type == "") // no matching type found -  throw std::runtime_error("Syntax error on first token: "s + child_type + " ("s + tokens[0].value + ")"s); - - nodes.emplace_back(TreeNode{0, 0, node_type, node_variant, alternatives, child_ids}); - - tokens_used = child_ids.size(); -} - -bool Compiler::AddRootNode() -{ - if (nodes.size() == 0) { -  AddFirstNode(); - } else { -  const std::string& child_type = nodes[root_node_id].type; // starting at old root -  auto it = reversedFirst.find(child_type); -  if (it == reversedFirst.end()) // this one doesn't have a parent, maybe a start symbol to discard? -   return false; - -  index_t old_root_node_id {root_node_id}; -  index_t new_root_node_id {nodes.size()}; - -  std::set<std::string>& alternatives_set {it->second}; - -  std::string node_type; -  index_t node_variant; -  std::deque<std::pair<std::string, index_t>> alternatives; // only for valid elements from alternatives_set -  std::vector<int32_t> child_ids{static_cast<int>(old_root_node_id)}; - -  for (const auto& type : alternatives_set) { -   const auto& variants{bnf[type]}; -   for (int i = 0; i < variants.size(); i++) { -    const std::vector<std::string> & variant{variants[i]}; -    if (variant.size() == 0) -     continue; // TODO: Handle case of empty rule (see e.g. C++ "attribute-list") -    if (child_type == variant[0]) { -     if (node_type == "") { -      node_type = type; -      node_variant = i; -     } else -      alternatives.emplace_back(type, i); // duplicates possible: variants of same type -    } -   } -  } - -  if (node_type == "") // no matching type found (maybe backtrack possible?) -   return false; - -  // now add!  -  Debug("AddRootNode(): Adding "s + node_type); -  nodes[old_root_node_id].parent_node_id = new_root_node_id; -  root_node_id = new_root_node_id; -  nodes.emplace_back(TreeNode{root_node_id, root_node_id, node_type, node_variant, alternatives, child_ids}); -  // keep tokens_used as is - } - - DumpTree(); - return true; -} - -void Compiler::removeTokensUpTo(index_t token_id) -{ - removeTokensUpTo(token_id, root_node_id); -} - -// operate on node_id -void Compiler::removeTokensUpTo(index_t token_id, index_t node_id) -{ - // token_id should be the new tokens_used -  - if (token_id < tokens_used) { -  auto& node{nodes[node_id]}; -  auto& child_ids {node.child_ids}; - -  // remove relevant tokens from end -  while (token_id < tokens_used && child_ids.size() && ChildIdIsToken(child_ids.back()) && TokenIdFromChildId(child_ids.back()) >= token_id) { -   Debug("Removing token "s + std::to_string(TokenIdFromChildId(child_ids.back()))); -   child_ids.pop_back(); -   if (tokens_used > 0) -    tokens_used--; -   else -    throw std::runtime_error("ICE: Removing non-existing token at "s + std::to_string(node_id) + " ("s + node.type + ")"s); -  } - -  // recurse from back, to remove tokens from end -  for (int i = child_ids.size() - 1; token_id < tokens_used && i >= 0; i--) { -   if (!ChildIdIsToken(child_ids[i])) { -    removeTokensUpTo(token_id, child_ids[i]); -   } -  } - } -} - -// Go back one step: Remove Node or Token -void Compiler::RemoveLastNode() -{ - TreeNode& node {nodes.back()}; - index_t node_id = node.node_id; -  - if (node_id == root_node_id) { // No parent -> remove root -  if (node.child_ids.empty()) { // No children -> now tree is empty -   clear(); -  } else if (ChildIdIsToken(node.child_ids.back())) { // last token child: remove -   removeTokensUpTo(TokenIdFromChildId(node.child_ids.back())); -  } else if (node.child_ids.size() == 1) { // One child: removing possible -   if (!ChildIdIsToken(node.child_ids[0])) { -    // node: set new root -    nodes[node.child_ids[0]].parent_node_id = node.child_ids[0]; -    root_node_id = node.child_ids[0]; -   } -   Debug("Removing root node "s + nodes.back().type + "("s + std::to_string(nodes.back().node_id) + ")"s); -   nodes.pop_back(); -  } else { -   DumpTree(); -   throw std::runtime_error("ICE: Backtrack not possible: Root not empty"); -  } - } else if (node.child_ids.empty()) { // No children -> remove leaf -  // We have a parent, otherwise we would have taken previous branch -  TreeNode& parent {nodes[node.parent_node_id]}; -  if (parent.child_ids.empty() || parent.child_ids.back() != node_id) -   throw std::runtime_error("ICE: Backtrack: Bad child nodes"); -  parent.child_ids.pop_back(); -  Debug("Removing "s + nodes.back().type + "("s + std::to_string(nodes.back().node_id) + ")"s); -  nodes.pop_back(); - } else if (ChildIdIsToken(node.child_ids.back())) { -  removeTokensUpTo(TokenIdFromChildId(node.child_ids.back())); - } else { // In the middle -  throw std::runtime_error("ICE: Backtrack in the middle of the tree: "s + std::to_string(node_id) + " ("s + node.type + ")"s); - } - DumpTree(); -} - -// Change type of last node according to alternatives -void Compiler::ChangeNodeType() -{ - TreeNode& node {nodes.back()}; - - if (node.alternatives.empty()) -  throw std::runtime_error("ICE: No alternatives found during Backtrack"); - - auto& [type, variant] {node.alternatives.front()}; - - node.type = type; - node.variant = variant; - - node.alternatives.pop_front(); -} - -// throws if no further track back is possible: compile error -void Compiler::TrackBack() -{ - // Search backwards for alternatives: last node with alternatives (variant or alt. token) - while (!nodes.empty() && nodes.back().alternatives.empty()) { -  RemoveLastNode(); - } - - if (nodes.empty()) { -  throw std::runtime_error("Compile error: Invalid program."); - } - - ChangeNodeType(); -} - -// GetPath() + traverse(): return shortest path with variants -// via first-entry-in-bnf-rule -// excluding lower (already exists) -// including upper (already determined to be included) - -// breadth-first search -// return: node, child -std::unordered_map<std::string, std::string> Compiler::traverse(const std::string& lower, const std::string& upper) -{ - std::unordered_map<std::string, std::string> visited; // node, child - std::deque<std::pair<std::string, std::string>> todo{{lower, ""}}; // node, child -  - while (!todo.empty()) { -  auto [current_node, current_child] = todo.front(); -  std::string& current_node2{current_node}; // workaround for lambda capture below (clang 8) -  todo.pop_front(); - -  auto it {visited.find(current_node)}; -  if (it == visited.end()) { // not visited, yet: visit now -   auto parents_it {reversedFirst.find(current_node)}; - -   if (parents_it != reversedFirst.end()) { -    auto& parents {parents_it->second}; -     -    visited[current_node] = current_child; -     -    std::for_each(parents.begin(), parents.end(), [&](const auto&x) { -     todo.push_back({x, current_node2}); -    }); -   } -  } - } - - return visited; -} - -// returns list from upper (including) to lower (excluding) -// returns empty list on fail -std::vector<std::string> Compiler::GetPath(std::string upper, std::string lower) -{ - std::vector<std::string> result; - - // traverse bnf from lower to upper - std::unordered_map<std::string, std::string> visited {traverse(lower, upper)}; -  - auto current {upper}; - while (current != lower) { -  auto it {visited.find(current)}; -  if (it != visited.end()) { -   auto& child{it->second}; - -   result.push_back(current); - -   current = child; -  } else { -   return {}; -  } - } - return result; + return nodes.size() > 0 && rootIsStartSymbol();  }  index_t Compiler::AddNode(const std::string& child_type, index_t parent_index) @@ -443,8 +145,8 @@ index_t Compiler::AddNode(const std::string& child_type, index_t parent_index)    }   } - nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, alternatives, std::vector<int32_t>{}}); - //root stays, tokens_used stays + nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, std::vector<int32_t>{}}); + //root stays   Debug("AddNode(): "s + nodes[parent_index].type + "->"s + child_type + ": "s + std::to_string(index));   DumpTree(); @@ -452,49 +154,6 @@ index_t Compiler::AddNode(const std::string& child_type, index_t parent_index)   return index;  } -void Compiler::AddPath(const std::vector<std::string>& path, index_t current_index) -{ - for (const std::string& child_type: path) { -  current_index = AddNode(child_type, current_index); - } - - nodes.back().child_ids.emplace_back(ChildIdFromTokenId(tokens_used)); - tokens_used++; - - Debug("AddPath(): token "s + tokens.back().type); - DumpTree(); -} - -bool Compiler::FillTree() -{ - if (nodes.size() == 0) // ignore empty tree, successfully -  return true; - - index_t to_fill{}; - - while (!subTreeIsComplete(root_node_id, to_fill)) { -  if (tokens_used >= tokens.size()) -   return false; // Unexpected end of program? - -  auto& node {nodes[to_fill]}; -  std::string next_child {bnf[node.type][node.variant][node.child_ids.size()]}; -  if (next_child == tokens[tokens_used].type) { // add token directly -   node.child_ids.push_back(ChildIdFromTokenId(tokens_used)); -   tokens_used++; -   Debug("tokens_used++: "s + std::to_string(tokens_used)); -   DumpTree(); -  } else { // add inner nodes -   auto list = GetPath(next_child, tokens[tokens_used].type); -   if (list.size() > 0) { -    AddPath(list, to_fill); -   } else { -    return false; -   } -  } - } - return true; -} -  size_t Compiler::minimumSymbolsNeeded(std::string symbol)  {   if (isTerminal(bnf, symbol)) { @@ -20,7 +20,6 @@ struct TreeNode {   std::string type;   index_t variant; // bnf[type][variant] - std::deque<std::pair<std::string, index_t>> alternatives; // [type][value] alternatives that we can consider on fail. Discard after parsing!   std::vector<int32_t> child_ids; // < 0: terminal: token_id; >= 0: non-terminal: node_id; fill token by token  }; @@ -34,9 +33,6 @@ private:   // Input   std::vector<Token> tokens; - // Helper data - index_t tokens_used{0}; // number of tokens used, this is also the next token index to use -   BNF &bnf; // not const for access via operator[]   const std::string m_top; @@ -50,24 +46,9 @@ private:   std::string GetTypeOfNode(index_t node_id) const;   bool IsRootNode(index_t node_id) const;   bool rootIsStartSymbol() const; - bool AllTokensUsed() const;   bool treeIsComplete() const; - std::vector<std::string>& getNodeExpectedChilds(index_t node_id); - bool subTreeIsComplete(index_t node_id, index_t& to_fill); - size_t CommonPrefix(const std::vector<Token>& tokens, const std::vector<std::string>& types); - void AddFirstNode(); - bool AddRootNode(); - void removeTokensUpTo(index_t token_id); - void removeTokensUpTo(index_t token_id, index_t node_id); - void RemoveLastNode(); - void ChangeNodeType(); - void TrackBack();   void Validate() const; - std::unordered_map<std::string, std::string> traverse(const std::string& lower, const std::string& upper); - std::vector<std::string> GetPath(std::string upper, std::string lower);   index_t AddNode(const std::string& child_type, index_t parent_index); - void AddPath(const std::vector<std::string>& path, index_t current_index); - bool FillTree();   // top-down algorithm   std::unordered_map<std::string, size_t> m_min; // cache | 
