diff options
| -rw-r--r-- | Makefile | 13 | ||||
| -rw-r--r-- | bnf.cpp | 31 | ||||
| -rw-r--r-- | bnf.h | 2 | ||||
| -rwxr-xr-x | cppbnf.cpp | 51 | ||||
| -rw-r--r-- | grammer.cpp | 59 | 
5 files changed, 100 insertions, 56 deletions
| @@ -1,16 +1,16 @@  PROJECTNAME=minicc -CXX=clang++-8 +CXX=clang++-9  #CXX=g++-8 -#CXXFLAGS=-O0 -D_DEBUG +CXXFLAGS=-O0 -D_DEBUG  # -fprofile-instr-generate -fcoverage-mapping  # gcc:--coverage -CXXFLAGS=-O2 -DNDEBUG +#CXXFLAGS=-O2 -DNDEBUG -CXXFLAGS+= -Wall -std=c++17 -I. -Ilib +CXXFLAGS+= -Wall -std=c++17 -I. -ifeq ($(CXX),clang++-8) +ifeq ($(CXX),clang++-9)  # currently broken:  # ld.lld-8: error: undefined symbol: boost::re_detail_106700::cpp_regex_traits_implementation<char>::transform(char const*, char const*) const  #CXXFLAGS+= -stdlib=libc++ @@ -28,7 +28,7 @@ LIBS=\  -lboost_regex \  -lpthread -ifeq ($(CXX),clang++-8) +ifeq ($(CXX),clang++-9)  LIBS+= \  -fuse-ld=lld-8 \  -lstdc++ \ @@ -48,6 +48,7 @@ SRC=\      grammer.cpp \      lexer.cpp \      minicc.cpp \ +    cppbnf.cpp \      test-lexer.cpp \      googletest/src/gtest-all.cpp \      googlemock/src/gmock-all.cpp @@ -1,6 +1,7 @@  #include "bnf.h" -std::map<std::string, std::set<std::string>> Reverse(BNF bnf){ +std::map<std::string, std::set<std::string>> Reverse(BNF bnf) +{   std::map<std::string, std::set<std::string>> result;   for (const auto& [from, to] : bnf) { @@ -18,4 +19,32 @@ std::map<std::string, std::set<std::string>> Reverse(BNF bnf){   return result;  } +BNF SubBNF(const BNF& bnf, const std::string& top) +{ + BNF result; + + std::vector<std::string> todo{top}; + + while (!todo.empty()) { +  std::string current{todo.back()}; +  todo.pop_back(); + +  auto it = bnf.find(current); +  if (it != bnf.end()) { +   // add current value +   result[it->first] = it->second; + +   // add sub-tree values if not present yet, but available in original bnf +   for (auto& variant: it->second) { +    for (auto& child: variant) { +     if (result.find(child) == result.end() && bnf.find(child) != bnf.end()) { +      todo.push_back(child); +     } +    } +   } +  } + } + + return result; +} @@ -12,3 +12,5 @@ using namespace std::string_literals;  using BNF = std::map<std::string, std::vector<std::vector<std::string>>>;  std::map<std::string, std::set<std::string>> Reverse(BNF bnf); + +BNF SubBNF(const BNF& bnf, const std::string& top); @@ -21,7 +21,7 @@ namespace {    return result;   } - std::unordered_map<std::string, std::unordered_set<std::string>> reverseBNF(const T_BNF& bnf) + std::unordered_map<std::string, std::unordered_set<std::string>> reverseBNF(const BNF& bnf)   {    std::unordered_map<std::string, std::unordered_set<std::string>> result;    for (const auto& [symbol, lists] : bnf) { @@ -39,12 +39,12 @@ namespace {    return result;   } - bool isTerminal(const std::string& symbol, const T_BNF& bnf) + bool isTerminal(const std::string& symbol, const BNF& bnf)   {    return bnf.find(symbol) == bnf.end();   } - size_t numberOfStartSymbols(const T_BNF& bnf) + size_t numberOfStartSymbols(const BNF& bnf)   {    // exactly 1 start symbol    std::vector<std::string> startSymbols; @@ -64,7 +64,7 @@ namespace {    return startSymbols.size();   } - bool symbolsValid(const T_BNF& bnf) + bool symbolsValid(const BNF& bnf)   {    for (const auto& [symbol, lists] : bnf) {     if (boost::contains(symbol, " ")) { @@ -74,14 +74,19 @@ namespace {     for (const auto& list : lists) {      for (const auto& i : list) { -     if (i.size() == 1 && !isTerminal(i, bnf)) { -      std::cerr << "Warning: Non-Terminal symbol " << i << " in " << symbol << " is too long." << std::endl; -      return false; -     } -     if (boost::contains(i, " ")) { -      std::cerr << "Warning: Symbol " << i << " in " << symbol << " contains space." << std::endl; -      return false; +     if (!isTerminal(i, bnf)) { +      // every non-terminal symbol must be longer that 1 char +      if (i.size() == 1) { +       std::cerr << "Warning: Non-Terminal symbol " << i << " in " << symbol << " is too short." << std::endl; +       return false; +      } + +      // non-terminal symbols must not contain space +      if (boost::contains(i, " ")) { +       std::cerr << "Warning: Symbol " << i << " in " << symbol << " contains space." << std::endl; +       return false; +      }       }      }     } @@ -92,11 +97,11 @@ namespace {   // returns 1 if exactly one start symbol and   // all nodes size > 1, except terminal symbols - bool valid(const T_BNF& bnf) { + bool valid(const BNF& bnf) {    return numberOfStartSymbols(bnf) == 1 && symbolsValid(bnf);   } - bool validLex(const T_BNF& bnf) { + bool validLex(const BNF& bnf) {    // all terminal symbols exactly one character    for (const auto& [symbol, lists] : bnf) { @@ -174,7 +179,7 @@ namespace {    }   } - T_BNF& normalizeBNF(T_BNF& bnf) + BNF& normalizeBNF(BNF& bnf)   {    // resolve OPTIONAL symbols    for (auto& [symbol, lists] : bnf) { @@ -184,7 +189,7 @@ namespace {    return bnf;   } - T_BNF& normalizeLexBNF(T_BNF& bnf) + BNF& normalizeLexBNF(BNF& bnf)   {    normalizeBNF(bnf); @@ -206,9 +211,9 @@ namespace {  } // namespace -T_BNF GetCppBNFLex() +BNF GetCppBNFLex()  { - T_BNF bnf{ + BNF bnf{    // [gram.lex]    {"hex-quad", {{"hexadecimal-digit", "hexadecimal-digit", "hexadecimal-digit", "hexadecimal-digit"}}}, @@ -520,9 +525,9 @@ T_BNF GetCppBNFLex()   return normalizeLexBNF(bnf);  } -T_BNF GetCppBNFGram() +BNF GetCppBNFGram()  { - T_BNF bnf{ + BNF bnf{    // [gram.key]    {"typedef-name", {{"identifier"}, {"simple-template-id"}}}, @@ -1919,15 +1924,15 @@ T_BNF GetCppBNFGram()   return normalizeBNF(bnf);  } -TEST(Lex, Test2) { - auto bnf = GetCppBNFLex(); +TEST(CppBnf, LexicalBnf) { + auto bnf = SubBNF(GetCppBNFLex(), "preprocessing-token");   EXPECT_TRUE(valid(bnf));   EXPECT_TRUE(validLex(bnf));  } -TEST(Gram, Test3) { - auto bnf = GetCppBNFGram(); +TEST(CppBnf, GrammarBnf) { + auto bnf = SubBNF(GetCppBNFGram(), "translation-unit");   EXPECT_TRUE(valid(bnf));  } diff --git a/grammer.cpp b/grammer.cpp index 3c7671b..808c49e 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -4,7 +4,16 @@  using namespace Gram; -void Compiler::clear() { +static bool debug{false}; + +void Debug(std::string s) +{ + if (debug) +  std::cout << s << std::endl; +} + +void Compiler::clear() +{   nodes.clear();   root_node_id = 0; @@ -71,52 +80,51 @@ namespace {  void Compiler::DumpTree()  { - std::cout << "= Dump =======================================" << std::endl; - std::cout << "nodes.size()=" << nodes.size() << std::endl; + Debug("= Dump ======================================="); + Debug("nodes.size()="s + std::to_string(nodes.size()));   if (nodes.size() > 0) {    if (0) { -   std::cout << "--- Nodes ------------------------------------" << std::endl; -   std::cout << "root_node_id=" << root_node_id << std::endl; +   Debug("--- Nodes ------------------------------------"); +   Debug("root_node_id="s + std::to_string(root_node_id));     for (const auto& node : nodes) { -    std::cout << node.type << "(" << node.node_id << "):"; +    std::string line{"("s + std::to_string(node.node_id) + "):"s};      for (const auto& child : node.child_ids) { -     std::cout << " "; +     line += " "s;       if (ChildIdIsToken(child)) -      std::cout << "t" << TokenIdFromChildId(child); +      line += "t"s + std::to_string(TokenIdFromChildId(child));       else -      std::cout << child; +      line += std::to_string(child);      } -    std::cout << std::endl; +    Debug(line);     }    } -  std::cout << "--- Tree -------------------------------------" << std::endl; +  Debug("--- Tree -------------------------------------");    std::deque<std::pair<int32_t, size_t>> todo{std::pair<int32_t, size_t>{static_cast<int32_t>(root_node_id), 0}}; // id, indent    while (!todo.empty()) {     auto [current_index, indent] {todo.front()};     todo.pop_front(); -   std::cout << std::string(indent, ' '); +   std::string line(indent, ' ');     if (ChildIdIsToken(current_index)) {      index_t token_id {TokenIdFromChildId(current_index)}; -    std::cout << "Token(" << token_id << "): " << tokens[token_id].type << "(" << tokens[token_id].value << ")"; +    line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type + "("s + tokens[token_id].value + ")"s;     } else {      auto& node {nodes[current_index]}; -    std::cout << "Node(" << current_index << "): " << node.type << "/" << node.variant; +    line += "Node("s + std::to_string(current_index) + "): "s + node.type + "/" + std::to_string(node.variant);      auto child_ids{node.child_ids};      for (int i = 0; i < child_ids.size(); i++) {       todo.insert(todo.begin() + i, std::pair<int32_t, size_t>{child_ids[i], indent + 1});      }      if (node.alternatives.size()) { -     std::cout << ", " << node.alternatives.size() << " alternatives available"; +     line += ", "s + std::to_string(node.alternatives.size()) + " alternatives available"s;      }     } -   std::cout << std::endl; - +   Debug(line);    }   } - std::cout << "==============================================" << std::endl; + Debug("==============================================");  }  bool Compiler::AllTokensUsed() const @@ -240,7 +248,7 @@ bool Compiler::AddRootNode()     return false;    // now add!  -  std::cout << "AddRootNode(): Adding " << node_type << std::endl; +  Debug("AddRootNode(): Adding "s + node_type);    nodes[old_root_node_id].parent_node_id = new_root_node_id;    root_node_id = new_root_node_id;    nodes.emplace_back(TreeNode{root_node_id, root_node_id, node_type, node_variant, alternatives, child_ids}); @@ -258,7 +266,6 @@ void Compiler::removeTokensUpTo(index_t token_id)  void Compiler::removeTokensUpTo(index_t token_id, index_t node_id)  { - // std::cout << "DEBUG " << token_id << " " << tokens_used << std::endl;   // token_id should be the new tokens_used   if (token_id < tokens_used) { @@ -267,7 +274,7 @@ void Compiler::removeTokensUpTo(index_t token_id, index_t node_id)    // remove relevant tokens from end    while (token_id < tokens_used && child_ids.size() && ChildIdIsToken(child_ids.back()) && TokenIdFromChildId(child_ids.back()) >= token_id) { -   std::cout << "Removing token " << TokenIdFromChildId(child_ids.back()) << std::endl; +   Debug("Removing token "s + std::to_string(TokenIdFromChildId(child_ids.back())));     child_ids.pop_back();     if (tokens_used > 0)      tokens_used--; @@ -301,7 +308,7 @@ void Compiler::RemoveLastNode()      nodes[node.child_ids[0]].parent_node_id = node.child_ids[0];      root_node_id = node.child_ids[0];     } -   std::cout << "Removing root node " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl; +   Debug("Removing root node "s + nodes.back().type + "("s + std::to_string(nodes.back().node_id) + ")"s);     nodes.pop_back();    } else {     DumpTree(); @@ -313,7 +320,7 @@ void Compiler::RemoveLastNode()    if (parent.child_ids.empty() || parent.child_ids.back() != node_id)     throw std::runtime_error("ICE: Backtrack: Bad child nodes");    parent.child_ids.pop_back(); -  std::cout << "Removing " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl; +  Debug("Removing "s + nodes.back().type + "("s + std::to_string(nodes.back().node_id) + ")"s);    nodes.pop_back();   } else if (ChildIdIsToken(node.child_ids.back())) {    removeTokensUpTo(TokenIdFromChildId(node.child_ids.back())); @@ -435,7 +442,7 @@ index_t Compiler::AddNode(const std::string& child_type, index_t parent_index)   nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, alternatives, std::vector<int32_t>{}});   //root stays, tokens_used stays - std::cout << "AddNode(): " << parent.type << "->" << child_type << ": "<< index << std::endl; + Debug("AddNode(): "s + parent.type + "->"s + child_type + ": "s + std::to_string(index));   DumpTree();   return index; @@ -450,7 +457,7 @@ void Compiler::AddPath(const std::vector<std::string>& path, index_t current_ind   nodes.back().child_ids.emplace_back(ChildIdFromTokenId(tokens_used));   tokens_used++; - std::cout << "AddPath(): token " << tokens.back().type << std::endl; + Debug("AddPath(): token "s + tokens.back().type);   DumpTree();  } @@ -470,7 +477,7 @@ bool Compiler::FillTree()    if (next_child == tokens[tokens_used].type) { // add token directly     node.child_ids.push_back(ChildIdFromTokenId(tokens_used));     tokens_used++; -   std::cout << "tokens_used++: " << tokens_used << std::endl; +   Debug("tokens_used++: "s + std::to_string(tokens_used));     DumpTree();    } else { // add inner nodes     auto list = GetPath(next_child, tokens[tokens_used].type); | 
