diff options
| author | Roland Reichwein <mail@reichwein.it> | 2020-03-30 18:33:01 +0200 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2020-03-30 18:33:01 +0200 | 
| commit | 79fbc8bf495770e4a8b7c66c46acf07f4e47e568 (patch) | |
| tree | 174c0f3c194013c39a73f6a50ed8866784388c07 | |
| parent | 2eb2383387d16fc919c07e1a6b9211406576b893 (diff) | |
Speed up compile
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | bnf.cpp | 64 | ||||
| -rw-r--r-- | bnf.h | 23 | ||||
| -rw-r--r-- | cpp.cpp | 25 | ||||
| -rw-r--r-- | cpp.h | 58 | ||||
| -rw-r--r-- | grammer.cpp | 96 | ||||
| -rw-r--r-- | grammer.h | 26 | ||||
| -rw-r--r-- | test-cpp.cpp | 20 | 
8 files changed, 247 insertions, 67 deletions
| @@ -3,10 +3,10 @@ PROJECTNAME=minicc  CXX=clang++-10  #CXX=g++-9 +#CXXFLAGS=-O2 -DNDEBUG  CXXFLAGS=-O0 -g -D_DEBUG  # -fprofile-instr-generate -fcoverage-mapping  # gcc:--coverage -#CXXFLAGS=-O2 -DNDEBUG  CXXFLAGS+= -Wall -I. @@ -1,8 +1,8 @@  #include "bnf.h" -std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf) +std::unordered_map<std::string, std::unordered_set<std::string>> Reverse(const BNF& bnf)  { - std::unordered_map<std::string, std::set<std::string>> result; + std::unordered_map<std::string, std::unordered_set<std::string>> result;   for (const auto& [from, to] : bnf) {    for (const auto& list : to) { @@ -11,7 +11,7 @@ std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf)      if (i != result.end()) // already present       i->second.insert(from);      else // new element -     result.emplace(element, std::set{from}); +     result.emplace(element, std::unordered_set{from});     }    }   } @@ -19,9 +19,9 @@ std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf)   return result;  } -std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf) +std::unordered_map<std::string, std::unordered_set<std::string>> reverseFirst(const BNF& bnf)  { - std::unordered_map<std::string, std::set<std::string>> result; + std::unordered_map<std::string, std::unordered_set<std::string>> result;   for (const auto& [from, to] : bnf) {    for (const auto& list : to) { @@ -31,7 +31,7 @@ std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf)      if (i != result.end()) // already present       i->second.insert(from);      else // new element -     result.emplace(element, std::set{from}); +     result.emplace(element, std::unordered_set{from});     }    }   } @@ -73,3 +73,55 @@ bool isTerminal(const BNF& bnf, const std::string& symbol)   return bnf.find(symbol) == bnf.end();  } +std::unordered_set<std::string> getTerminals(const BNF& bnf) +{ + std::unordered_set<std::string> result; + + for (const auto& [from, to] : bnf) { +  for (const auto& list : to) { +   for (const auto& element : list) { +    if (isTerminal(bnf, element)) +     result.insert(element); +   } +  } + } + + return result; +} + +std::unordered_set<std::pair<std::string, size_t>, PairHash> getEmptyPositions(const BNF& bnf) +{ + std::unordered_set<std::pair<std::string, size_t>, PairHash> result; + + for (const auto& [from, to] : bnf) { +  for (size_t i = 0; i < to.size(); i++) { +   const auto& list{to[i]}; +   if (list.size() == 0) +    result.insert({from, i}); +  } + } + + return result; +} + +std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversePosFirst(const BNF& bnf) +{ + std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> result; + + for (const auto& [from, to] : bnf) { +  for (size_t i = 0; i < to.size(); i++) { +   const auto& list{to[i]}; +   if (list.size() > 0) { +    const auto& element{list[0]}; +    auto it{result.find(element)}; +    if (it != result.end()) // already present +     it->second.insert({from, i}); +    else // new element +     result.emplace(element, std::unordered_set<std::pair<std::string, index_t>, PairHash>{{from, i}}); +   } +  } + } + + return result; +} + @@ -1,17 +1,32 @@  #pragma once +#include "minicc.h" +  #include <deque> -#include <unordered_map> -#include <set>  #include <string> +#include <unordered_map> +#include <unordered_set>  #include <utility>  #include <vector>  using BNF = std::unordered_map<std::string, std::vector<std::vector<std::string>>>; -std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf); // unused now, remove? -std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf); +std::unordered_map<std::string, std::unordered_set<std::string>> Reverse(const BNF& bnf); // unused now, remove? +std::unordered_map<std::string, std::unordered_set<std::string>> reverseFirst(const BNF& bnf);  BNF SubBNF(const BNF& bnf, const std::string& top);  bool isTerminal(const BNF& bnf, const std::string& symbol); +std::unordered_set<std::string> getTerminals(const BNF& bnf); + +struct PairHash { + size_t operator()(const std::pair<std::string,size_t>& p) const noexcept + { +  size_t h0 {std::hash<std::string>{}(p.first)}; +  size_t h1 {std::hash<size_t     >{}(p.second)}; +  return h0 ^ (h1 << 1); + } +}; +std::unordered_set<std::pair<std::string, size_t>, PairHash> getEmptyPositions(const BNF& bnf); + +std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversePosFirst(const BNF& bnf); // like reverseFirstPos, but including variant @@ -10,7 +10,9 @@  #include <gtest/gtest.h>  #include <gmock/gmock.h> +#include <functional>  #include <unordered_set> +#include <unordered_map>  using namespace Gram; @@ -222,13 +224,26 @@ std::vector<Gram::TreeNode> CPP::analysis(const std::vector<Token>& tokens)   return compiler.compile(tokens);  } +namespace { + + CPP::map_type map_translation_unit { +  {"top-level-declaration-seq", [](){}} + }; + +} // anonymous namespace + +void CPP::traverse(index_t node_id, map_type& map) +{ + // TODO +} +  // Phase 7.c: Translate -void CPP::translate(const std::vector<Gram::TreeNode>& tree) +void CPP::translate()  { - if (tree.size() == 0) + if (m_nodes.size() == 0)    throw std::runtime_error("ICE: Tree is empty"); - //traverse(i, ); + traverse(0, map_translation_unit);  }  // Phase 8: Instantiate objects @@ -259,8 +274,8 @@ void CPP::compile(const std::string& code)   concatenate_strings();   auto tokens = tokens_from_pptokens(pp_tokens); - auto nodes = analysis(tokens); - translate(nodes); + m_nodes = analysis(tokens); + translate();   instantiate(); @@ -9,31 +9,37 @@ class CPP {  public: -CPP(); -~CPP(); - -std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& Tree); - -// phases of translation, according to standard -void source_charset_map(); // phase 1 -void backslash_escape(); // phase 2 -std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3 -void preprocess(); // phase 4 -void execution_charset_map(); // phase 5 -void concatenate_strings(); // phase 6 -std::vector<Token> tokens_from_pptokens(const std::vector<Token>& pp_tokens); // phase 7.a -std::vector<Gram::TreeNode> analysis(const std::vector<Token>&); // phase 7.b -void translate(const std::vector<Gram::TreeNode>& tree); // phase 7.c -void instantiate(); // phase 8 -void link(); // phase 9 - -// all phases of translation -void compile(const std::string& code); - -std::vector<uint8_t> getCode(); -std::vector<uint8_t> getData(); - + CPP(); + ~CPP(); + + std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& Tree); + + // phases of translation, according to standard + void source_charset_map(); // phase 1 + void backslash_escape(); // phase 2 + std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3 + void preprocess(); // phase 4 + void execution_charset_map(); // phase 5 + void concatenate_strings(); // phase 6 + std::vector<Token> tokens_from_pptokens(const std::vector<Token>& pp_tokens); // phase 7.a + std::vector<Gram::TreeNode> analysis(const std::vector<Token>&); // phase 7.b + void translate(); // phase 7.c + void instantiate(); // phase 8 + void link(); // phase 9 + + // all phases of translation + void compile(const std::string& code); + + std::vector<uint8_t> getCode(); + std::vector<uint8_t> getData(); + + typedef std::unordered_map<std::string, std::function<void()>> map_type; +   private: - std::string m_code; - std::vector<Token> m_charTokens; + std::string m_code; // input / start + std::vector<Token> m_charTokens; // result of phase 3 + std::vector<Gram::TreeNode> m_nodes; // result of phase 7.b + + void traverse(index_t node_id, map_type& map);  }; + diff --git a/grammer.cpp b/grammer.cpp index 8b6f01b..9ceed12 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -147,7 +147,7 @@ void Compiler::IncNodePosition(NodePosition& pos)    throw std::runtime_error("ICE: No node expected at "s + std::to_string(pos.node_id) + "/"s + std::to_string(pos.child_pos));  } -size_t Compiler::minimumSymbolsNeeded(std::string symbol) +size_t Compiler::minimumSymbolsNeeded(const std::string& symbol)  {   if (isTerminal(bnf, symbol)) {    return 1; @@ -173,7 +173,7 @@ size_t Compiler::minimumSymbolsNeeded(std::string symbol)   }  } -size_t Compiler::minimumSymbolsNeeded(std::vector<std::string> symbol_list) +size_t Compiler::minimumSymbolsNeeded(const std::vector<std::string>& symbol_list)  {   size_t result{0}; @@ -196,12 +196,6 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t    symbol_list.erase(symbol_list.begin());   } - // match terminal symbols at end - while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) { -  end--; -  symbol_list.erase(symbol_list.end() - 1); - } -   // matching of empty lists   if (symbol_list.size() == 0) {    if (begin == end) { // only match real empty list @@ -218,6 +212,8 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t   auto it{bnf.find(symbol_list.front())};   if (it != bnf.end()) {    for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives +   if (!canStartWith(symbol_list.front(), i, tokens[begin].type)) // optimization +    continue;     AddNodeGuard guard(*this, i);     std::vector<std::string> list {it->second[i]};     list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); @@ -240,6 +236,74 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end)   return match(symbol_list, begin, end);  } +bool Compiler::canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const +{ + return m_start_cache.contains({non_terminal, variant, terminal}); +} + +void Compiler::fillStartCache() +{ + std::unordered_set<std::string> terminals {getTerminals(bnf)}; + + { // follow terminal symbols +  for (const std::string& terminal: terminals) { +   std::unordered_set<std::string> current_set{terminal}; +   std::unordered_set<std::string> done_set; +   std::unordered_set<std::string> next_set; + +   do { +    for (const auto& current_symbol: current_set) { +     auto it {reversedPosFirst.find(current_symbol)}; +     if (it != reversedPosFirst.end()) { +      std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second}; + +      for (const auto& position: positions) { +       m_start_cache.insert(std::tuple<std::string, size_t, std::string>{position.first, position.second, terminal}); +       if (done_set.find(position.first) == done_set.end()) { +        next_set.insert(position.first); +        done_set.insert(position.first); +       } +      } +     } +    } + +    current_set = next_set; +    next_set.clear(); +   } while (!current_set.empty()); +  } + } + + { // follow empty non-terminal symbols, and combine all found non-terminals with all terminals +  std::unordered_set<std::pair<std::string, size_t>, PairHash> current_set {getEmptyPositions(bnf)}; +  std::unordered_set<std::pair<std::string, size_t>, PairHash> done_set; +  std::unordered_set<std::pair<std::string, size_t>, PairHash> next_set; + +  do { +   for (const auto& current_pos: current_set) { +    for (const std::string& terminal: terminals) { +     m_start_cache.insert(std::tuple<std::string, size_t, std::string>{current_pos.first, current_pos.second, terminal}); +    } + +    auto it {reversedPosFirst.find(current_pos.first)}; +    if (it != reversedPosFirst.end()) { +     std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second}; + +     for (const auto& position: positions) { +      if (done_set.find(position) == done_set.end()) { +       next_set.insert(position); +       done_set.insert(position); +      } +     } +    } + +   } + +   current_set = next_set; +   next_set.clear(); +  } while (!current_set.empty()); + } +} +  void Compiler::constructTree()  {   symbol_variants_it = symbol_variants.begin(); @@ -276,14 +340,17 @@ void Compiler::constructTree()  } -Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} +Compiler::Compiler(BNF& bnf, const std::string& top) + : bnf(bnf) + , m_top(top) + //, ReverseBNF{Reverse(bnf)} + //, reversedFirst{reverseFirst(bnf)} + , reversedPosFirst{reversePosFirst(bnf)}  { - //std::cout << "DEBUG: " << m_top << std::endl; -    //   // prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements)   // - minimumSymbolsNeeded("translation-unit"); + (void) minimumSymbolsNeeded("translation-unit");   // remove bad marker elements   auto it{m_min.begin()};   while (it != m_min.end()) { @@ -293,7 +360,10 @@ Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), Reve     ++it;    }   } - minimumSymbolsNeeded("translation-unit"); + (void) minimumSymbolsNeeded("translation-unit"); + + // fill other cache + fillStartCache();  }  std::vector<TreeNode> Compiler::compile(std::vector<Token> p_tokens) @@ -5,6 +5,7 @@  #include <cstdint>  #include <deque> +#include <tuple>  #include <unordered_set>  #include <utility> @@ -41,8 +42,9 @@ private:   BNF &bnf; // not const for access via operator[]   const std::string m_top; - std::unordered_map<std::string, std::set<std::string>> ReverseBNF; // possible parent types of a given type; unused now: remove? - std::unordered_map<std::string, std::set<std::string>> reversedFirst; // possible parent types of first childs of a given type + //std::unordered_map<std::string, std::unordered_set<std::string>> ReverseBNF; // possible parent types of a given type; unused now: remove? + //std::unordered_map<std::string, std::unordered_set<std::string>> reversedFirst; // possible parent types of first childs of a given type + std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversedPosFirst; // possible parent types of first childs of a given type   std::deque<index_t> symbol_variants;   decltype(symbol_variants)::iterator symbol_variants_it; @@ -63,12 +65,26 @@ private:   void IncNodePosition(NodePosition& pos);   // top-down algorithm - std::unordered_map<std::string, size_t> m_min; // cache - size_t minimumSymbolsNeeded(std::string symbol); - size_t minimumSymbolsNeeded(std::vector<std::string> symbol_list); + std::unordered_map<std::string, size_t> m_min; // cache for MinimumSymbolsNeeded + size_t minimumSymbolsNeeded(const std::string& symbol); + size_t minimumSymbolsNeeded(const std::vector<std::string>& symbol_list);   bool match(std::string symbol, size_t begin, size_t end);   bool match(std::vector<std::string> symbol_list, size_t begin, size_t end); + // start / end cache + struct TupleHash { +  size_t operator()(const std::tuple<std::string,size_t,std::string>& t) const noexcept +  { +   size_t h0 {std::hash<std::string>{}(std::get<0>(t))}; +   size_t h1 {std::hash<size_t     >{}(std::get<1>(t))}; +   size_t h2 {std::hash<std::string>{}(std::get<2>(t))}; +   return h0 ^ (h1 << 1) ^ (h2 << 2); +  } + }; + std::unordered_set<std::tuple<std::string, size_t, std::string>, TupleHash> m_start_cache; + bool canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const; + void fillStartCache(); +   void constructTree();   std::vector<std::string> m_symbol_list;   index_t m_symbol_list_pos{}; diff --git a/test-cpp.cpp b/test-cpp.cpp index aabc4f4..8a9b7cb 100644 --- a/test-cpp.cpp +++ b/test-cpp.cpp @@ -24,13 +24,12 @@ class CppTest: public ::testing::Test  {  protected:   CppTest() { -  debug = true; +  //debug = true;   }   ~CppTest() {   }  }; -#if 1  TEST_F(CppTest, preprocessing_tokenize) {   CPP cpp;   auto pp_tokens = cpp.preprocessing_tokenize("int main() { return 1; }"); @@ -45,7 +44,6 @@ TEST_F(CppTest, preprocessing_tokenize) {   ASSERT_EQ(nodes.size(), 44);  } -#endif  TEST_F(CppTest, preprocessing_tokenize_compile_error) {   CPP cpp; @@ -65,8 +63,16 @@ TEST_F(CppTest, preprocessing_tokenize_compile_error) {   FAIL() << "Exception expected";  } -#if 0 -TEST(Cpp, translate) { - CPP::translate(); +TEST(Cpp, compile) { + CPP cpp; +  + cpp.compile("int main() { return 1 + 1; }");  } -#endif + +TEST(Cpp, compile_2_times) { + CPP cpp; +  + cpp.compile("int main() { return (1 + 2) * 2; }"); + cpp.compile("int main() { return 1 + 2 * 2; }"); +} + | 
