diff options
| author | Roland Reichwein <mail@reichwein.it> | 2020-01-18 15:36:42 +0100 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2020-01-18 15:36:42 +0100 | 
| commit | 766b70f8b85197de38a9fd629641ca9f70cc2340 (patch) | |
| tree | f62ae2a98487af55f32cceeeda7b261f24955be3 | |
| parent | 39716aa1907e975f6e37adafe42d72d643223a98 (diff) | |
Lexer (WIP)
| -rw-r--r-- | TODO | 1 | ||||
| -rw-r--r-- | minicc.cpp | 209 | 
2 files changed, 137 insertions, 73 deletions
| @@ -0,0 +1 @@ +Locations in source: File:line @@ -5,6 +5,7 @@  #include <deque>  #include <map> +#include <memory>  #include <string>  #include <utility>  #include <vector> @@ -31,115 +32,172 @@ std::vector<PathElement> GetPath(std::string Token, BNF ReverseBNF, std::string   return {}; // TODO  } -BNF Reverse(BNF bnf){ - return {}; // TODO +auto Reverse(BNF bnf){ + std::map<std::string, std::vector<std::string>> result; + + for (const auto& [from, to] : bnf) { +  for (const auto& list : to) { +   for (const auto& element : list) { +    auto i{result.find(element)}; +    if (i != result.end()) // already present +     i->second.push_back(from); +    else // new element +     result.emplace(element, std::vector{1, from}); +   } +  } + } + + return result;  }  using index_t = size_t;  struct TreeNode {   index_t parent; - std::vector<index_t> childs; + std::vector<index_t> childs; // fill char by char + std::vector<std::string> child_names; // fill always   std::string name;  }; -struct Tree { +class Tree { +private:   std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1   index_t node_num{};   index_t root{};   index_t last{}; - bool Valid() const { -  // Start symbol? -  // All terminal symbols? -  return true; // TODO +public: + bool Valid(const std::string& Top) const { +  // Start symbol on top +  if (node_num == 0) +   return false; + +  auto rootNode{nodes.find(root)}; +  if (rootNode == nodes.end()) +   throw std::runtime_error("Node not found: "s + std::to_string(root)); +   +  if (rootNode->second.name != Top) +   return false; + +  // All nodes filled (implies all leaves are terminal) +  for (const auto& [index, node]: nodes) { +   if (node.childs.size() < node.child_names.size()) +    return false; // node not filled +  } + +  return true;   } -  bool Add(char c) { + // try to add node + bool Add(char c, const BNF& bnf, const std::map<std::string, std::vector<std::string>>& reverseBNF) { +  if (nodes.empty()) { // first node +   node_num ++; +   root = node_num; +   last = node_num; +   std::string node_name{1, c}; +   auto rule{reverseBNF.find(node_name)}; +   if (rule == reverseBNF.end()) +    throw std::runtime_error("Rule not found: "s + node_name); +   nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name}); +   return true; +  } else {     // TODO -   //node_num ++; -   //nodes.emplace(node_num, {}); -   return false;    } +  return false; + }  }; -void CheckCandidates(std::deque<Tree>& candidates, std::string& token, std::vector<std::string>& result) +class Lexer  { - bool valid{false}; - for (const auto& ct : candidates) { -  if (ct.Valid()) { -   if (valid) -    throw std::runtime_error("Found ambiguous token "s + token); -   result.push_back(token); -   token.clear(); -   valid = true; + +private: + const BNF &bnf; + const std::string& Top; + + std::map<std::string, std::vector<std::string>> ReverseBNF; + + // to be called on token end + void CheckCandidates(std::deque<Tree>& candidates, std::string& token, std::vector<std::string>& result) + { +  if (candidates.empty()) { // skip +   if (!token.empty()) +    throw std::runtime_error("Expected empty token, got "s + token); +  } else { // check candidates +   bool valid{false}; +   for (const auto& ct : candidates) { +    if (ct.Valid(Top)) { +     if (valid) +      throw std::runtime_error("Found ambiguous token "s + token); +     result.push_back(token); +     token.clear(); +     valid = true; +    } +   } +   if (!valid) +    throw std::runtime_error("Invalid token: "s + token); + +   candidates.clear();    }   } - if (!valid) -  throw std::runtime_error("Invalid token: "s + token); - candidates.clear(); -} - -std::vector<std::string> Lex(std::string s, std::string Top, BNF bnf) -{ - std::vector<std::string> result; - std::string token; +public: + Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} + { + } - BNF ReverseBNF{ Reverse(bnf)}; + std::vector<std::string> Lex(const std::string& s) + { +  std::vector<std::string> result; +  std::string token; - std::string Whitespace{"\t \n\r"}; - std::deque<Tree> candidates; +  std::string Whitespace{"\t \n\r"}; +  std::deque<Tree> candidates; - for (size_t pos{0}; pos < s.size(); pos++) { -  char c{s[pos]}; -  if (Whitespace.find(c) != std::string::npos) { // found whitespace character -   if (candidates.empty()) { // skip -    if (!token.empty()) -     throw std::runtime_error("Expected empty token, got "s + token); -   } else { // check candidates +  for (size_t pos{0}; pos < s.size(); pos++) { +   char c{s[pos]}; +   if (Whitespace.find(c) != std::string::npos) { // found whitespace character +    // evaluate token up to now and skip whitespace      CheckCandidates(candidates, token, result); -   } -  } else { // no whitespace: try to add to tree -   std::deque<index_t> EraseList; -   int i = 0; -   for (auto ct = candidates.begin(); ct != candidates.end(); ct++, i++) { -    if (!ct->Add(c)) { -     EraseList.push_front(i); // no candidate anymore +   } else { // no whitespace: try to add to tree +    std::deque<index_t> EraseList; +    int i = 0; +    for (auto ct = candidates.begin(); ct != candidates.end(); ct++, i++) { +     if (!ct->Add(c, bnf, ReverseBNF)) { // either add char or delete candidate +      // push to front to get reversed order +      EraseList.push_front(i); // no candidate anymore +     } +    } +     +    if (candidates.size() - EraseList.size() > 0) { // Added to some candidates: Continue growing token +     token.push_back(c); +     for (const auto& i : EraseList) +      candidates.erase(candidates.begin() + i); +    } else { // no candidates left: new tree +     CheckCandidates(candidates, token, result); +    } +     +    // new candidate: starting with c +    auto element {ReverseBNF.find(std::string(1, c))}; +    if (element != ReverseBNF.end()) { +     Tree newTree; +     if (newTree.Add(c, bnf, ReverseBNF)) +      candidates.push_back(newTree);      } -   } -   // new candidate: starting with c -   auto element {ReverseBNF.find(std::string(1, c))}; -   if (element != ReverseBNF.end()) { -    candidates.emplace_back(); -    candidates.back().Add(c); -   } - -   if (candidates.size() - EraseList.size() > 0) { // Added to some candidates: Continue growing token -    token.push_back(c); -    for (const auto& i : EraseList) -     candidates.erase(candidates.begin() + i); -   } else { // no candidates left: new tree -    CheckCandidates(candidates, token, result);     }    } - } - // Final evaluation - if (candidates.empty()) { // skip -  if (!token.empty()) -   throw std::runtime_error("Expected empty token at end of input, got "s + token); - } else { // check candidates +  // Final evaluation of last token    CheckCandidates(candidates, token, result); + +  return result;   } - return result; -} +};  ProgramNode Compile(std::vector<std::string> Tokens, std::string Top, BNF bnf, Terminals terminals)  { - BNF ReverseBNF{ Reverse(bnf)}; + BNF ReverseBNF;//{ Reverse(bnf)};   if (Tokens.size()){    std::string Token = Tokens[0]; @@ -194,10 +252,15 @@ TEST_F(Test, BNF) {   std::set<std::string> Terminals{"identifier", "=", ";"}; - std::string Code{"a = b ; c = d ; e = f ;"}; + std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; - auto tokens = Lex(Code, LexTop, LexBNF); - auto Program = Compile(tokens, Top, bnf, Terminals); + Lexer lexer(LexBNF, LexTop); + auto tokens = lexer.Lex(Code); +  + for (const auto& i: tokens) { +  std::cout << i << std::endl; + } + //auto Program = Compile(tokens, Top, bnf, Terminals);  }  int main(int argc, char* argv[]) { | 
