-rw-r--r--   TODO         1
-rw-r--r--   minicc.cpp  93
2 files changed, 71 insertions, 23 deletions
diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -1 +0,0 @@
-Locations in source: File:line
diff --git a/minicc.cpp b/minicc.cpp
--- a/minicc.cpp
+++ b/minicc.cpp
@@ -4,6 +4,7 @@
 #include "gtest/gtest.h"
 
 #include <algorithm>
+#include <cctype>
 #include <deque>
 #include <map>
 #include <memory>
@@ -69,6 +70,13 @@ public:
   node_num = 0;
  }
 
+ // Type of lexical token
+ std::string GetType() {
+  if (node_num > 0 && nodes[root].child_names.size() == 1)
+   return nodes[root].child_names[0];
+  return "";
+ }
+
  bool Valid(const std::string& Top) const {
   // A token is non empty
   if (node_num == 0)
@@ -295,6 +303,31 @@ public:
 };
 
+struct Location {
+ size_t line;
+ size_t column;
+};
+
+bool operator==(const Location &a, const Location &b)
+{
+ return (a.line == b.line && a.column == b.column);
+}
+
+struct Token {
+ std::string type;
+ std::string value;
+ Location location;
+};
+
+bool operator==(const Token &a, const Token &b)
+{
+ return (a.type == b.type && a.value == b.value && a.location == b.location);
+}
+
+std::ostream& operator<<(std::ostream& os, const Token& token) {
+ return os << token.type << ": " << token.value << "(" << token.location.line << ":" << token.location.column << ")";
+}
+
 class Lexer
 {
@@ -302,14 +335,16 @@ private:
  const BNF &bnf;
  const std::string& Top;
 
+ Location location{1, 0};
+
  std::map<std::string, std::set<std::string>> ReverseBNF;
 
  // to be called on token end
- void FinalizeTree(Tree& tree, std::string& token, std::vector<std::string>& result)
+ void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result)
  {
   tree.Resolve(bnf, ReverseBNF);
   if (tree.Valid(Top)) {
-   result.push_back(token);
+   result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}});
    token.clear();
   }
   tree.clear();
@@ -320,9 +355,9 @@ public:
  {
  }
 
- std::vector<std::string> Lex(const std::string& s)
+ std::vector<Token> Lex(const std::string& s)
  {
-  std::vector<std::string> result;
+  std::vector<Token> result;
   std::string token;
   std::string Whitespace{"\t \n\r"};
 
@@ -330,7 +365,14 @@
   for (size_t pos{0}; pos < s.size(); pos++) {
    char c{s[pos]};
 
-   std::cout << "Char: |" << c << "|" << std::endl;
+   if (c == '\n') {
+    location.column = 0;
+    location.line++;
+   } else if (std::isprint(c)) {
+    location.column++;
+   }
+
+   //std::cout << "Char: |" << c << "|" << std::endl;
    if (Whitespace.find(c) != std::string::npos) { // found whitespace character
     // evaluate token up to now and skip whitespace
     FinalizeTree(tree, token, result);
@@ -353,23 +395,11 @@
 };
 
-ProgramNode Compile(std::vector<std::string> Tokens, std::string Top, BNF bnf, Terminals terminals)
+ProgramNode Compile(std::vector<Token> Tokens, std::string Top, BNF bnf, Terminals terminals)
 {
- BNF ReverseBNF;//{ Reverse(bnf)};
+ std::map<std::string, std::set<std::string>> ReverseBNF{ Reverse(bnf)};
 
  if (Tokens.size()){
-  std::string Token = Tokens[0];
-#if 0
-  auto Path = GetPath(Token, ReverseBNF, Top, terminals);
-  if (Path.size()) {
-   size_t Index{1};
-   while (Index < Tokens.size()) {
-    Path = GetPath(Token, ReverseBNF, Top, terminals, Path);
-    Index++;
-   }
-  } else
-   throw std::runtime_error("Invalid token: "s + Token);
-#endif
  } else
   throw std::runtime_error("No tokens!");
 
@@ -416,15 +446,34 @@ TEST_F(Test, BNF) {
  std::set<std::string> Terminals{"identifier", "=", ";"};
 
  std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"};
+ std::vector<Token> tokens_reference{
+  {"identifier", "a", { 1, 1} },
+  {"preprocessing-op-or-punc", "=", { 1, 3}},
+  {"identifier", "bc", { 1, 5}},
+  {"preprocessing-op-or-punc", ";", { 1, 8}},
+  {"identifier", "c", { 1, 10}},
+  {"preprocessing-op-or-punc", "=", { 1, 12}},
+  {"pp-number", "123", { 1, 14}},
+  {"preprocessing-op-or-punc", ";", { 1, 18}},
+  {"identifier", "esd", { 1, 20}},
+  {"preprocessing-op-or-punc", "=", { 1, 24}},
+  {"identifier", "Ff", { 1, 26}},
+  {"preprocessing-op-or-punc", ";", { 1, 29}},
+  {"pp-number", "1", { 1, 31}},
+  {"preprocessing-op-or-punc", "=", { 1, 33}},
+  {"identifier", "XYZ", { 1, 34}},
+ };
 
  Lexer lexer(LexBNF, LexTop);
  auto tokens = lexer.Lex(Code);
-#if 1
+
+ ASSERT_EQ(tokens, tokens_reference);
+#if 0
  for (const auto& i: tokens) {
-  std::cout << i << std::endl;
+  std::cout << i.value << std::endl;
  }
 #endif
- //auto Program = Compile(tokens, Top, bnf, Terminals);
+ auto Program = Compile(tokens, Top, bnf, Terminals);
 }
 
 int main(int argc, char* argv[]) {
