| | | |
|---|---|---|
| author | Roland Reichwein <mail@reichwein.it> | 2020-03-21 15:38:05 +0100 |
| committer | Roland Reichwein <mail@reichwein.it> | 2020-03-21 15:38:05 +0100 |
| commit | 3cc139bce0283018473d4906ee2ea5f40f771255 (patch) | |
| tree | 658956ab52b8892d419a2ba905f4c41f21f6d926 | |
| parent | 74350b52fee9f576a1cc71d99cfd4ebdf5a73e0f (diff) | |
Add lexer to CPP
| | | |
|---|---|---|
| -rw-r--r-- | cpp.cpp | 120 |
| -rw-r--r-- | cpp.h | 4 |
2 files changed, 39 insertions, 85 deletions
```diff
--- a/cpp.cpp
+++ b/cpp.cpp
@@ -2,9 +2,10 @@
 #include "bnf.h"
 #include "cppbnf.h"
+#include "debug.h"
+#include "lexer.h"
 #include "grammer.h"
 #include "minicc.h"
-#include "debug.h"
 
 #include <gtest/gtest.h>
 #include <gmock/gmock.h>
@@ -29,66 +30,14 @@ void CPP::backslash_escape()
  // TODO
 }
 
-namespace {
-
-std::vector<Token> sourceToCharTokens(const std::string& code)
-{
- std::vector<Token> result;
-
- Location location{1, 1};
-
- for (char c: code) {
-  if (c == '\n') {
-   location.column = 1;
-   location.line++;
-  } else if (std::isprint(c)) {
-   location.column++;
-  }
-
-  result.emplace_back(Token{std::string(1, c), std::string(1, c), location});
- }
- return result;
-}
-
-}
-
 // Phase 3: Parse preprocessing tokens
-std::pair<index_t, std::vector<TreeNode>> CPP::preprocessing_tokenize(const std::string& s)
+std::vector<Token> CPP::preprocessing_tokenize(const std::string& s)
 {
- m_code = s;
-
- m_charTokens = sourceToCharTokens(s);
-
  auto bnf{SubBNF(GetCppBNFLex(), "preprocessing-token")};
+
+ Lex::Lexer lexer(bnf, "preprocessing-token");
 
- // add to bnf to match whole file
- bnf["file"] = {
-  {"preprocessing-token-list"},
-  {"whitespace-list", "preprocessing-token-list"}
- };
- bnf["preprocessing-token-list"] = {
-  {"preprocessing-token-padded"},
-  {"preprocessing-token-list", "preprocessing-token-padded"}
- };
- bnf["preprocessing-token-padded"] = {
-  {"preprocessing-token"},
-  {"preprocessing-token", "whitespace-list"}
- };
- bnf["whitespace-list"] = {
-  {"whitespace-char"},
-  {"whitespace-list", "whitespace-char" }
- };
- bnf["whitespace-char"] = {
-  {" "}, {"\t"}, {"\n"}, {"\r"}
- };
- Gram::Compiler compiler(bnf, "file");
- debug = true;
- std::pair<index_t, std::vector<TreeNode>> Tree = compiler.compile(m_charTokens);
-
- debug = true;
- compiler.DumpTree();
-
- return Tree;
+ return lexer.Lex(s);
 }
 
 // Phase 4: Preprocessing
@@ -137,7 +86,7 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr
 };
 
 namespace {
- std::unordered_set<std::string> pp_types{
+ std::unordered_set<std::string> pp_types {
   "identifier",
   "pp-number",
   "character-literal",
@@ -146,10 +95,16 @@ namespace {
   "user-defined-string-literal",
   "preprocessing-op-or-punc"
  };
+
+ std::unordered_set<std::string> keywords {
+  "alignas",
+  "alignof",
+  // ... Keywords table, p.15
+ };
 }
 
 // Phase 7.a: Create tokens from preprocessing tokens
-std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<TreeNode>> Tree)
+std::vector<Token> CPP::tokens_from_pptokens(std::vector<Token> pp_tokens)
 {
 
  std::vector<Token> result;
@@ -161,28 +116,23 @@ std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<Tree
  // "user-defined-string-literal" -> "literal" + value
  // "preprocessing-op-or-punc" -> value+value (operator,punctuator)
 
- // TODO: traverse Tree, creating Token list
- std::vector<index_t> todo(1, index_t(Tree.first));
-
- while (!todo.empty()) {
-  index_t current_index = todo.back();
-  todo.pop_back();
-
-  TreeNode &node{Tree.second[current_index]};
-
-  // visit node
-  if (pp_types.find(node.type) != pp_types.end()) { // TODO
-   std::cout << node.type << ": " << valueOfNode(current_index, Tree.second) << std::endl;
-  } else { // only traverse further if not handled
-
-   // iterate backwards in childs, to get depth-first search in tree, from the beginning
-   std::for_each(node.child_ids.rbegin(), node.child_ids.rend(), [&](int32_t child){
-    if (!ChildIdIsToken(child))
-     todo.push_back(child);
-   });
-  }
+ for (auto& token: pp_tokens) {
+  if (pp_types.find(token.type) != pp_types.end()) {
+   if (token.type == "identifier") {
+#if 0
+    if (keywords.find(token.value) != keywords.end())
+     result.emplace_back("keyword", token.value);
+    else
+#endif
+    result.emplace_back(Token{"identifier"s, token.value});
+   }
+   else if (token.type == "preprocessing-op-or-punc")
+    result.emplace_back(Token{token.value, token.value});
+   else
+    result.emplace_back(Token{"literal", token.value});
+  } else
+   throw std::runtime_error("Unhandled preprocessing token: "s + token.value + " ("s + token.type + ")"s);
  }
-
  return result;
 }
 
@@ -238,12 +188,16 @@ protected:
  }
 };
 
-#if 0
+#if 1
 TEST_F(CppTest, preprocessing_tokenize) {
  CPP cpp;
- auto ppTree = cpp.preprocessing_tokenize("int main() { return 1; }");
+ auto pp_tokens = cpp.preprocessing_tokenize("int main() { return 1; }");
 
- cpp.tokens_from_pptokens(ppTree);
+ ASSERT_EQ(pp_tokens.size(), 9);
+
+ auto tokens = cpp.tokens_from_pptokens(pp_tokens);
+
+ ASSERT_EQ(tokens.size(), 9);
 }
 #endif
 
--- a/cpp.h
+++ b/cpp.h
@@ -17,11 +17,11 @@ std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& T
 // phases of translation, according to standard
 void source_charset_map(); // phase 1
 void backslash_escape(); // phase 2
-std::pair<index_t, std::vector<Gram::TreeNode>> preprocessing_tokenize(const std::string& s); // phase 3
+std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3
 void preprocess(); // phase 4
 void execution_charset_map(); // phase 5
 void concatenate_strings(); // phase 6
-std::vector<Token> tokens_from_pptokens(std::pair<index_t, std::vector<Gram::TreeNode>> Tree); // phase 7.a
+std::vector<Token> tokens_from_pptokens(std::vector<Token> pp_tokens); // phase 7.a
 std::pair<index_t, std::vector<Gram::TreeNode>> analysis(std::vector<Token>); // phase 7.b
 void translate(); // phase 7.c
 void instantiate(); // phase 8
```
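With this change, phase 3 no longer parses the whole file into a tree over per-character tokens; it returns a flat `std::vector<Token>` that feeds straight into phase 7.a, as the re-enabled test shows. A minimal usage sketch of the new pipeline follows; the `main` wrapper and the printing loop are illustrative only, and `Token`'s `type`/`value` members are assumed from the code in the diff:

```cpp
#include "cpp.h"

#include <iostream>
#include <vector>

int main()
{
 CPP cpp;

 // Phase 3: lex raw source into preprocessing tokens.
 // For "int main() { return 1; }" the test expects 9 tokens:
 // int, main, (, ), {, return, 1, ;, }
 std::vector<Token> pp_tokens = cpp.preprocessing_tokenize("int main() { return 1; }");

 // Phase 7.a: turn preprocessing tokens into tokens. Identifiers keep
 // type "identifier", operators/punctuators use their own spelling as
 // type, and everything else becomes a "literal".
 std::vector<Token> tokens = cpp.tokens_from_pptokens(pp_tokens);

 for (const auto& token: tokens)
  std::cout << token.type << ": " << token.value << '\n';
}
```

Note that the keyword lookup in `tokens_from_pptokens` is still compiled out with `#if 0`, so `int` and `return` come through as plain identifiers until the keywords table from the standard is filled in.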
