diff options
| author | Roland Reichwein <mail@reichwein.it> | 2020-10-31 18:10:58 +0100 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2020-10-31 18:10:58 +0100 | 
| commit | 8256280b348b4b53fff35c9101ced0a8dfb2c58e (patch) | |
| tree | e745b79e98c853cf1891372e8b2a926a5d81fff5 | |
| parent | ce77838c4f32b9dc237f0c4b17d1f1e1741254d4 (diff) | |
CPP::translate() (WIP), documentation, bugfixing
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | TODO | 10 | ||||
| -rw-r--r-- | cpp.cpp | 50 | ||||
| -rw-r--r-- | cpp.h | 10 | ||||
| -rw-r--r-- | grammer.cpp | 4 | ||||
| -rw-r--r-- | grammer.h | 4 | ||||
| -rw-r--r-- | minicc.cpp | 8 | ||||
| -rw-r--r-- | minicc.h | 8 | 
8 files changed, 57 insertions, 39 deletions
| @@ -89,7 +89,7 @@ TESTSRC=\  SRC=$(PROGSRC) mcc.cpp  all: test-$(PROJECTNAME) mcc -	./test-$(PROJECTNAME) +	./test-$(PROJECTNAME) # --gtest_filter='*preprocessing_tokenize*'  # testsuite ----------------------------------------------  test-$(PROJECTNAME): $(TESTSRC:.cpp=.o) @@ -1,9 +1 @@ - - Token() = default; - Token(const std::string& s) { type = s; } // Assign type via "=" from string - -start symbol implicitly from bnf -validate bnf: no empty types or values, or empty target lists - -map -> unordered_map -set -> unordered_set +Update cppbnf.cpp to n4860 @@ -11,6 +11,7 @@  #include <gmock/gmock.h>  #include <functional> +#include <optional>  #include <unordered_set>  #include <unordered_map>  #include <filesystem> @@ -19,7 +20,16 @@ using namespace Gram;  namespace fs = std::filesystem; -CPP::CPP(){} +CPP::CPP(): map_translation_unit ({ + {"/translation-unit/top-level-declaration-seq/top-level-declaration/declaration/function-definition", +  [&](fs::path& path, index_t node_id) +   { +    //std::cout << "DEBUG: " << path << ", " << node_id << ", " << valueOfNode(node_id, m_nodes) << ", " << m_nodes[node_id].node_id << ", " << m_nodes[node_id].pos.node_id << std::endl; +   } + }, +}) +{ +}  CPP::~CPP(){} @@ -66,6 +76,8 @@ void CPP::concatenate_strings()  std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tree)  {   std::string result; + std::optional<size_t> pos0; + index_t last_index;   std::vector<int32_t> todo(1, int32_t(node_index)); @@ -75,7 +87,10 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr    // visit node if token    if (ChildIdIsToken(current_index)) { -   result += m_code[TokenIdFromChildId(current_index)]; +   if (!pos0) { +    pos0 = m_tokens[TokenIdFromChildId(current_index)].location.pos; +   } +   last_index = TokenIdFromChildId(current_index);    } else {     const TreeNode &node{Tree[current_index]}; @@ -87,7 +102,10 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr    }   } - return result; + if (!pos0) +  throw std::runtime_error("ICE: Node value not available"); + + return m_code.substr(*pos0, m_tokens[last_index].location.pos - *pos0) + m_tokens[last_index].value;  };  namespace { @@ -203,16 +221,16 @@ std::vector<Token> CPP::tokens_from_pptokens(const std::vector<Token>& pp_tokens    if (pp_types.find(token.type) != pp_types.end()) {     if (token.type == "identifier") {      if (keywords.find(token.value) != keywords.end()) -     result.emplace_back(Token{token.value, token.value}); +     result.emplace_back(Token{token.value, token.value, token.location});      else -     result.emplace_back(Token{"identifier"s, token.value}); +     result.emplace_back(Token{"identifier"s, token.value, token.location});     }     else if (token.type == "preprocessing-op-or-punc") -    result.emplace_back(Token{token.value, token.value}); +    result.emplace_back(Token{token.value, token.value, token.location});     else -    result.emplace_back(Token{"literal", token.value}); +    result.emplace_back(Token{"literal", token.value, token.location});    } else -   throw std::runtime_error("Unhandled preprocessing token: "s + token.value + " ("s + token.type + ")"s); +   throw std::runtime_error("Unhandled preprocessing token: "s + token.toString());   }   return result;  } @@ -227,14 +245,6 @@ std::vector<Gram::TreeNode> CPP::analysis(const std::vector<Token>& tokens)   return compiler.compile(tokens);  } -namespace { - - CPP::map_type map_translation_unit { -  {"/translation-unit/top-level-declaration-seq/top-level-declaration/declaration/function-definition", [](){}}, - }; - -} // anonymous namespace -  void CPP::traverse(index_t node_id, map_type& map, fs::path parent_path)  {   fs::path current_path{parent_path / m_nodes[node_id].type}; @@ -242,7 +252,7 @@ void CPP::traverse(index_t node_id, map_type& map, fs::path parent_path)   // execute callbacks   auto it{map.find(current_path.generic_string())};   if (it != map.end()) { -  std::cout << "DEBUG: Found " << current_path << std::endl; +  it->second(current_path, node_id);   }   // recurse tree @@ -277,6 +287,8 @@ void CPP::link()  // phases of translation, according to standard  void CPP::compile(const std::string& code)  { + m_code = code; +   source_charset_map(); // phase 1   backslash_escape(); // phase 2 @@ -289,8 +301,8 @@ void CPP::compile(const std::string& code)   concatenate_strings(); // phase 6 - auto tokens = tokens_from_pptokens(pp_tokens); // phase 7a - m_nodes = analysis(tokens); // phase 7b + m_tokens = tokens_from_pptokens(pp_tokens); // phase 7a + m_nodes = analysis(m_tokens); // phase 7b   translate(); // phase 7c   instantiate(); // phase 8 @@ -33,13 +33,15 @@ public:   std::vector<uint8_t> getCode();   std::vector<uint8_t> getData(); - typedef std::unordered_map<std::string, std::function<void()>> map_type; -   private: - std::string m_code; // input / start - std::vector<Token> m_charTokens; // result of phase 3 + typedef std::unordered_map<std::string, std::function<void(fs::path&, index_t)>> map_type; + + std::string m_code; // input from compile() + std::vector<Token> m_tokens; // result of phase 7.a   std::vector<Gram::TreeNode> m_nodes; // result of phase 7.b   void traverse(index_t node_id, map_type& map, fs::path parent_path = "/"); +  + CPP::map_type map_translation_unit;  }; diff --git a/grammer.cpp b/grammer.cpp index d7afaef..31a4bbf 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -74,7 +74,9 @@ void Compiler::DumpTree()     std::string line(indent, ' ');     if (ChildIdIsToken(current_index)) {      index_t token_id {TokenIdFromChildId(current_index)}; -    line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type + "("s + tokens[token_id].value + ")"s; +    line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type; +    if (tokens[token_id].value != tokens[token_id].type) +     line += "("s + tokens[token_id].value + ")"s;     } else {      auto& node {nodes[current_index]};      line += "Node("s + std::to_string(current_index) + "): "s + node.type + "/" + std::to_string(node.variant); @@ -22,8 +22,8 @@ struct NodePosition {  // token_id: index into token list  // node_id: index into tree node list  struct TreeNode { - NodePosition pos; // position of this node in tree - index_t node_id{}; + NodePosition pos; // position of this node in tree (i.e. parent node_id + child_pos in parent) + index_t node_id{}; // this node's id   std::string type;   index_t variant; // bnf[type][variant] @@ -50,7 +50,13 @@ void Location::advance(bool newline)   }  } -std::string Location::toString() +std::string Location::toString() const  {   return std::to_string(line) + ":"s + std::to_string(column);  } + +std::string Token::toString() const +{ + return location.toString() + ": "s + value + " ("s + type + ")"s; +} + @@ -1,3 +1,5 @@ +// Common definitions +  #pragma once  #include <cstdlib> @@ -19,8 +21,8 @@ struct Location {   size_t column{1};   size_t pos{0}; - void advance(bool newline = false); - std::string toString(); + void advance(bool newline = false); ///< advance 1 char + std::string toString() const;  };  bool operator==(const Location &a, const Location &b); @@ -29,6 +31,8 @@ struct Token {   std::string type;   std::string value;   Location location; + + std::string toString() const;  };  // For printing via Google Test | 
