#pragma once #include "bnf.h" #include "minicc.h" #include #include #include class GrammerTest; namespace Gram { struct NodePosition { index_t node_id{}; // 0-based index_t child_pos{}; // 0-based }; // TreeNodes are only intermediate. Terminal symbols don't get of TreeNodes // token_id: index into token list // node_id: index into tree node list struct TreeNode { NodePosition pos; // position of this node in tree index_t node_id{}; std::string type; index_t variant; // bnf[type][variant] std::vector child_ids; // < 0: terminal: token_id; > 0: non-terminal: node_id; = 0: unset }; class Compiler { private: // The result std::vector nodes; // Input std::vector tokens; BNF &bnf; // not const for access via operator[] const std::string m_top; std::unordered_map> ReverseBNF; // possible parent types of a given type; unused now: remove? std::unordered_map> reversedFirst; // possible parent types of first childs of a given type // Tree specific void clear(); // Node specific std::string GetTypeOfNode(index_t node_id) const; index_t AddNode(const std::string& type, index_t variant, NodePosition pos = {}); void RemoveNode(); // Adds Node and Removes it on scope exit (RAII) class AddNodeGuard { Compiler& m_compiler; public: AddNodeGuard(Compiler& compiler, const std::string& type, index_t variant, NodePosition pos); ~AddNodeGuard(); }; void IncNodePosition(NodePosition& pos); NodePosition begin_pos; // top-down algorithm std::unordered_map m_min; // cache size_t minimumSymbolsNeeded(std::string symbol); size_t minimumSymbolsNeeded(std::vector symbol_list); bool match(std::string symbol, size_t begin, size_t end); bool match(std::vector symbol_list, size_t begin, size_t end); public: Compiler(BNF& bnf, const std::string& top); std::pair> compile(std::vector p_tokens); void DumpTree(); friend class ::GrammerTest; }; bool ChildIdIsToken(int32_t child_id); index_t TokenIdFromChildId(int32_t child_id); int32_t ChildIdFromTokenId(index_t token_id); } // namespace Gram