diff options
| -rw-r--r-- | Makefile | 4 | ||||
| -rw-r--r-- | bnf.cpp | 21 | ||||
| -rw-r--r-- | bnf.h | 17 | ||||
| -rw-r--r-- | grammer.cpp | 14 | ||||
| -rw-r--r-- | grammer.h | 19 | ||||
| -rw-r--r-- | lexer.cpp | 292 | ||||
| -rw-r--r-- | lexer.h | 56 | ||||
| -rw-r--r-- | minicc.cpp | 471 | ||||
| -rw-r--r-- | minicc.h | 26 | ||||
| -rw-r--r-- | test-lexer.cpp | 97 | 
10 files changed, 551 insertions, 466 deletions
| @@ -44,6 +44,10 @@ endif  SRC=\      minicc.cpp \ +    lexer.cpp \ +    grammer.cpp \ +    bnf.cpp \ +    test-lexer.cpp \      googletest/src/gtest-all.cpp \      googlemock/src/gmock-all.cpp @@ -0,0 +1,21 @@ +#include "bnf.h" + +std::map<std::string, std::set<std::string>> Reverse(BNF bnf){ + std::map<std::string, std::set<std::string>> result; + + for (const auto& [from, to] : bnf) { +  for (const auto& list : to) { +   for (const auto& element : list) { +    auto i{result.find(element)}; +    if (i != result.end()) // already present +     i->second.insert(from); +    else // new element +     result.emplace(element, std::set{from}); +   } +  } + } + + return result; +} + + @@ -0,0 +1,17 @@ +#pragma once + +#include <deque> +#include <map> +#include <set> +#include <string> +#include <utility> +#include <vector> + +using namespace std::string_literals; + +using BNF = std::map<std::string, std::vector<std::vector<std::string>>>; +using Terminals = std::set<std::string>; +using ProgramNode = std::deque<std::string>; +using PathElement = std::pair<std::string, size_t>; // Name, Index + +std::map<std::string, std::set<std::string>> Reverse(BNF bnf); diff --git a/grammer.cpp b/grammer.cpp new file mode 100644 index 0000000..a44104c --- /dev/null +++ b/grammer.cpp @@ -0,0 +1,14 @@ +#include "grammer.h" + +Compiler::Compiler(const BNF& bnf, const std::string& Top): m_bnf(bnf), m_Top(Top), ReverseBNF{Reverse(bnf)} +{ +} + +ProgramNode Compiler::compile(std::vector<Token> Tokens) +{ + if (Tokens.size()){ + } else +  throw std::runtime_error("No tokens!"); + + return {}; +} diff --git a/grammer.h b/grammer.h new file mode 100644 index 0000000..5e76c60 --- /dev/null +++ b/grammer.h @@ -0,0 +1,19 @@ +#pragma once + +#include "bnf.h" +#include "minicc.h" + +class Compiler +{ + +private: + const BNF &m_bnf; + const std::string& m_Top; + + std::map<std::string, std::set<std::string>> ReverseBNF; + +public: + Compiler(const BNF& bnf, const std::string& Top); + ProgramNode compile(std::vector<Token> Tokens); +}; + diff --git a/lexer.cpp b/lexer.cpp new file mode 100644 index 0000000..46a38bd --- /dev/null +++ b/lexer.cpp @@ -0,0 +1,292 @@ +#include "lexer.h" + +void Tree::clear() { + nodes.clear(); + root = 0; + last = 0; + node_num = 0; +} + +// Type of lexical token +std::string Tree::GetType() { + if (node_num > 0 && nodes[root].child_names.size() == 1) +  return nodes[root].child_names[0]; + return ""; +} + +bool Tree::Valid(const std::string& Top) const { + // A token is non empty + if (node_num == 0) +  return false; + + // Start symbol on top + auto rootNode{nodes.find(root)}; + if (rootNode == nodes.end()) +  throw std::runtime_error("Node not found: "s + std::to_string(root)); +  + if (rootNode->second.name != Top) +  return false; + + // All nodes filled (implies all leaves are terminal) + for (const auto& [index, node]: nodes) { +  if (node.childs.size() < node.child_names.size()) +   return false; // node not filled + } + + return true; +} + +bool Tree::AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { + node_num ++; + root = node_num; + last = node_num; + std::string node_name(1, char(c)); + + auto reverseRule{reverseBNF.find(node_name)}; + if (reverseRule == reverseBNF.end()) +  throw std::runtime_error("Reverse rule not found for "s + node_name); + + auto rule{bnf.find(node_name)}; + if (rule != bnf.end()) { // multiple variants! +  throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s); + } + nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name}); + return true; +} + +std::vector<TreeNode> Tree::getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { + std::vector<TreeNode> result; // default: empty + + auto& root_name {nodes[root].name}; + auto bnfParents {reverseBNF.find(root_name)}; + if (bnfParents == reverseBNF.end()) +  return result; + + for (const auto& parent_node_name : bnfParents->second) { +  auto lists {bnf.at(parent_node_name)}; +  for (const auto& list : lists) { +   if (list.size() > 0 && list[0] == root_name) { +    TreeNode node{0, std::vector<index_t>{root}, list, parent_node_name}; +    result.push_back(node); +   } +  } + } + + return result; +} + +index_t Tree::GetLast() { + index_t result {root}; + + while(result != 0 && nodes[result].childs.size() >= 2) { +  result = nodes[result].childs[nodes[result].childs.size() - 1]; + } + + return result; +} + +void Tree::AddRootNode(const TreeNode& newRootNode) { + node_num++; + nodes[node_num] = newRootNode; + root = node_num; + last = node_num; +} + +void Tree::RemoveRootNode() { + root = nodes[root].childs[0]; + nodes.erase(node_num); + node_num--; + last = GetLast(); +} + +// Path from leaf to root +std::vector<std::string> Tree::GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { + std::vector<std::string> result; + + while (a != b) { +  auto parents {reverseBNF.find(a)}; +  if (parents == reverseBNF.end()) +   return {}; + +  bool hit{false}; +  for (const auto& parent : parents->second) { +   for (const auto& list : bnf.at(parent)) { +    if (list.size() > 0 && list[0] == a) { +     if (!hit) { +      result.push_back(a); +      a = parent; +      hit = true; +     } else +      throw std::runtime_error("Double match for "s + parent + "/"s + a); +    } +   } +  } + } + if (a == b) { +  result.push_back(a); + } + return result; +} + +index_t Tree::AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) +{ + TreeNode& parent {nodes[parent_index]}; + node_num++; + index_t index = node_num; + parent.childs.push_back(index); + std::vector<std::string> child_names; + auto rule {bnf.find(name)}; + if (rule != bnf.end()) { +  for (auto& list : rule->second) { +   if (list.size() > 0 && list[0] == child_name) +    child_names = list; +  } + } + nodes.emplace(index, TreeNode{parent_index, {}, child_names, name}); + //root stays + last = GetLast(); + + return index; +} + +void Tree::AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { + for (int i = path.size() - 1; i >= 0; i--) { +  std::string child_name; +  if (i > 0) +   child_name = path[i - 1]; +  current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF); + } +} + +// try to add character to tree +bool Tree::Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { + if (nodes.empty()) { // first node +  return AddFirstNode(c, bnf, reverseBNF); + } else { // at least one character is already present +  // Traverse tree until partially filled node found +  // or new node can be added +  index_t current_index{last}; + +  while (current_index != 0) { +   TreeNode& node {nodes[current_index]}; +   if (node.childs.size() < node.child_names.size()) { // partially filled node +    std::vector<std::string> list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF); +    if (list.size() > 0) { +     AddPath(list, current_index, bnf, reverseBNF); +     return true; +    } else { +     return false; // The path a->b is not available via bnf +    } +   } +   current_index = node.parent; +  } + +  // Add node at root + +  std::vector<TreeNode> parent_nodes = getParentTreeNode(bnf, reverseBNF); +  if (parent_nodes.size() == 0) +   throw std::runtime_error("Couldn't add new parent node."); + +  for (const auto &i : parent_nodes) { +   AddRootNode(i); +   if (Add(c, bnf, reverseBNF)) +    return true; +   RemoveRootNode(); +  } + + } + return false; +} + +// add path to start symbol +void Tree::Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { + if (nodes.empty()) // only handle non-empty trees +  return; + + while (true) { +  std::string& old_root_name { nodes[root].name }; // current root node name + +  auto parents {reverseBNF.find(old_root_name)}; +  if (parents != reverseBNF.end()) { // parents in bnf available +   bool hit{false}; +   for (auto& parent : parents->second) { +    for (const auto& list : bnf.at(parent)) { +     if (list.size() == 1 && list[0] == old_root_name) { +      if (!hit) { +       const std::string& new_root_name {parent}; +       // Add new TreeNode in the direction to root: +       // New root with 1 child (old root) +       nodes.emplace(++node_num, +                    TreeNode{0, // parent +                             std::vector<index_t>{root}, // child indices +                             std::vector<std::string>{old_root_name}, // child names +                             new_root_name // name +                    }); +       nodes[root].parent = node_num; +       root = node_num; +       // this->last stays +       hit = true; +      } else +       throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); +     } +    } +   } +   if (!hit) +    break; +  } else +   break; + } +} + +void Lexer::FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result) +{ + tree.Resolve(bnf, ReverseBNF); + if (tree.Valid(Top)) { +  result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}}); +  token.clear(); + } + tree.clear(); +} + +Lexer::Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} +{ +} + +std::vector<Token> Lexer::Lex(const std::string& s) +{ + std::vector<Token> result; + std::string token; + + std::string Whitespace{"\t \n\r"}; + Tree tree; + + for (size_t pos{0}; pos < s.size(); pos++) { +  char c{s[pos]}; +  if (c == '\n') { +   location.column = 0; +   location.line++; +  } else if (std::isprint(c)) { +   location.column++; +  } + +  //std::cout << "Char: |" << c << "|" << std::endl; +  if (Whitespace.find(c) != std::string::npos) { // found whitespace character +   // evaluate token up to now and skip whitespace +   FinalizeTree(tree, token, result); +  } else { // no whitespace: try to add to tree +   if (!tree.Add(c, bnf, ReverseBNF)) { +    FinalizeTree(tree, token, result); +    if (!tree.Add(c, bnf, ReverseBNF)) +     throw std::runtime_error("Parse error"); +   } + +   token.push_back(c); +  } + } + + // Final evaluation of last token + FinalizeTree(tree, token, result); + + return result; +} + @@ -0,0 +1,56 @@ +#pragma once + +#include "minicc.h" +#include "bnf.h" + +struct TreeNode { + index_t parent{}; + std::vector<index_t> childs; // fill char by char + std::vector<std::string> child_names; // fill always + std::string name; +}; + +class Tree { +private: + std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1 + index_t node_num{}; + index_t root{}; + index_t last{}; + +public: + void clear(); + std::string GetType(); + bool Valid(const std::string& Top) const; + bool AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); + std::vector<TreeNode> getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); + index_t GetLast(); + void AddRootNode(const TreeNode& newRootNode); + void RemoveRootNode(); + std::vector<std::string> GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); + index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); + void AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); + bool Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); + void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF); +}; + +class Lexer +{ + +private: + const BNF &bnf; + const std::string& Top; + + Location location{1, 0}; + + std::map<std::string, std::set<std::string>> ReverseBNF; + + // to be called on token end + void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result); + +public: + Lexer(const BNF& bnf, const std::string& Top); + std::vector<Token> Lex(const std::string& s); + +}; + + @@ -1,7 +1,9 @@ -#include <boost/algorithm/string.hpp> +#include "bnf.h" +#include "lexer.h" +#include "grammer.h" +#include "minicc.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" +#include <boost/algorithm/string.hpp>  #include <algorithm>  #include <cctype> @@ -12,13 +14,6 @@  #include <utility>  #include <vector> -using namespace std::string_literals; - -using BNF = std::map<std::string, std::vector<std::vector<std::string>>>; -using Terminals = std::set<std::string>; -using ProgramNode = std::deque<std::string>; -using PathElement = std::pair<std::string, size_t>; // Name, Index -  std::vector<std::string> split(std::string s)  {   std::vector<std::string> result; @@ -28,297 +23,11 @@ std::vector<std::string> split(std::string s)   return result;  } -auto Reverse(BNF bnf){ - std::map<std::string, std::set<std::string>> result; - - for (const auto& [from, to] : bnf) { -  for (const auto& list : to) { -   for (const auto& element : list) { -    auto i{result.find(element)}; -    if (i != result.end()) // already present -     i->second.insert(from); -    else // new element -     result.emplace(element, std::set{from}); -   } -  } - } - - return result; -} - -using index_t = size_t; - -struct TreeNode { - index_t parent{}; - std::vector<index_t> childs; // fill char by char - std::vector<std::string> child_names; // fill always - std::string name; -}; - -class Tree { -private: - std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1 - index_t node_num{}; - index_t root{}; - index_t last{}; - -public: - void clear() { -  nodes.clear(); -  root = 0; -  last = 0; -  node_num = 0; - } - - // Type of lexical token - std::string GetType() { -  if (node_num > 0 && nodes[root].child_names.size() == 1) -   return nodes[root].child_names[0]; -  return ""; - } - - bool Valid(const std::string& Top) const { -  // A token is non empty -  if (node_num == 0) -   return false; - -  // Start symbol on top -  auto rootNode{nodes.find(root)}; -  if (rootNode == nodes.end()) -   throw std::runtime_error("Node not found: "s + std::to_string(root)); -   -  if (rootNode->second.name != Top) -   return false; - -  // All nodes filled (implies all leaves are terminal) -  for (const auto& [index, node]: nodes) { -   if (node.childs.size() < node.child_names.size()) -    return false; // node not filled -  } - -  return true; - } - - bool AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { -  node_num ++; -  root = node_num; -  last = node_num; -  std::string node_name(1, char(c)); - -  auto reverseRule{reverseBNF.find(node_name)}; -  if (reverseRule == reverseBNF.end()) -   throw std::runtime_error("Reverse rule not found for "s + node_name); - -  auto rule{bnf.find(node_name)}; -  if (rule != bnf.end()) { // multiple variants! -   throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s); -  } -  nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name}); -  return true; - } - - std::vector<TreeNode> getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { -  std::vector<TreeNode> result; // default: empty - -  auto& root_name {nodes[root].name}; -  auto bnfParents {reverseBNF.find(root_name)}; -  if (bnfParents == reverseBNF.end()) -   return result; - -  for (const auto& parent_node_name : bnfParents->second) { -   auto lists {bnf.at(parent_node_name)}; -   for (const auto& list : lists) { -    if (list.size() > 0 && list[0] == root_name) { -     TreeNode node{0, std::vector<index_t>{root}, list, parent_node_name}; -     result.push_back(node); -    } -   } -  } - -  return result; - } - - index_t GetLast() { -  index_t result {root}; - -  while(result != 0 && nodes[result].childs.size() >= 2) { -   result = nodes[result].childs[nodes[result].childs.size() - 1]; -  } - -  return result; - } - - void AddRootNode(const TreeNode& newRootNode) { -  node_num++; -  nodes[node_num] = newRootNode; -  root = node_num; -  last = node_num; - } - - void RemoveRootNode() { -  root = nodes[root].childs[0]; -  nodes.erase(node_num); -  node_num--; -  last = GetLast(); - } - - // Path from leaf to root - std::vector<std::string> GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { -  std::vector<std::string> result; - -  while (a != b) { -   auto parents {reverseBNF.find(a)}; -   if (parents == reverseBNF.end()) -    return {}; - -   bool hit{false}; -   for (const auto& parent : parents->second) { -    for (const auto& list : bnf.at(parent)) { -     if (list.size() > 0 && list[0] == a) { -      if (!hit) { -       result.push_back(a); -       a = parent; -       hit = true; -      } else -       throw std::runtime_error("Double match for "s + parent + "/"s + a); -     } -    } -   } -  } -  if (a == b) { -   result.push_back(a); -  } -  return result; - } - - index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) - { -  TreeNode& parent {nodes[parent_index]}; -  node_num++; -  index_t index = node_num; -  parent.childs.push_back(index); -  std::vector<std::string> child_names; -  auto rule {bnf.find(name)}; -  if (rule != bnf.end()) { -   for (auto& list : rule->second) { -    if (list.size() > 0 && list[0] == child_name) -     child_names = list; -   } -  } -  nodes.emplace(index, TreeNode{parent_index, {}, child_names, name}); -  //root stays -  last = GetLast(); - -  return index; - } - - void AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { -  for (int i = path.size() - 1; i >= 0; i--) { -   std::string child_name; -   if (i > 0) -    child_name = path[i - 1]; -   current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF); -  } - } - - // try to add character to tree - bool Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { -  if (nodes.empty()) { // first node -   return AddFirstNode(c, bnf, reverseBNF); -  } else { // at least one character is already present -   // Traverse tree until partially filled node found -   // or new node can be added -   index_t current_index{last}; - -   while (current_index != 0) { -    TreeNode& node {nodes[current_index]}; -    if (node.childs.size() < node.child_names.size()) { // partially filled node -     std::vector<std::string> list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF); -     if (list.size() > 0) { -      AddPath(list, current_index, bnf, reverseBNF); -      return true; -     } else { -      return false; // The path a->b is not available via bnf -     } -    } -    current_index = node.parent; -   } - -   // Add node at root - -   std::vector<TreeNode> parent_nodes = getParentTreeNode(bnf, reverseBNF); -   if (parent_nodes.size() == 0) -    throw std::runtime_error("Couldn't add new parent node."); - -   for (const auto &i : parent_nodes) { -    AddRootNode(i); -    if (Add(c, bnf, reverseBNF)) -     return true; -    RemoveRootNode(); -   } - -  } -  return false; - } - - // add path to start symbol - void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { -  if (nodes.empty()) // only handle non-empty trees -   return; - -  while (true) { -   std::string& old_root_name { nodes[root].name }; // current root node name - -   auto parents {reverseBNF.find(old_root_name)}; -   if (parents != reverseBNF.end()) { // parents in bnf available -    bool hit{false}; -    for (auto& parent : parents->second) { -     for (const auto& list : bnf.at(parent)) { -      if (list.size() == 1 && list[0] == old_root_name) { -       if (!hit) { -        const std::string& new_root_name {parent}; -        // Add new TreeNode in the direction to root: -        // New root with 1 child (old root) -        nodes.emplace(++node_num, -                     TreeNode{0, // parent -                              std::vector<index_t>{root}, // child indices -                              std::vector<std::string>{old_root_name}, // child names -                              new_root_name // name -                     }); -        nodes[root].parent = node_num; -        root = node_num; -        // this->last stays -        hit = true; -       } else -        throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); -      } -     } -    } -    if (!hit) -     break; -   } else -    break; -  } - } - -}; - -struct Location { - size_t line; - size_t column; -}; -  bool operator==(const Location &a, const Location &b)  {   return (a.line == b.line && a.column == b.column);  } -struct Token { - std::string type; - std::string value; - Location location; -}; -  bool operator==(const Token &a, const Token &b)  {   return (a.type == b.type && a.value == b.value && a.location == b.location); @@ -328,173 +37,3 @@ std::ostream& operator<<(std::ostream& os, const Token& token) {   return os << token.type << ": " << token.value << "(" << token.location.line << ":" << token.location.column << ")";  } -class Lexer -{ - -private: - const BNF &bnf; - const std::string& Top; - - Location location{1, 0}; - - std::map<std::string, std::set<std::string>> ReverseBNF; - - // to be called on token end - void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result) - { -  tree.Resolve(bnf, ReverseBNF); -  if (tree.Valid(Top)) { -   result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}}); -   token.clear(); -  } -  tree.clear(); - } - -public: - Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} - { - } - - std::vector<Token> Lex(const std::string& s) - { -  std::vector<Token> result; -  std::string token; - -  std::string Whitespace{"\t \n\r"}; -  Tree tree; - -  for (size_t pos{0}; pos < s.size(); pos++) { -   char c{s[pos]}; -   if (c == '\n') { -    location.column = 0; -    location.line++; -   } else if (std::isprint(c)) { -    location.column++; -   } - -   //std::cout << "Char: |" << c << "|" << std::endl; -   if (Whitespace.find(c) != std::string::npos) { // found whitespace character -    // evaluate token up to now and skip whitespace -    FinalizeTree(tree, token, result); -   } else { // no whitespace: try to add to tree -    if (!tree.Add(c, bnf, ReverseBNF)) { -     FinalizeTree(tree, token, result); -     if (!tree.Add(c, bnf, ReverseBNF)) -      throw std::runtime_error("Parse error"); -    } - -    token.push_back(c); -   } -  } - -  // Final evaluation of last token -  FinalizeTree(tree, token, result); - -  return result; - } - -}; - -class Compiler -{ - -private: - const BNF &bnf; - const std::string& Top; - - std::map<std::string, std::set<std::string>> ReverseBNF; - -public: - Compiler(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} - { - } - - ProgramNode compile(std::vector<Token> Tokens) - { -  if (Tokens.size()){ -  } else -   throw std::runtime_error("No tokens!"); - -  return {}; - } -}; - -class Test: public ::testing::Test { -protected: - Test(){} - ~Test() override {} -}; - -TEST_F(Test, BNF) { - std::string LexTop{"preprocessing-token"}; - BNF LexBNF{ -  {"preprocessing-token", {{"identifier"}, -                           {"preprocessing-op-or-punc"}, -                           {"pp-number"}}}, - -  {"identifier", {{"identifier-nondigit"}, -                  {"identifier", "identifier-nondigit"}, -                  {"identifier", "digit"}}}, -  {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }}, -  {"identifier-nondigit", -    {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"}, -     {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"}, -     {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, -     {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}}, -  {"preprocessing-op-or-punc", {{";"}, -                                {"="}}}, -  {"pp-number", {{"digit"}, -                 {"pp-number", "digit"}}} - }; - - std::string Top{"program"}; - BNF bnf{ -  {"program", {{"statement-list"}}}, -  {"statement-list", {{"statement", "statement-list"}, -                      {}, }}, -  {"statement", {{"assigmnent", ";"}}}, -  {"assignment", {{"identifier", "=", "identifier"}}} - }; - - // implicit? - //std::set<std::string> Terminals{"identifier", "=", ";"}; - - std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; - std::vector<Token> tokens_reference{ -  {"identifier", "a", { 1, 1} }, -  {"preprocessing-op-or-punc", "=", { 1, 3}}, -  {"identifier", "bc", { 1, 5}}, -  {"preprocessing-op-or-punc", ";", { 1, 8}}, -  {"identifier", "c", { 1, 10}}, -  {"preprocessing-op-or-punc", "=", { 1, 12}}, -  {"pp-number", "123", { 1, 14}}, -  {"preprocessing-op-or-punc", ";", { 1, 18}}, -  {"identifier", "esd", { 1, 20}}, -  {"preprocessing-op-or-punc", "=", { 1, 24}}, -  {"identifier", "Ff", { 1, 26}}, -  {"preprocessing-op-or-punc", ";", { 1, 29}}, -  {"pp-number", "1", { 1, 31}}, -  {"preprocessing-op-or-punc", "=", { 1, 33}}, -  {"identifier", "XYZ", { 1, 34}}, - }; - - Lexer lexer(LexBNF, LexTop); - auto tokens = lexer.Lex(Code); - - ASSERT_EQ(tokens, tokens_reference); -#if 0 - for (const auto& i: tokens) { -  std::cout << i.value << std::endl; - } -#endif - Compiler compiler(bnf, Top); - auto Program = compiler.compile(tokens); - - //ASSERT_EQ(Program, Program_reference); -} - -int main(int argc, char* argv[]) { - ::testing::InitGoogleMock(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/minicc.h b/minicc.h new file mode 100644 index 0000000..0500b8c --- /dev/null +++ b/minicc.h @@ -0,0 +1,26 @@ +#pragma once + +#include <cstdlib> +#include <vector> +#include <string> +#include <iostream> + +using index_t = size_t; + +std::vector<std::string> split(std::string s); + +struct Location { + size_t line; + size_t column; +}; + +bool operator==(const Location &a, const Location &b); + +struct Token { + std::string type; + std::string value; + Location location; +}; + +bool operator==(const Token &a, const Token &b); +std::ostream& operator<<(std::ostream& os, const Token& token); diff --git a/test-lexer.cpp b/test-lexer.cpp new file mode 100644 index 0000000..39db3ec --- /dev/null +++ b/test-lexer.cpp @@ -0,0 +1,97 @@ +#include "bnf.h" +#include "lexer.h" +#include "grammer.h" +#include "minicc.h" + +#include <boost/algorithm/string.hpp> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include <algorithm> +#include <cctype> +#include <deque> +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +class Test: public ::testing::Test { +protected: + Test(){} + ~Test() override {} +}; + +TEST_F(Test, BNF) { + std::string LexTop{"preprocessing-token"}; + BNF LexBNF{ +  {"preprocessing-token", {{"identifier"}, +                           {"preprocessing-op-or-punc"}, +                           {"pp-number"}}}, + +  {"identifier", {{"identifier-nondigit"}, +                  {"identifier", "identifier-nondigit"}, +                  {"identifier", "digit"}}}, +  {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }}, +  {"identifier-nondigit", +    {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"}, +     {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"}, +     {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, +     {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}}, +  {"preprocessing-op-or-punc", {{";"}, +                                {"="}}}, +  {"pp-number", {{"digit"}, +                 {"pp-number", "digit"}}} + }; + + std::string Top{"program"}; + BNF bnf{ +  {"program", {{"statement-list"}}}, +  {"statement-list", {{"statement", "statement-list"}, +                      {}, }}, +  {"statement", {{"assigmnent", ";"}}}, +  {"assignment", {{"identifier", "=", "identifier"}}} + }; + + // implicit? + //std::set<std::string> Terminals{"identifier", "=", ";"}; + + std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; + std::vector<Token> tokens_reference{ +  {"identifier", "a", { 1, 1} }, +  {"preprocessing-op-or-punc", "=", { 1, 3}}, +  {"identifier", "bc", { 1, 5}}, +  {"preprocessing-op-or-punc", ";", { 1, 8}}, +  {"identifier", "c", { 1, 10}}, +  {"preprocessing-op-or-punc", "=", { 1, 12}}, +  {"pp-number", "123", { 1, 14}}, +  {"preprocessing-op-or-punc", ";", { 1, 18}}, +  {"identifier", "esd", { 1, 20}}, +  {"preprocessing-op-or-punc", "=", { 1, 24}}, +  {"identifier", "Ff", { 1, 26}}, +  {"preprocessing-op-or-punc", ";", { 1, 29}}, +  {"pp-number", "1", { 1, 31}}, +  {"preprocessing-op-or-punc", "=", { 1, 33}}, +  {"identifier", "XYZ", { 1, 34}}, + }; + + Lexer lexer(LexBNF, LexTop); + auto tokens = lexer.Lex(Code); + + ASSERT_EQ(tokens, tokens_reference); +#if 0 + for (const auto& i: tokens) { +  std::cout << i.value << std::endl; + } +#endif + Compiler compiler(bnf, Top); + auto Program = compiler.compile(tokens); + + //ASSERT_EQ(Program, Program_reference); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleMock(&argc, argv); + return RUN_ALL_TESTS(); +} | 
