diff options
| -rw-r--r-- | asm/encode.cpp | 62 | ||||
| -rw-r--r-- | asm/intel64/add.cpp | 6 | ||||
| -rw-r--r-- | asm/intel64/codes.cpp | 46 | ||||
| -rw-r--r-- | asm/intel64/codes.h | 3 | ||||
| -rw-r--r-- | asm/intel64/mov.cpp | 5 | ||||
| -rw-r--r-- | cpp.cpp | 7 | ||||
| -rw-r--r-- | flowgraph/data.h | 5 | ||||
| -rw-r--r-- | flowgraph/node.cpp | 2 | ||||
| -rw-r--r-- | flowgraph/node.h | 11 | 
9 files changed, 123 insertions, 24 deletions
| diff --git a/asm/encode.cpp b/asm/encode.cpp index ea50cb7..8bf33c0 100644 --- a/asm/encode.cpp +++ b/asm/encode.cpp @@ -1,6 +1,68 @@ +// Intel specific conversion: Abstract Graph -> Machine specific segment  #include "encode.h" +#include "asm/assembler.h" +#include "minicc.h" + +#include <boost/endian/conversion.hpp> + +#include <exception> +  void Asm::toMachineCode(const FlowGraph::Graph& graph, Segment& segment)  { + segment.clear(); + + for (const std::shared_ptr<FlowGraph::Node>& node: graph) { +  try { +   FlowGraph::BinaryOperation& op {dynamic_cast<FlowGraph::BinaryOperation&>(*node)}; + +   auto operands {op.operands()}; +   // TODO: ignore destination (0) for now + +   if (operands[1].type() != FlowGraph::DataType::Int) { +    std::runtime_error("Bad type for operand 1: "s + std::to_string(int(operands[1].type()))); +   } + +   if (operands[2].type() != FlowGraph::DataType::Int) { +    std::runtime_error("Bad type for operand 2: "s + std::to_string(int(operands[2].type()))); +   } + +   if (!operands[1].storage()) +    throw std::runtime_error("ICE: Operand 1 storage is 0"); +   if (!operands[2].storage()) +    throw std::runtime_error("ICE: Operand 2 storage is 0"); + +   uint32_t immediate1{}; +   try { +    FlowGraph::Constant& value1 {dynamic_cast<FlowGraph::Constant&>(*operands[1].storage())}; +    if (value1.value().size() < sizeof(uint32_t)) +     throw std::runtime_error("ICE: Int data from operand 1 needs at least 4 bytes, got "s + std::to_string(value1.value().size())); + +    immediate1 = boost::endian::little_to_native(*(reinterpret_cast<const uint32_t*>(value1.value().data()))); +   } catch (const std::bad_cast& ex) { +    std::runtime_error("Bad value for operand 1: Constant expected"); +   } +    +   uint32_t immediate2{}; +   try { +    FlowGraph::Constant& value2 {dynamic_cast<FlowGraph::Constant&>(*operands[2].storage())}; +    if (value2.value().size() < sizeof(uint32_t)) +     throw std::runtime_error("ICE: Int data from operand 2 needs at least 4 bytes, got "s + std::to_string(value2.value().size())); + +    immediate2 = boost::endian::little_to_native(*(reinterpret_cast<const uint32_t*>(value2.value().data()))); +   } catch (const std::bad_cast& ex) { +    std::runtime_error("Bad value for operand 2: Constant expected"); +   } + +   Asm::Args args1{{Asm::Args::Register32("edi"), Asm::Args::Immediate32(immediate1)}}; +   segment.push_back(makeOp("mov", args1)); +    +   Asm::Args args2{{Asm::Args::Register32("edi"), Asm::Args::Immediate32(immediate2)}}; +   segment.push_back(makeOp("add", args2)); + +  } catch (const std::bad_cast& ex) { +   std::runtime_error("ICE: Encoding: Unsupported node: "s + ex.what()); +  } + }  } diff --git a/asm/intel64/add.cpp b/asm/intel64/add.cpp index 106ffec..4438895 100644 --- a/asm/intel64/add.cpp +++ b/asm/intel64/add.cpp @@ -12,8 +12,12 @@ Op_add::Op_add(Asm::Args& args)   if (args[0].type() == typeid(Asm::Args::Register32) &&       std::any_cast<Asm::Args::Register32>(args[0]).name() == "eax" &&       args[1].type() == typeid(Asm::Args::Immediate32)) - { // add eax, imm32 + { // add eax, imm32 (before "add reg32, imm32"! It's shorter.)    machine_code = std::vector<uint8_t>{ 0x05 } + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode(); + } else if (args[0].type() == typeid(Asm::Args::Register32) && +     args[1].type() == typeid(Asm::Args::Immediate32)) + { // add reg32, imm32 +  machine_code = std::vector<uint8_t>{ 0x81 } + ModRM("/0", std::any_cast<Asm::Args::Register32>(args[0]).name()) + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode();   } else if (args[0].type() == typeid(Asm::Args::Register64) &&              std::any_cast<Asm::Args::Register64>(args[0]).name() == "rax"  &&              args[1].type() == typeid(Asm::Args::Immediate32)) diff --git a/asm/intel64/codes.cpp b/asm/intel64/codes.cpp index 66a08dd..5d93a57 100644 --- a/asm/intel64/codes.cpp +++ b/asm/intel64/codes.cpp @@ -29,14 +29,14 @@ namespace {    {"dl", 2}, {"dh", 6},    {"ax", 0}, {"sp", 4}, -  {"bx", 3}, {"bp", 7}, -  {"cx", 1}, {"si", 5}, -  {"dx", 2}, {"di", 6}, +  {"bx", 3}, {"bp", 5}, +  {"cx", 1}, {"si", 6}, +  {"dx", 2}, {"di", 7},    {"eax", 0}, {"esp", 4}, -  {"ebx", 3}, {"ebp", 7}, -  {"ecx", 1}, {"esi", 5}, -  {"edx", 2}, {"edi", 6}, +  {"ebx", 3}, {"ebp", 5}, +  {"ecx", 1}, {"esi", 6}, +  {"edx", 2}, {"edi", 7},   };  } @@ -44,15 +44,26 @@ namespace {  // Manual, page 530  // Reg + Reg/Memory  uint8_t ModRM(const std::string& reg, const std::string& rm) { - // TODO: extend   uint8_t result{0b11000000}; - auto index1{ IndexOfRegister.find(reg) }; - if (index1 == IndexOfRegister.end()) -  throw std::runtime_error("Unknown register for arg1: "s + reg); - - result |= (index1->second << 3); - + size_t val_reg{}; + // reg + if (reg.size() > 0 && reg[0] == '/') { // "/digit" +  try { +   val_reg = stoull(reg.substr(1)); +  } catch (const std::exception& ex) { +   throw std::runtime_error("ModRM: Bad digit in arg1: "s + reg); +  } + } else { // reg +  auto index1{ IndexOfRegister.find(reg) }; +  if (index1 == IndexOfRegister.end()) +   throw std::runtime_error("ModRM: Unknown register for arg1: "s + reg); +  val_reg = index1->second; + } + + result |= (val_reg << 3); + + // rm   auto index2{ IndexOfRegister.find(rm) };   if (index2 == IndexOfRegister.end())    throw std::runtime_error("Unknown register for arg2: "s + rm); @@ -62,6 +73,15 @@ uint8_t ModRM(const std::string& reg, const std::string& rm) {   return result;  } +uint8_t RegNo(const std::string& reg) +{ + auto index{ IndexOfRegister.find(reg) }; + if (index == IndexOfRegister.end()) +  throw std::runtime_error("Reg: Unknown register for arg: "s + reg); + + return index->second; +} +  #if 0   prefixes{    "lock", 0xf0, diff --git a/asm/intel64/codes.h b/asm/intel64/codes.h index 0ff17f1..112eef4 100644 --- a/asm/intel64/codes.h +++ b/asm/intel64/codes.h @@ -10,3 +10,6 @@ std::vector<uint8_t> REX(const std::string& s);  // Manual, page 530  // Reg + Reg/Memory  uint8_t ModRM(const std::string& reg, const std::string& rm); + +// Just the number of reg, e.g. for encoding inside primary opcode +uint8_t RegNo(const std::string& reg); diff --git a/asm/intel64/mov.cpp b/asm/intel64/mov.cpp index 40a48f8..8603fc9 100644 --- a/asm/intel64/mov.cpp +++ b/asm/intel64/mov.cpp @@ -15,6 +15,8 @@ Op_mov::Op_mov(Asm::Args& args)    // r/m8, r8: ModRM:r/m (w), ModRM:reg (r)    machine_code = std::vector<uint8_t>{ 0x88 } +     ModRM(std::any_cast<Asm::Args::Register8>(args[1]).name(), std::any_cast<Asm::Args::Register8>(args[0]).name()); + } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Immediate32)) { // mov reg32, imm32 +  machine_code = std::vector<uint8_t>{ static_cast<uint8_t>(0xB8 + RegNo(std::any_cast<Asm::Args::Register32>(args[0]).name())) } + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode();   } else {    throw std::runtime_error("Unimplemented: mov "s + args[0].type().name() + " "s + args[1].type().name());   } @@ -25,6 +27,9 @@ namespace {  bool registered {   registerOp(mangleName<Asm::Args::Register8, Asm::Args::Register8>("mov"), [](Asm::Args& args) -> std::shared_ptr<Op>{                 return std::make_shared<Op_mov>(args); +            }) && + registerOp(mangleName<Asm::Args::Register32, Asm::Args::Immediate32>("mov"), [](Asm::Args& args) -> std::shared_ptr<Op>{ +               return std::make_shared<Op_mov>(args);              })  }; @@ -1,6 +1,7 @@  #include "cpp.h"  #include "asm/encode.h" +#include "asm/operators.h"  #include "bnf.h"  #include "cppbnf.h"  #include "debug.h" @@ -584,13 +585,13 @@ void CPP::link()   // TODO   // mSegment -> elf -#if 0 - return { + + mCode = std::vector<uint8_t>{              0x48, 0xc7, 0xc0, 0x3c, 0x00, 0x00, 0x00, // mov    $0x3c,%rax     # syscall 60              0x48, 0x31, 0xff,                         // xor    %rdi,%rdi      # exit code 0 + } + mSegment.getCode() + std::vector<uint8_t>{ // add to edi              0x0f, 0x05,                               // syscall   }; -#endif  }  // phases of translation, according to standard diff --git a/flowgraph/data.h b/flowgraph/data.h index 1ed4964..abf046d 100644 --- a/flowgraph/data.h +++ b/flowgraph/data.h @@ -8,7 +8,7 @@  namespace FlowGraph {   // Explicitely not including size - enum class DataType + enum class DataType: int   {    Size,    Int, @@ -27,8 +27,9 @@ namespace FlowGraph {   class Data   {   public: -  Data(DataType type, std::shared_ptr<Storage> storage):m_type(type) {} +  Data(DataType type, std::shared_ptr<Storage> storage): m_type(type), m_storage(storage) {}    DataType type() const { return m_type; } +  std::shared_ptr<Storage> storage() { return m_storage; }   private:    const DataType m_type;    std::shared_ptr<Storage> m_storage; diff --git a/flowgraph/node.cpp b/flowgraph/node.cpp index 81217ce..795a252 100644 --- a/flowgraph/node.cpp +++ b/flowgraph/node.cpp @@ -9,7 +9,7 @@ using namespace FlowGraph;  // 4 byte for now  Data FlowGraph::MakeConstantInt(int i)  { - std::vector<uint8_t> value(size_t(4)); + std::vector<uint8_t> value(size_t(4), uint8_t(0));   *(reinterpret_cast<int32_t*>(value.data())) = boost::endian::native_to_little(static_cast<int32_t>(i));   return Data(DataType::Int, std::make_shared<Constant>(value));  } diff --git a/flowgraph/node.h b/flowgraph/node.h index 89f6088..853b017 100644 --- a/flowgraph/node.h +++ b/flowgraph/node.h @@ -17,7 +17,12 @@ namespace FlowGraph {   class Node   {   public: +  Node(){} +  Node(std::vector<Data> operands): mOperands(operands) {} +  std::vector<Data>& operands() { return mOperands; }    virtual ~Node() {}; // force class to be polymorphic (e.g. in a container) + private: +  std::vector<Data> mOperands;   };   // Memory on Heap: new and delete @@ -146,13 +151,11 @@ namespace FlowGraph {   {   public:    BinaryOperation(BinaryOperationType type, Data& destination, Data& source0, Data& source1): -   m_type(type), m_destination(destination), m_source0(source0), m_source1(source1) +   Node(std::vector<Data>({destination, source0, source1})), m_type(type)   {} +  BinaryOperationType type() {return m_type;}   private:    BinaryOperationType m_type; -  Data m_destination; -  Data m_source0; -  Data m_source1;   };  } // namespace FlowGraph | 
