diff options
| author | Roland Reichwein <mail@reichwein.it> | 2020-10-18 16:59:54 +0200 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2020-10-18 16:59:54 +0200 | 
| commit | 8a2d1dc5c8b6639985d26d1c915048d87d52426b (patch) | |
| tree | 2f3957a1c24ef35b4ec9259a6a0d97393b248a57 | |
| parent | 8f28495ab9a8ebf53868405541e907394895e51f (diff) | |
Added xor, mov, jmp
| -rw-r--r-- | Makefile | 3 | ||||
| -rw-r--r-- | asm/assembler.cpp | 2 | ||||
| -rw-r--r-- | asm/assembler.h | 124 | ||||
| -rw-r--r-- | asm/chunk.h | 17 | ||||
| -rw-r--r-- | asm/intel64/add.cpp | 25 | ||||
| -rw-r--r-- | asm/intel64/codes.cpp | 63 | ||||
| -rw-r--r-- | asm/intel64/codes.h | 6 | ||||
| -rw-r--r-- | asm/intel64/int.cpp | 4 | ||||
| -rw-r--r-- | asm/intel64/jmp.cpp | 103 | ||||
| -rw-r--r-- | asm/intel64/jmp.h | 31 | ||||
| -rw-r--r-- | asm/intel64/mov.cpp | 31 | ||||
| -rw-r--r-- | asm/intel64/mov.h | 31 | ||||
| -rw-r--r-- | asm/intel64/xor.cpp | 31 | ||||
| -rw-r--r-- | asm/intel64/xor.h | 31 | ||||
| -rw-r--r-- | asm/segment.cpp | 9 | ||||
| -rw-r--r-- | asm/segment.h | 3 | ||||
| -rw-r--r-- | intel.cpp | 437 | ||||
| -rw-r--r-- | test-asm.cpp | 10 | 
18 files changed, 457 insertions, 504 deletions
| @@ -49,8 +49,11 @@ PROGSRC=\      asm/chunk.cpp \      asm/intel64/add.cpp \      asm/intel64/int.cpp \ +    asm/intel64/jmp.cpp \ +    asm/intel64/mov.cpp \      asm/intel64/nop.cpp \      asm/intel64/ret.cpp \ +    asm/intel64/xor.cpp \      asm/intel64/codes.cpp \      asm/operators.cpp \      asm/segment.cpp \ diff --git a/asm/assembler.cpp b/asm/assembler.cpp index d6ab230..5c879b1 100644 --- a/asm/assembler.cpp +++ b/asm/assembler.cpp @@ -11,7 +11,7 @@ std::unordered_map<std::string, FactoryFunction> ops;  bool registerOp(const std::string& mnemonic, FactoryFunction f)  {   if (ops.contains(mnemonic)) { -  std::cout << "Warning: mnemonic |" << mnemonic << "| already registered." << std::endl; +  std::cerr << "Warning: mnemonic |" << mnemonic << "| already registered." << std::endl;    return false;   } diff --git a/asm/assembler.h b/asm/assembler.h index 3d3e9a9..52c3da5 100644 --- a/asm/assembler.h +++ b/asm/assembler.h @@ -10,8 +10,84 @@  #include <memory>  #include <string>  #include <unordered_map> +#include <vector> + +// TODO: namespace Asm, e.g. AsmArgs -> Asm::Args + +class AsmArgs: public std::vector<std::any> +{ +public: + AsmArgs(){} + AsmArgs(const std::vector<std::any>& args): std::vector<std::any>(args){} + + class Immediate8 + { + public: +  Immediate8(uint8_t value): m_value(value) {} +  uint8_t value() {return m_value;} +  std::vector<uint8_t> getCode() {return {m_value};}; + + private: +  uint8_t m_value; + }; + + class Immediate32 + { + public: +  Immediate32(uint32_t value): m_value(value) {} +  uint32_t value() { return m_value; } +  std::vector<uint8_t> getCode() { +   std::vector<uint8_t> result(size_t(4)); +   *(reinterpret_cast<uint32_t*>(result.data())) = boost::endian::native_to_little(m_value); +   return result; +  }; + + private: +  uint32_t m_value; + }; + + class Register8 + { + public: +  Register8(const std::string& name): m_name(name) {} +  std::string name() { return m_name; } + + private: +  std::string m_name; + }; + + class Register32 + { + public: +  Register32(const std::string& name): m_name(name) {} +  std::string name() { return m_name; } + + private: +  std::string m_name; + }; + + class Register64 + { + public: +  Register64(const std::string& name): m_name(name) {} +  std::string name() { return m_name; } + + private: +  std::string m_name; + }; + + class Label + { + public: +  Label(const std::string& name): m_name(name) {} +  std::string name() { return m_name; } + + private: +  std::string m_name; + }; + +}; -using AsmArgs = std::vector<std::any>;  using FactoryFunction = std::function<std::shared_ptr<Op>(AsmArgs&)>;  // mnemonic: mnemonic including argument types @@ -44,49 +120,3 @@ std::string mangleName(const std::string& s)  std::string mangleName(const std::string& s, AsmArgs& args); -class Immediate8 -{ -public: - Immediate8(uint8_t value): m_value(value) {} - uint8_t value() {return m_value;} - std::vector<uint8_t> getCode() {return {m_value};}; - -private: - uint8_t m_value; -}; - -class Immediate32 -{ -public: - Immediate32(uint32_t value): m_value(value) {} - uint32_t value() { return m_value; } - std::vector<uint8_t> getCode() { -  std::vector<uint8_t> result(size_t(4)); -  *(reinterpret_cast<uint32_t*>(result.data())) = boost::endian::native_to_little(m_value); -  return result; - }; - -private: - uint32_t m_value; -}; - -class Register32 -{ -public: - Register32(const std::string& name): m_name(name) {} - std::string name() { return m_name; } - -private: - std::string m_name; -}; - -class Register64 -{ -public: - Register64(const std::string& name): m_name(name) {} - std::string name() { return m_name; } - -private: - std::string m_name; -}; - diff --git a/asm/chunk.h b/asm/chunk.h index cc45ea9..cf6efaa 100644 --- a/asm/chunk.h +++ b/asm/chunk.h @@ -6,6 +6,9 @@  #include <string>  #include <vector> +// TODO: use it everywhere! +using OP_T = std::vector<uint8_t>; +  class Chunk  {  public: @@ -14,6 +17,20 @@ public:   virtual size_t size() = 0; ///< returns size in bytes  }; +// can be added via multiple inheritance to chunks with addresses +struct AddressFeature +{ + std::string label; + + std::vector<uint8_t> machine_code; + size_t addr_size; + size_t addr_offs; ///< offset inside code + + std::vector<uint8_t> alternative_code; + size_t alternative_size; + size_t alternative_offs; ///< offset inside code +}; +  class Label: public Chunk  {  public: diff --git a/asm/intel64/add.cpp b/asm/intel64/add.cpp index dc5c704..2de2219 100644 --- a/asm/intel64/add.cpp +++ b/asm/intel64/add.cpp @@ -9,10 +9,16 @@ using namespace std::string_literals;  Op_add::Op_add(AsmArgs& args)  { - if (args[0].type() == typeid(Register32) && std::any_cast<Register32>(args[0]).name() == "eax" && args[1].type() == typeid(Immediate32)) { // add eax, imm32 -  machine_code = std::vector<uint8_t>{ 0x05 } + std::any_cast<Immediate32>(args[1]).getCode(); - } else if (args[0].type() == typeid(Register64) && std::any_cast<Register64>(args[0]).name() == "rax"  && args[1].type() == typeid(Immediate32)) { // add rax, imm32 -  machine_code = REX("W") + std::vector<uint8_t>{ 0x05 } + std::any_cast<Immediate32>(args[1]).getCode(); + if (args[0].type() == typeid(AsmArgs::Register32) && +     std::any_cast<AsmArgs::Register32>(args[0]).name() == "eax" && +     args[1].type() == typeid(AsmArgs::Immediate32)) + { // add eax, imm32 +  machine_code = std::vector<uint8_t>{ 0x05 } + std::any_cast<AsmArgs::Immediate32>(args[1]).getCode(); + } else if (args[0].type() == typeid(AsmArgs::Register64) && +            std::any_cast<AsmArgs::Register64>(args[0]).name() == "rax"  && +            args[1].type() == typeid(AsmArgs::Immediate32)) + { // add rax, imm32 +  machine_code = REX("W") + std::vector<uint8_t>{ 0x05 } + std::any_cast<AsmArgs::Immediate32>(args[1]).getCode();   } else {    throw std::runtime_error("Unimplemented: add "s + args[0].type().name() + " "s + args[1].type().name());   } @@ -20,12 +26,13 @@ Op_add::Op_add(AsmArgs& args)  namespace { -bool registered0 { registerOp(mangleName<Register32, Immediate32>("add"), [](AsmArgs& args) -> std::shared_ptr<Op>{ +bool registered { + registerOp(mangleName<AsmArgs::Register32, AsmArgs::Immediate32>("add"), [](AsmArgs& args) -> std::shared_ptr<Op>{                               return std::make_shared<Op_add>(args); -                             }) }; -// TODO -bool registered1 { registerOp(mangleName<Register64, Immediate32>("add"), [](AsmArgs& args) -> std::shared_ptr<Op>{ +                             }) && + registerOp(mangleName<AsmArgs::Register64, AsmArgs::Immediate32>("add"), [](AsmArgs& args) -> std::shared_ptr<Op>{                               return std::make_shared<Op_add>(args); -                             }) }; +                             }) +};  } diff --git a/asm/intel64/codes.cpp b/asm/intel64/codes.cpp index a1d9e87..66a08dd 100644 --- a/asm/intel64/codes.cpp +++ b/asm/intel64/codes.cpp @@ -1,7 +1,12 @@  #include "codes.h" +#include <exception> +#include <unordered_map> + +using namespace std::string_literals; +  // REX prefix: 0b0100WRXB -std::vector<uint8_t> REX(std::string s) { +std::vector<uint8_t> REX(const std::string& s) {   uint8_t result{0b01000000};   if (s == "W")    result |= 0b00001000; @@ -15,3 +20,59 @@ std::vector<uint8_t> REX(std::string s) {   return { result };   } +namespace { + + std::unordered_map<std::string, size_t> IndexOfRegister{ +  {"al", 0}, {"ah", 4}, +  {"bl", 3}, {"bh", 7}, +  {"cl", 1}, {"ch", 5}, +  {"dl", 2}, {"dh", 6}, + +  {"ax", 0}, {"sp", 4}, +  {"bx", 3}, {"bp", 7}, +  {"cx", 1}, {"si", 5}, +  {"dx", 2}, {"di", 6}, + +  {"eax", 0}, {"esp", 4}, +  {"ebx", 3}, {"ebp", 7}, +  {"ecx", 1}, {"esi", 5}, +  {"edx", 2}, {"edi", 6}, + }; + +} + +// Manual, page 530 +// Reg + Reg/Memory +uint8_t ModRM(const std::string& reg, const std::string& rm) { + // TODO: extend + uint8_t result{0b11000000}; + + auto index1{ IndexOfRegister.find(reg) }; + if (index1 == IndexOfRegister.end()) +  throw std::runtime_error("Unknown register for arg1: "s + reg); + + result |= (index1->second << 3); + + auto index2{ IndexOfRegister.find(rm) }; + if (index2 == IndexOfRegister.end()) +  throw std::runtime_error("Unknown register for arg2: "s + rm); + + result |= index2->second; + + return result; +} + +#if 0 + prefixes{ +  "lock", 0xf0, + +  // branch hint +  0x2e, "branch not taken" +  0x3e, "branch taken" + +  0x66, "operand size override" // switch between 16 and 32 bit operands +  0x67, "address size override" // switch between 16 and 32 bit addresses + }; + }; +#endif + diff --git a/asm/intel64/codes.h b/asm/intel64/codes.h index 32eff1c..0ff17f1 100644 --- a/asm/intel64/codes.h +++ b/asm/intel64/codes.h @@ -5,4 +5,8 @@  #include <vector>  // REX prefix: 0b0100WRXB -std::vector<uint8_t> REX(std::string s); +std::vector<uint8_t> REX(const std::string& s); + +// Manual, page 530 +// Reg + Reg/Memory +uint8_t ModRM(const std::string& reg, const std::string& rm); diff --git a/asm/intel64/int.cpp b/asm/intel64/int.cpp index 7b682ab..a7df338 100644 --- a/asm/intel64/int.cpp +++ b/asm/intel64/int.cpp @@ -6,7 +6,7 @@ Op_int::Op_int(AsmArgs& args)  {   // At this point, the registration already ensured the number and types of args - Immediate8 i {std::any_cast<Immediate8>(args[0])}; + AsmArgs::Immediate8 i {std::any_cast<AsmArgs::Immediate8>(args[0])};   if (i.value() == 0) { // INT 0    machine_code = { 0xCE }; @@ -21,7 +21,7 @@ Op_int::Op_int(AsmArgs& args)  namespace { -bool registered { registerOp(mangleName<Immediate8>("int"), [](AsmArgs& args) -> std::shared_ptr<Op>{ +bool registered { registerOp(mangleName<AsmArgs::Immediate8>("int"), [](AsmArgs& args) -> std::shared_ptr<Op>{                               return std::make_shared<Op_int>(args);                               }) }; diff --git a/asm/intel64/jmp.cpp b/asm/intel64/jmp.cpp new file mode 100644 index 0000000..30ae546 --- /dev/null +++ b/asm/intel64/jmp.cpp @@ -0,0 +1,103 @@ +#include "jmp.h" + +#include "codes.h" + +#include <asm/assembler.h> +#include <asm/operators.h> + +#include <asm/intel64/codes.h> + +using namespace std::string_literals; + +namespace { + struct Jump { +  std::string name; +  OP_T jmp8;  ///< if empty, not available +  OP_T jmp32; ///< if empty, not available + }; + + std::vector<Jump> jumpOps { +  // Call Procedure +  {"call",  OP_T{},       OP_T{ 0xE8 }      }, // no addr8 version + +  // Unconditional Jump +  {"jmp",   OP_T{ 0xEB }, OP_T{ 0xE9 }      }, + +  // Conditional Jumps +  {"ja",    OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 }}, +  {"jae",   OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 }}, +  {"jb",    OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 }}, +  {"jbe",   OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 }}, +  {"jc",    OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 }}, +  {"jecxz", OP_T{ 0xE3 }, OP_T{}            }, // no addr32 version +  {"jrcxz", OP_T{ 0xE3 }, OP_T{}            }, // no addr32 version +  {"je",    OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 }}, +  {"jg",    OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F }}, +  {"jge",   OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D }}, +  {"jl",    OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C }}, +  {"jle",   OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E }}, +  {"jna",   OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 }}, +  {"jnae",  OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 }}, +  {"jnb",   OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 }}, +  {"jnbe",  OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 }}, +  {"jnc",   OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 }}, +  {"jne",   OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 }}, +  {"jng",   OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E }}, +  {"jnge",  OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C }}, +  {"jnl",   OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D }}, +  {"jnle",  OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F }}, +  {"jno",   OP_T{ 0x71 }, OP_T{ 0x0F, 0x81 }}, +  {"jnp",   OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B }}, +  {"jns",   OP_T{ 0x79 }, OP_T{ 0x0F, 0x89 }}, +  {"jnz",   OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 }}, +  {"jo",    OP_T{ 0x70 }, OP_T{ 0x0F, 0x80 }}, +  {"jp",    OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A }}, +  {"jpe",   OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A }}, +  {"jpo",   OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B }}, +  {"js",    OP_T{ 0x78 }, OP_T{ 0x0F, 0x88 }}, +  {"jz",    OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 }}, + }; + + bool registerOps() { +  bool result{true}; +  for (const auto& jumpOp: jumpOps) { +   result &= registerOp(mangleName<AsmArgs::Label>(jumpOp.name), [&](AsmArgs& args) -> std::shared_ptr<Op>{ +                        return std::make_shared<Op_jmp>(jumpOp.name, args, jumpOp.jmp8, jumpOp.jmp32); +                       }); +  } +  return result; + } + + bool registered { +  registerOps() + }; +} + +Op_jmp::Op_jmp(const std::string& name, AsmArgs& args, const OP_T& jmp8, const OP_T& jmp32) +{ + label = std::any_cast<AsmArgs::Label>(args[0]).name(); + + if (!jmp32.empty()) { // set machine_code +  machine_code = jmp32 + OP_T{size_t(4), uint8_t(0)}; +  addr_size = 4; +  addr_offs = jmp32.size(); +  if (!jmp8.empty()) { // also provide alternative +   alternative_code = jmp8 + OP_T{size_t(1), uint8_t(0)}; +   alternative_size = 1; +   alternative_offs = jmp8.size(); +  } + } + + if (machine_code.empty() && !jmp8.empty()) { +  machine_code = jmp8 + OP_T{size_t(1), uint8_t(0)}; +  addr_size = 1; +  addr_offs = jmp8.size(); + } + + if (machine_code.empty()) { +  throw std::runtime_error("Unimplemented: "s + name); + } + + // actual address not set, yet! +} + diff --git a/asm/intel64/jmp.h b/asm/intel64/jmp.h new file mode 100644 index 0000000..db8a5a8 --- /dev/null +++ b/asm/intel64/jmp.h @@ -0,0 +1,31 @@ +// jmp +// call +// ja +// ... + +#pragma once + +#include <asm/assembler.h> + +class Op_jmp: public Op, public AddressFeature +{ +public: + Op_jmp(const std::string& name, AsmArgs& args, const OP_T& jmp8, const OP_T& jmp32); + + std::vector<uint8_t> getCode() override + { +  return machine_code; + } + + size_t size() override + { +  return machine_code.size(); + } + + bool optimize() override ///< returns true if changed + { +  return false; + } + +}; + diff --git a/asm/intel64/mov.cpp b/asm/intel64/mov.cpp new file mode 100644 index 0000000..33589e9 --- /dev/null +++ b/asm/intel64/mov.cpp @@ -0,0 +1,31 @@ +#include "mov.h" + +#include "codes.h" + +#include <asm/assembler.h> +#include <asm/operators.h> + +#include <asm/intel64/codes.h> + +using namespace std::string_literals; + +Op_mov::Op_mov(AsmArgs& args) +{ + if (args[0].type() == typeid(AsmArgs::Register8) && args[1].type() == typeid(AsmArgs::Register8)) { // mov reg8, reg8 +  // r/m8, r8: ModRM:r/m (w), ModRM:reg (r) +  machine_code = std::vector<uint8_t>{ 0x88 } + +   ModRM(std::any_cast<AsmArgs::Register8>(args[1]).name(), std::any_cast<AsmArgs::Register8>(args[0]).name()); + } else { +  throw std::runtime_error("Unimplemented: mov "s + args[0].type().name() + " "s + args[1].type().name()); + } +} + +namespace { + +bool registered { + registerOp(mangleName<AsmArgs::Register8, AsmArgs::Register8>("mov"), [](AsmArgs& args) -> std::shared_ptr<Op>{ +               return std::make_shared<Op_mov>(args); +            }) +}; + +} diff --git a/asm/intel64/mov.h b/asm/intel64/mov.h new file mode 100644 index 0000000..e1b2304 --- /dev/null +++ b/asm/intel64/mov.h @@ -0,0 +1,31 @@ +// Memory Move + +#pragma once + +#include <asm/assembler.h> + +class Op_mov: public Op +{ +public: + Op_mov(AsmArgs& args); + +public: + std::vector<uint8_t> getCode() override + { +  return machine_code; + } + + size_t size() override + { +  return machine_code.size(); + } + + bool optimize() override ///< returns true if changed + { +  return false; + } + +protected: + std::vector<uint8_t> machine_code; +}; + diff --git a/asm/intel64/xor.cpp b/asm/intel64/xor.cpp new file mode 100644 index 0000000..c0dbb68 --- /dev/null +++ b/asm/intel64/xor.cpp @@ -0,0 +1,31 @@ +#include "xor.h" + +#include "codes.h" + +#include <asm/assembler.h> +#include <asm/operators.h> + +#include <asm/intel64/codes.h> + +using namespace std::string_literals; + +Op_xor::Op_xor(AsmArgs& args) +{ + if (args[0].type() == typeid(AsmArgs::Register8) && args[1].type() == typeid(AsmArgs::Register8)) { // xor reg8, reg8 +  // r8, r/m8: ModRM:reg (w), ModRM:r/m (r) +  machine_code = std::vector<uint8_t>{ 0x32 } + +   ModRM(std::any_cast<AsmArgs::Register8>(args[0]).name(), std::any_cast<AsmArgs::Register8>(args[1]).name()); + } else { +  throw std::runtime_error("Unimplemented: xor "s + args[0].type().name() + " "s + args[1].type().name()); + } +} + +namespace { + +bool registered { + registerOp(mangleName<AsmArgs::Register8, AsmArgs::Register8>("xor"), [](AsmArgs& args) -> std::shared_ptr<Op>{ +               return std::make_shared<Op_xor>(args); +            }) +}; + +} diff --git a/asm/intel64/xor.h b/asm/intel64/xor.h new file mode 100644 index 0000000..f00a657 --- /dev/null +++ b/asm/intel64/xor.h @@ -0,0 +1,31 @@ +// XOR + +#pragma once + +#include <asm/assembler.h> + +class Op_xor: public Op +{ +public: + Op_xor(AsmArgs& args); + +public: + std::vector<uint8_t> getCode() override + { +  return machine_code; + } + + size_t size() override + { +  return machine_code.size(); + } + + bool optimize() override ///< returns true if changed + { +  return false; + } + +protected: + std::vector<uint8_t> machine_code; +}; + diff --git a/asm/segment.cpp b/asm/segment.cpp index 60b8348..9fb7a52 100644 --- a/asm/segment.cpp +++ b/asm/segment.cpp @@ -30,3 +30,12 @@ std::vector<uint8_t> Segment::getCode()   return result;  } + +void Segment::insertAddresses() +{ +} + +void Segment::optimize() +{ + // TODO +} diff --git a/asm/segment.h b/asm/segment.h index f0a758e..dfacd12 100644 --- a/asm/segment.h +++ b/asm/segment.h @@ -12,6 +12,7 @@ class Segment: public std::vector<std::shared_ptr<Chunk>>  public:   size_t getAddressOfLabel(const std::string& label);   std::vector<uint8_t> getCode(); + void insertAddresses(); + void optimize();  }; - diff --git a/intel.cpp b/intel.cpp deleted file mode 100644 index 65b9f3f..0000000 --- a/intel.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// Intel assembly language - - -// segments: code, stack - -#include "minicc.h" - -#include <algorithm> -#include <any> -#include <array> -#include <deque> -#include <functional> -#include <stdexcept> -#include <functional> -#include <stdexcept> -#include <string> -#include <unordered_map> -#include <vector> - -using namespace std::string_literals; -using namespace std::placeholders; - -namespace { - - std::vector<uint8_t> imm8(std::string s) { -  long value{ std::stol(s) }; -  uint8_t* bin = reinterpret_cast<uint8_t*>(&value); -  return { uint8_t(*bin & 0xFF) }; - } - - std::vector<uint8_t> imm32(std::string s) { -  long value{ std::stol(s) }; -  uint32_t* bin = reinterpret_cast<uint32_t*>(&value); -  return {uint8_t(*bin & 0xFF), uint8_t(*bin >> 8 & 0xFF), uint8_t(*bin >> 16 & 0xFF), uint8_t(*bin >> 24 & 0xFF) }; - } - - std::unordered_map<std::string, size_t> IndexOfRegister{ -  {"al", 0}, {"ah", 4}, -  {"bl", 3}, {"bh", 7}, -  {"cl", 1}, {"ch", 5}, -  {"dl", 2}, {"dh", 6}, - -  {"ax", 0}, {"sp", 4}, -  {"bx", 3}, {"bp", 7}, -  {"cx", 1}, {"si", 5}, -  {"dx", 2}, {"di", 6}, - -  {"eax", 0}, {"esp", 4}, -  {"ebx", 3}, {"ebp", 7}, -  {"ecx", 1}, {"esi", 5}, -  {"edx", 2}, {"edi", 6}, - }; - - // Manual, page 530 - // Reg + Reg/Memory - uint8_t ModRM(std::string reg, std::string rm) { -  // TODO: extend -  uint8_t result{0b11000000}; - -  auto index1{ IndexOfRegister.find(reg) }; -  if (index1 == IndexOfRegister.end()) -   throw std::runtime_error("Unknown register for arg1: "s + reg); - -  result |= (index1->second << 3); - -  auto index2{ IndexOfRegister.find(rm) }; -  if (index2 == IndexOfRegister.end()) -   throw std::runtime_error("Unknown register for arg2: "s + rm); - -  result |= index2->second; - -  return result; - } - - enum class AddressType { -  Relative8, -  Relative16, -  Relative32, -  Absolute8, -  Absolute16, -  Absolute32, - }; - - struct Address - { -  AddressType type; -  size_t position; // relative to respective machine code, e.g. byte 1 in jump -  std::string label; // where to jump to, as label - }; - - struct InstructionCode - { -  std::vector<uint8_t> machine_code; -  std::vector<Address> addresses; - }; - - // List of alternative codes - typedef std::deque<InstructionCode> InstructionCodeList; - - bool O1{ true }; // Optimization - - using OP_T = std::vector<uint8_t>; - - InstructionCodeList op_jmp(const std::vector<Token>& sl, std::vector<uint8_t> op_bytes_8, std::vector<uint8_t> op_bytes_32) - { -  if (sl.size() == 2) { // JMP rel8 / rel32 -   const std::string& label{ sl[1].value }; -   InstructionCodeList result; -   if (op_bytes_32.size() > 0) { -    op_bytes_32.resize(op_bytes_32.size() + 4, 0x00); -    result.push_back({ op_bytes_32, { {AddressType::Relative32, op_bytes_32.size() - 4, label} } } ); -   } -   if (op_bytes_8.size() > 0 && (O1 || op_bytes_32.size() == 0)) { -    op_bytes_8.push_back(0x00); -    result.push_back({ op_bytes_8, { {AddressType::Relative8, op_bytes_8.size() - 1, label} } }); -   } -   return result; -  } - -  // ... TODO -  throw std::runtime_error("Unknown command: "s + sl[0].value); - } - - std::unordered_map<std::string, std::function<InstructionCodeList(const std::vector<Token>&)>> ops_old{ - -  // Call Procedure -  {"call",  std::bind(op_jmp, _1, OP_T{}, OP_T{ 0xE8 })}, - - -  // Unconditional Jump -  {"jmp", std::bind(op_jmp, _1, OP_T{ 0xEB }, OP_T{ 0xE9 })}, - -  // Conditional Jumps -  {"ja",     std::bind(op_jmp, _1, OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 })}, -  {"jae",    std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })}, -  {"jb",     std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })}, -  {"jbe",    std::bind(op_jmp, _1, OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 })}, -  {"jc",     std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })}, -  {"jecxz",  std::bind(op_jmp, _1, OP_T{ 0xE3 }, OP_T{})}, -  {"jrcxz",  std::bind(op_jmp, _1, OP_T{ 0xE3 }, OP_T{})}, -  {"je",     std::bind(op_jmp, _1, OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 })}, -  {"jg",     std::bind(op_jmp, _1, OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F })}, -  {"jge",    std::bind(op_jmp, _1, OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D })}, -  {"jl",     std::bind(op_jmp, _1, OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C })}, -  {"jle",    std::bind(op_jmp, _1, OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E })}, -  {"jna",    std::bind(op_jmp, _1, OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 })}, -  {"jnae",   std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })}, -  {"jnb",    std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })}, -  {"jnbe",   std::bind(op_jmp, _1, OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 })}, -  {"jnc",    std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })}, -  {"jne",    std::bind(op_jmp, _1, OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 })}, -  {"jng",    std::bind(op_jmp, _1, OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E })}, -  {"jnge",   std::bind(op_jmp, _1, OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C })}, -  {"jnl",    std::bind(op_jmp, _1, OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D })}, -  {"jnle",   std::bind(op_jmp, _1, OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F })}, -  {"jno",    std::bind(op_jmp, _1, OP_T{ 0x71 }, OP_T{ 0x0F, 0x81 })}, -  {"jnp",    std::bind(op_jmp, _1, OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B })}, -  {"jns",    std::bind(op_jmp, _1, OP_T{ 0x79 }, OP_T{ 0x0F, 0x89 })}, -  {"jnz",    std::bind(op_jmp, _1, OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 })}, -  {"jo",     std::bind(op_jmp, _1, OP_T{ 0x70 }, OP_T{ 0x0F, 0x80 })}, -  {"jp",     std::bind(op_jmp, _1, OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A })}, -  {"jpe",    std::bind(op_jmp, _1, OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A })}, -  {"jpo",    std::bind(op_jmp, _1, OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B })}, -  {"js",     std::bind(op_jmp, _1, OP_T{ 0x78 }, OP_T{ 0x0F, 0x88 })}, -  {"jz",     std::bind(op_jmp, _1, OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 })}, - -  // Memory Move -  { "mov", [](const std::vector<Token>& sl) -> InstructionCodeList { -   if (sl.size() == 3) { -    return { { std::vector<uint8_t>{ 0x88 } + ModRM(sl[2].value, sl[1].value), {} } };  // r/m8, r8: ModRM:r/m (w), ModRM:reg (r) -   } - -   // ... TODO -   throw std::runtime_error("Unknown command: "s + sl[0].value); -  }}, - -  { "xor", [](const std::vector<Token>& sl) -> InstructionCodeList { -   if (sl.size() == 3) { -    return { { std::vector<uint8_t>{ 0x33 } + ModRM(sl[1].value, sl[2].value) } };  // r8, r/m8: ModRM:reg (w), ModRM:r/m (r) -   } - -   // ... TODO -   throw std::runtime_error("Unknown command: "s + sl[0].value); -  }}, - - }; - -#if 0 - prefixes{ -  "lock", 0xf0, - -  // branch hint -  0x2e, "branch not taken" -  0x3e, "branch taken" - -  0x66, "operand size override" // switch between 16 and 32 bit operands -  0x67, "address size override" // switch between 16 and 32 bit addresses - }; - }; -#endif - -#ifdef ASM_PARSER - BNF GetBNF() { -  // TODO: -  return { -   { "assembler-unit", { -    {} -   }}, -   { "immediate-32", { -    {} -   }}, -   { "mnemonic", { -    {} -   }}, -   { "register", { -    {} -   }}, -   { "register-8", { -    {} -   }}, -   { "register-16", { -    {} -   }}, -   { "register-32", { -    {} -   }}, -   { "register-64", { -    {} -   }}, -  -  }; - }; -#endif -  - // Checks a 32 bit relative address if it's valid as 8 bit address - bool IsSmallAddress(const InstructionCode& insn) { -  if (insn.addresses.size() != 1) -   throw std::runtime_error("Bad number of addresses in insn"); - -  size_t i{insn.addresses[0].position}; - -  if (i > insn.machine_code.size() - 3) -   throw std::runtime_error("Bad Address index "s + std::to_string(i) + " in insn with "s + std::to_string(insn.machine_code.size()) + " bytes"s); - -  if (std::count(insn.machine_code.begin() + i, insn.machine_code.begin() + i + 3, 0x00) == 3 || -   std::count(insn.machine_code.begin() + i, insn.machine_code.begin() + i + 3, 0xFF) == 3) -   return true; - -  return false; - } - - -} // namespace - -class Assembler { - - std::unordered_map<std::string, size_t> labels; ///< labels with their positions in instruction list - - /// 1st Level: Instructions - /// 2nd Level: Alternatives - /// 3rd Level: Bytes of single instruction - std::vector<InstructionCodeList> insn_list; - - uint64_t addressFromInstructionIndex(size_t index) - { -  // TODO: cache this to prevent repetitive summing - -  if (index > insn_list.size()) -   throw std::runtime_error("Index "s + std::to_string(index) + " out of range ("s + std::to_string(insn_list.size()) + ")"s); - -  uint64_t sum{}; - -  for (size_t i = 0; i < index; i++) { -   if (insn_list[i].size() < 1) { -    throw std::runtime_error("Insufficient alternatives at index "s + std::to_string(i)); -   } - -   sum += static_cast<uint64_t>(insn_list[i][0].machine_code.size()); -  } - -  return sum; - } - - uint64_t addressFromLabel(std::string label) - { -  auto it{ labels.find(label) }; -  if (it == labels.end()) -   throw std::runtime_error("Label not found: "s + label); - -  return addressFromInstructionIndex(it->second); - } - - std::unordered_map<AddressType, std::function<void(std::vector<uint8_t>&, const Address&, uint64_t)>> addressInserters{ -  {AddressType::Relative8,  [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) -   { -    int64_t difference = static_cast<int64_t>(addressFromLabel(target_address.label)) - insn_address; -    if (difference < -128 || difference > 127) -     throw std::runtime_error("Distance too big"); - -    int8_t diff8 = static_cast<int8_t>(difference); -    uint8_t diff_u8 = *reinterpret_cast<uint8_t*>(&diff8); - -    machine_code[target_address.position] = diff_u8; -   } -  }, -  {AddressType::Relative16, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) { throw std::runtime_error("Relative16 Address not yet supported."); }}, -  {AddressType::Relative32, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) -   { -    int64_t difference = static_cast<int64_t>(addressFromLabel(target_address.label)) - insn_address; -    if (difference < -4294967296 || difference > 4294967295) -     throw std::runtime_error("Distance too big"); - -    int32_t diff32 = static_cast<int32_t>(difference); -    uint32_t diff_u32 = *reinterpret_cast<uint32_t*>(&diff32); - -    machine_code[target_address.position] = diff_u32 & 0xFF; // little endian -    machine_code[target_address.position + 1] = diff_u32 >> 8 & 0xFF; -    machine_code[target_address.position + 2] = diff_u32 >> 16 & 0xFF; -    machine_code[target_address.position + 3] = diff_u32 >> 24 & 0xFF; -   } -  }, -  {AddressType::Absolute8,  [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute8 Address not yet supported."); }}, -  {AddressType::Absolute16, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute16 Address not yet supported."); }}, -  {AddressType::Absolute32, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute32 Address not yet supported."); }}, - }; - - void produce_machine_code(std::vector<std::vector<Token>>& tl) - { -  for (const auto& t : tl) { -   // label: -   // label: mnemonic arg1, arg2, arg3 -   //        mnemonic arg1, arg2, arg3 - -   if (t.size() == 2 && t[0].type == "label" && t[1].type == ":") { // label -    if (labels.find(t[0].value) != labels.end()) -     throw std::runtime_error("Label already defined: "s + t[0].value); - -    labels[t[0].value] = insn_list.size(); -   } else if (t.size() >= 1 && t[0].type == "instruction") { // instruction -    std::string instruction{ t[0].value }; -    auto it = ops_old.find(instruction); -    if (it == ops_old.end()) -     throw std::runtime_error("Unknown instruction: "s + instruction); - -    InstructionCodeList codes = it->second(t); - -    if (codes.size() == 0) -     throw std::runtime_error("No instruction generated"); - -    insn_list.push_back(codes); - -   } else -    throw std::runtime_error("Syntax error"s); -  } - } - - void insert_addresses() - { -  for (size_t i = 0; i < insn_list.size(); i++) { -   InstructionCodeList& list{ insn_list[i] }; -   if (list.size() == 0) -    throw std::runtime_error("No instruction at index "s + std::to_string(i)); - -   InstructionCode& code{ list[0] }; - -   for (const auto& address : code.addresses) { -    addressInserters[address.type](code.machine_code, address, addressFromInstructionIndex(i)); -   } -  } - } - - void optimize() - { -  // reduce Jump sizes via alternatives if possible -  bool changed{}; -  do { -   changed = false; - -   for (size_t i = 0; i < insn_list.size(); i++) { -    InstructionCodeList& list{ insn_list[i] }; // Alternatives - -    // apply specific heuristics to optimization case -    if (list.size() == 2) { -     if (list[0].addresses.size() == 1 && list[1].addresses.size() == 1) { -      if (list[0].addresses[0].type == AddressType::Relative32 && list[1].addresses[0].type == AddressType::Relative8) { -       if (IsSmallAddress(list[0])) { -        list.pop_front(); -        break; // start over from start of program -       } -      } -     } -    } -   } - -   if (changed) -    insert_addresses(); // update - -  } while (changed); - } - - std::vector<uint8_t> collect_code() - { -  std::vector<uint8_t> result; - -  // collect generated machine instructions for result -  // Alternatives already resolved, if configured. Consider only 1st entry (no matter if optimized or not). -  for (size_t i = 0; i < insn_list.size(); i++) { -   InstructionCodeList& list{ insn_list[i] }; -   if (list.size() == 0) -    throw std::runtime_error("No instruction at index "s + std::to_string(i)); - -   InstructionCode& code{ list[0] }; - -   result.insert(result.end(), code.machine_code.begin(), code.machine_code.end()); -  } - -  return result; - } - -public: - Assembler() {} - - std::vector<uint8_t> assemble(std::vector<std::vector<Token>> tl) - { -  labels.clear(); -  insn_list.clear(); - -  produce_machine_code(tl); // 1st pass -  insert_addresses(); // 2nd pass -  if (O1) { -   optimize(); // 3rd pass -  } - -  return collect_code(); // 4th pass - } - -}; // class Assembler diff --git a/test-asm.cpp b/test-asm.cpp index d839683..256902c 100644 --- a/test-asm.cpp +++ b/test-asm.cpp @@ -39,7 +39,7 @@ protected:  TEST_F(AsmTest, Intel64_add) {   Segment segment; - AsmArgs args{{Register32("eax"), Immediate32(1)}}; + AsmArgs args{{AsmArgs::Register32("eax"), AsmArgs::Immediate32(1)}};   segment.push_back(makeOp("add", args));   ASSERT_EQ(segment.size(), 1); @@ -48,7 +48,7 @@ TEST_F(AsmTest, Intel64_add) {  TEST_F(AsmTest, Intel64_int_0) {   Segment segment; - AsmArgs args{{Immediate8(0)}}; + AsmArgs args{{AsmArgs::Immediate8(0)}};   segment.push_back(makeOp("int", args));   ASSERT_EQ(segment.size(), 1); @@ -57,7 +57,7 @@ TEST_F(AsmTest, Intel64_int_0) {  TEST_F(AsmTest, Intel64_int_1) {   Segment segment; - AsmArgs args{{Immediate8(1)}}; + AsmArgs args{{AsmArgs::Immediate8(1)}};   segment.push_back(makeOp("int", args));   ASSERT_EQ(segment.size(), 1); @@ -66,7 +66,7 @@ TEST_F(AsmTest, Intel64_int_1) {  TEST_F(AsmTest, Intel64_int_5) {   Segment segment; - AsmArgs args{{Immediate8(5)}}; + AsmArgs args{{AsmArgs::Immediate8(5)}};   segment.push_back(makeOp("int", args));   ASSERT_EQ(segment.size(), 1); @@ -93,7 +93,7 @@ TEST_F(AsmTest, Intel64_multiple) {   Segment segment;   segment.push_back(makeOp("nop")); - AsmArgs args{{Immediate8(5)}}; + AsmArgs args{{AsmArgs::Immediate8(5)}};   segment.push_back(makeOp("int", args));   segment.push_back(makeOp("ret"));   segment.push_back(makeLabel("data1")); | 
