diff options
| author | Roland Reichwein <mail@reichwein.it> | 2020-11-20 10:59:18 +0100 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2020-11-20 10:59:18 +0100 | 
| commit | 7250bbe5ae2d2ee6b0334bc462aab73f7d8dac0e (patch) | |
| tree | b910e778baa3e2434215abbb50e45e64aa92970c | |
| parent | 05faf77202d0f8762160f135e714961d2c456cca (diff) | |
Assembler bugfixes - tests run successfully now
| -rw-r--r-- | asm/assembler.cpp | 8 |
| -rw-r--r-- | asm/assembler.h | 41 |
| -rw-r--r-- | asm/intel64/add.cpp | 43 |
| -rw-r--r-- | asm/intel64/encode.cpp | 8 |
| -rw-r--r-- | asm/parse.cpp | 181 |
| -rw-r--r-- | tests/test-asm.cpp | 20 |
6 files changed, 252 insertions, 49 deletions
| diff --git a/asm/assembler.cpp b/asm/assembler.cpp index b555125..4eb37f0 100644 --- a/asm/assembler.cpp +++ b/asm/assembler.cpp @@ -2,6 +2,14 @@  using namespace std::string_literals; +Asm::Args::Immediate32::Immediate32(const Asm::Args::Immediate64& imm64) +{ + if (imm64.value() < 0x100000000) +  m_value = static_cast<uint32_t>(imm64.value()); + else +  throw std::runtime_error("Immediate32: Constructed from too big Immediate64"); +} +  namespace {  std::unordered_map<std::string, FactoryFunction> ops; diff --git a/asm/assembler.h b/asm/assembler.h index ea23fbc..8cdaa31 100644 --- a/asm/assembler.h +++ b/asm/assembler.h @@ -28,18 +28,21 @@ public:   {   public:    Immediate8(uint8_t value): m_value(value) {} -  uint8_t value() {return m_value;} +  uint8_t value() const {return m_value;}    std::vector<uint8_t> getCode() {return {m_value};};   private:    uint8_t m_value;   }; + class Immediate64; +   class Immediate32   {   public:    Immediate32(uint32_t value): m_value(value) {} -  uint32_t value() { return m_value; } +  Immediate32(const Immediate64&); ///< Convert from Immediate64 if data is small enough +  uint32_t value() const { return m_value; }    std::vector<uint8_t> getCode() {     std::vector<uint8_t> result(size_t(4));     *(reinterpret_cast<uint32_t*>(result.data())) = boost::endian::native_to_little(m_value); @@ -54,7 +57,7 @@ public:   {   public:    Immediate64(uint64_t value): m_value(value) {} -  uint64_t value() { return m_value; } +  uint64_t value() const { return m_value; }    std::vector<uint8_t> getCode() {     std::vector<uint8_t> result(size_t(8));     *(reinterpret_cast<uint64_t*>(result.data())) = boost::endian::native_to_little(m_value); @@ -69,7 +72,7 @@ public:   {   public:    Register8(const std::string& name): m_name(name) {} -  std::string name() { return m_name; } +  std::string name() const { return m_name; }   private:    std::string m_name; @@ -79,7 +82,7 @@ public:   {   public:    Register32(const std::string& name): 
m_name(name) {} -  std::string name() { return m_name; } +  std::string name() const { return m_name; }   private:    std::string m_name; @@ -89,7 +92,7 @@ public:   {   public:    Register64(const std::string& name): m_name(name) {} -  std::string name() { return m_name; } +  std::string name() const { return m_name; }   private:    std::string m_name; @@ -100,10 +103,10 @@ public:   {   public:    Mem8Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {} -  Mem8Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} -  std::string reg() { return m_reg; } -  std::string reg2() { return m_reg2; } -  int32_t offs() { return m_offs; } +  Mem8Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} +  std::string reg() const { return m_reg; } +  std::string reg2() const { return m_reg2; } +  int32_t offs() const { return m_offs; }   private:    std::string m_reg; @@ -116,10 +119,10 @@ public:   {   public:    Mem32Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {} -  Mem32Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} -  std::string reg() { return m_reg; } -  std::string reg2() { return m_reg2; } -  int32_t offs() { return m_offs; } +  Mem32Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} +  std::string reg() const { return m_reg; } +  std::string reg2() const { return m_reg2; } +  int32_t offs() const { return m_offs; }   private:    std::string m_reg; @@ -132,10 +135,10 @@ public:   {   public:    Mem64Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {} -  Mem64Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} -  std::string reg() { return m_reg; } -  
std::string reg2() { return m_reg2; } -  int32_t offs() { return m_offs; } +  Mem64Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} +  std::string reg() const { return m_reg; } +  std::string reg2() const { return m_reg2; } +  int32_t offs() const { return m_offs; }   private:    std::string m_reg; @@ -147,7 +150,7 @@ public:   {   public:    Label(const std::string& name): m_name(name) {} -  std::string name() { return m_name; } +  std::string name() const { return m_name; }   private:    std::string m_name; diff --git a/asm/intel64/add.cpp b/asm/intel64/add.cpp index 957c27f..07b14a1 100644 --- a/asm/intel64/add.cpp +++ b/asm/intel64/add.cpp @@ -28,12 +28,34 @@ Op_add::Op_add(const Asm::Args& args)   { // add rax, imm32    machine_code = REX("W") + std::vector<uint8_t>{ 0x05 } + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode(); + } else if (args[0].type() == typeid(Asm::Args::Register8) && args[1].type() == typeid(Asm::Args::Register8)) { // add reg8, reg8 +  machine_code = std::vector<uint8_t>{ 0x00 } + ModRM(std::any_cast<Asm::Args::Register8>(args[1]).name(), std::any_cast<Asm::Args::Register8>(args[0]).name()); + + } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Register32)) { // add reg32, reg32 +  machine_code = std::vector<uint8_t>{ 0x01 } + ModRM(std::any_cast<Asm::Args::Register32>(args[1]).name(), std::any_cast<Asm::Args::Register32>(args[0]).name()); + + } else if (args[0].type() == typeid(Asm::Args::Register64) && args[1].type() == typeid(Asm::Args::Register64)) { // add reg64, reg64 +  machine_code = REX("W") + std::vector<uint8_t>{ 0x01 } + ModRM(std::any_cast<Asm::Args::Register64>(args[1]).name(), std::any_cast<Asm::Args::Register64>(args[0]).name()); +   } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Mem32Ptr64)) { // add reg32, [reg64]    machine_code = 
std::vector<uint8_t>{ 0x03 } + ModRM(std::any_cast<Asm::Args::Register32>(args[0]).name(), std::any_cast<Asm::Args::Mem32Ptr64>(args[1]).reg());   } else if (args[0].type() == typeid(Asm::Args::Register64) && args[1].type() == typeid(Asm::Args::Mem64Ptr64)) { // add reg64, [reg64]    machine_code = REX("W") + std::vector<uint8_t>{ 0x03 } + ModRM(std::any_cast<Asm::Args::Register64>(args[0]).name(), std::any_cast<Asm::Args::Mem64Ptr64>(args[1]).reg()); + } else if (args[0].type() == typeid(Asm::Args::Mem8Ptr64) && args[1].type() == typeid(Asm::Args::Immediate8)) { // add [reg64], imm8 +  machine_code = std::vector<uint8_t>{ 0x80 } + ModRM("/0", std::any_cast<Asm::Args::Mem8Ptr64>(args[0]).reg()) + std::any_cast<Asm::Args::Immediate8>(args[1]).getCode(); + + } else if (args[0].type() == typeid(Asm::Args::Mem32Ptr64) && args[1].type() == typeid(Asm::Args::Immediate32)) { // add [reg64], imm32 +  machine_code = std::vector<uint8_t>{ 0x81 } + ModRM("/0", std::any_cast<Asm::Args::Mem32Ptr64>(args[0]).reg()) + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode(); + + } else if (args[0].type() == typeid(Asm::Args::Mem64Ptr64) && args[1].type() == typeid(Asm::Args::Immediate32)) { // add qword ptr [reg64], imm32 (sign-extended) +  machine_code = REX("W") + std::vector<uint8_t>{ 0x81 } + ModRM("/0", std::any_cast<Asm::Args::Mem64Ptr64>(args[0]).reg()) + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode(); + + } else if (args[0].type() == typeid(Asm::Args::Mem64Ptr64) && args[1].type() == typeid(Asm::Args::Immediate64)) { // add qword ptr [reg64], imm32 (sign-extended) - reduce imm64 to imm32! 
+  Asm::Args::Immediate32 imm32{std::any_cast<Asm::Args::Immediate64>(args[1])}; +  machine_code = REX("W") + std::vector<uint8_t>{ 0x81 } + ModRM("/0", std::any_cast<Asm::Args::Mem64Ptr64>(args[0]).reg()) + imm32.getCode(); +   } else {    throw std::runtime_error("Unimplemented: add "s + args[0].type().name() + " "s + args[1].type().name());   } @@ -48,11 +70,32 @@ bool registered {   registerOp(mangleName<Asm::Args::Register64, Asm::Args::Immediate32>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{                               return std::make_shared<Op_add>(args);                               }) && + registerOp(mangleName<Asm::Args::Register8, Asm::Args::Register8>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{ +                             return std::make_shared<Op_add>(args); +                             }) && + registerOp(mangleName<Asm::Args::Register32, Asm::Args::Register32>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{ +                             return std::make_shared<Op_add>(args); +                             }) && + registerOp(mangleName<Asm::Args::Register64, Asm::Args::Register64>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{ +                             return std::make_shared<Op_add>(args); +                             }) &&   registerOp(mangleName<Asm::Args::Register32, Asm::Args::Mem32Ptr64>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{                               return std::make_shared<Op_add>(args);                               }) &&   registerOp(mangleName<Asm::Args::Register64, Asm::Args::Mem64Ptr64>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{                               return std::make_shared<Op_add>(args); +                             }) && + registerOp(mangleName<Asm::Args::Mem8Ptr64, Asm::Args::Immediate8>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{ +                             return std::make_shared<Op_add>(args); +                  
           }) && + registerOp(mangleName<Asm::Args::Mem32Ptr64, Asm::Args::Immediate32>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{ +                             return std::make_shared<Op_add>(args); +                             }) && + registerOp(mangleName<Asm::Args::Mem64Ptr64, Asm::Args::Immediate32>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{ +                             return std::make_shared<Op_add>(args); +                             }) && + registerOp(mangleName<Asm::Args::Mem64Ptr64, Asm::Args::Immediate64>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{ // automatically converted to 32-bit (sign extended) if small enough. Intel doesn't support ADD ..., imm64 +                             return std::make_shared<Op_add>(args);                               })  }; diff --git a/asm/intel64/encode.cpp b/asm/intel64/encode.cpp index 51ca7a0..1b35d89 100644 --- a/asm/intel64/encode.cpp +++ b/asm/intel64/encode.cpp @@ -14,7 +14,7 @@ namespace {  std::shared_ptr<Op> makeLoadValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph)  {   if (data.type() != FlowGraph::DataType::Int) { -  std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); +  throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));   }   if (!data.storage()) @@ -41,7 +41,7 @@ std::shared_ptr<Op> makeLoadValue(const FlowGraph::Data& data, const FlowGraph::  std::shared_ptr<Op> makeStoreValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph)  {   if (data.type() != FlowGraph::DataType::Int) { -  std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); +  throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));   }   if (!data.storage()) @@ -60,7 +60,7 @@ std::shared_ptr<Op> makeStoreValue(const FlowGraph::Data& data, const FlowGraph:  std::shared_ptr<Op> makeAddValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph)  
{   if (data.type() != FlowGraph::DataType::Int) { -  std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); +  throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));   }   if (!data.storage()) @@ -87,7 +87,7 @@ std::shared_ptr<Op> makeAddValue(const FlowGraph::Data& data, const FlowGraph::G  std::vector<std::shared_ptr<Chunk>> makeMulValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph)  {   if (data.type() != FlowGraph::DataType::Int) { -  std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); +  throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));   }   if (!data.storage()) diff --git a/asm/parse.cpp b/asm/parse.cpp index e62f585..8f6f831 100644 --- a/asm/parse.cpp +++ b/asm/parse.cpp @@ -5,6 +5,7 @@  #include <boost/algorithm/string.hpp>  #include <exception> +#include <functional>  #include <regex>  #include <unordered_set> @@ -43,11 +44,13 @@ namespace {    }   } + std::string reg_re{"[[:alpha:]][[:alnum:]]*"}; +   // parse optional label   bool parseLabel(const std::string& asm_code, size_t& pos, std::string& result) {    parseWhitespace(asm_code, pos); -  std::regex re_label("([[:alpha:]]([[:alnum:]])+):", std::regex_constants::ECMAScript); +  std::regex re_label("("s + reg_re + "):"s, std::regex_constants::ECMAScript);    std::smatch match;    if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_label, std::regex_constants::match_continuous)) { @@ -64,7 +67,7 @@ namespace {   bool parseMnemonic(const std::string& asm_code, size_t& pos, std::string& result) {    parseWhitespace(asm_code, pos); -  std::regex re_mnemonic("[[:alpha:]]([[:alnum:]])+", std::regex_constants::ECMAScript); +  std::regex re_mnemonic(reg_re, std::regex_constants::ECMAScript);    std::smatch match;    if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_mnemonic, std::regex_constants::match_continuous)) { @@ 
-77,10 +80,10 @@ namespace {    return false;   } - bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {    parseWhitespace(asm_code, pos); -  std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); +  std::regex re_name(reg_re, std::regex_constants::ECMAScript);    std::smatch match;    if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -88,6 +91,7 @@ namespace {     if (reg8.contains(name)) {      pos += name.size();      result = Asm::Args::Register8(name); +    size_hint = 8;      return true;     }    } @@ -95,10 +99,10 @@ namespace {    return false;   } - bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {    parseWhitespace(asm_code, pos); -  std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); +  std::regex re_name(reg_re, std::regex_constants::ECMAScript);    std::smatch match;    if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -106,6 +110,7 @@ namespace {     if (reg32.contains(name)) {      pos += name.size();      result = Asm::Args::Register32(name); +    size_hint = 32;      return true;     }    } @@ -113,10 +118,10 @@ namespace {    return false;   } - bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {    parseWhitespace(asm_code, pos); -  std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); +  std::regex re_name(reg_re, std::regex_constants::ECMAScript);    std::smatch match;    if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, 
re_name, std::regex_constants::match_continuous)) { @@ -124,6 +129,64 @@ namespace {     if (reg64.contains(name)) {      pos += name.size();      result = Asm::Args::Register64(name); +    size_hint = 64; +    return true; +   } +  } + +  return false; + } + + bool parseMem8Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("byte ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[1].str())}; +   if (reg64.contains(name)) { +    pos += match[0].length(); +    result = Asm::Args::Mem8Ptr64{name}; +    size_hint = 8; +    return true; +   } +  } + +  return false; + } + + bool parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("(dword ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[2].str())}; +   if (reg64.contains(name)) { +    pos += match[0].length(); +    result = Asm::Args::Mem32Ptr64(name); +    size_hint = 32; +    return true; +   } +  } + +  return false; + } + + bool parseMem64Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("qword ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name 
{boost::algorithm::to_lower_copy(match[1].str())}; +   if (reg64.contains(name)) { +    pos += match[0].length(); +    result = Asm::Args::Mem64Ptr64(name); +    size_hint = 64;      return true;     }    } @@ -131,7 +194,37 @@ namespace {    return false;   } - bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseImmediate8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  if (size_hint != 8) +   return false; + +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   int32_t value{}; +   try { +    value = stoll(match[0]); +   } catch (...) { +    throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); +   } +   if (value < -128 || value > 255) +    throw std::runtime_error("Assembler parse error: Bad 8 bit immediate: "s + match[0].str()); + +   pos += match[0].length(); +   result = Asm::Args::Immediate8(static_cast<uint8_t>(value)); +   return true; +  } + +  return false; + } + + bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  if (size_hint != 32 && size_hint != 0) +   return false; +      parseWhitespace(asm_code, pos);    std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); @@ -152,18 +245,54 @@ namespace {    return false;   } + bool parseImmediate64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  if (size_hint != 64) +   return false; + +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, 
std::regex_constants::match_continuous)) { +   int64_t value{}; +   try { +    value = stoll(match[0]); +   } catch (...) { +    throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); +   } + +   pos += match[0].length(); +   result = Asm::Args::Immediate64(static_cast<uint64_t>(value)); +   return true; +  } + +  return false; + } +   // parse optional single operand - bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {    parseWhitespace(asm_code, pos); -  if (parseRegister8(asm_code, pos, result)) +  if (parseRegister8(asm_code, pos, result, size_hint)) +   return true; +  if (parseRegister32(asm_code, pos, result, size_hint)) +   return true; +  if (parseRegister64(asm_code, pos, result, size_hint))     return true; -  if (parseRegister32(asm_code, pos, result)) +   +  if (parseMem8Ptr64(asm_code, pos, result, size_hint)) +   return true; +  if (parseMem32Ptr64(asm_code, pos, result, size_hint))     return true; -  if (parseRegister64(asm_code, pos, result)) +  if (parseMem64Ptr64(asm_code, pos, result, size_hint))     return true; -  if (parseImmediate32(asm_code, pos, result)) +  if (parseImmediate8(asm_code, pos, result, size_hint)) +   return true; +  if (parseImmediate32(asm_code, pos, result, size_hint)) +   return true; +  if (parseImmediate64(asm_code, pos, result, size_hint))     return true;    return false; @@ -172,12 +301,13 @@ namespace {   // parse optional multiple operands, separated by commas   void parseOperands(const std::string& asm_code, size_t& pos, Asm::Args& result) {    std::any operand; -  if (parseOperand(asm_code, pos, operand)) { +  size_t size_hint{0}; // in bits, 0=no hint +  if (parseOperand(asm_code, pos, operand, size_hint)) {     result.push_back(operand);     parseWhitespace(asm_code, pos);     while (pos < asm_code.size() && asm_code[pos] == ',') {      pos++; -    if 
(parseOperand(asm_code, pos, operand)) { +    if (parseOperand(asm_code, pos, operand, size_hint)) {       result.push_back(operand);      } else {       throw std::runtime_error("Assembler error: expected operand after comma"); @@ -223,20 +353,27 @@ namespace {    // all optional:    // label: mnemonic operands... ;comment <eol> -  std::string result_string; -  if (parseLabel(asm_code, pos, result_string)) -   result.emplace_back(std::make_shared<Label>(result_string)); +  std::string label; +  std::function<void()> label_fn {[](){}}; +  if (parseLabel(asm_code, pos, label)) +   label_fn = [&]() { result.emplace_back(std::make_shared<Label>(label)); }; // defer to successfully completed line -  if (parseMnemonic(asm_code, pos, result_string)) { -   Asm::Args args; +  std::string mnemonic; +  Asm::Args args; +  std::function<void()> mnemonic_fn {[](){}}; +  if (parseMnemonic(asm_code, pos, mnemonic)) {     parseOperands(asm_code, pos, args); -   result.emplace_back(makeOp(result_string, args)); +   mnemonic_fn = [&]() { result.emplace_back(makeOp(mnemonic, args)); }; // defer to successfully completed line    }    parseComment(asm_code, pos);    if (!parseEol(asm_code, pos))     throw std::runtime_error("Assembler error at pos "s + std::to_string(pos)); + +  // Append only if no error occured, to get the correct error +  label_fn(); +  mnemonic_fn();   }  } // namespace diff --git a/tests/test-asm.cpp b/tests/test-asm.cpp index f4a1a2c..019f89c 100644 --- a/tests/test-asm.cpp +++ b/tests/test-asm.cpp @@ -178,22 +178,34 @@ TEST_F(AsmParseTest, parse_op_3) {  }  TEST_F(AsmParseTest, parse_op_4) { - std::vector<std::shared_ptr<Chunk>> chunks4{parseAsm("add [edi], 3")}; + std::vector<std::shared_ptr<Chunk>> chunks4{parseAsm("add [rdi], 3")};   ASSERT_EQ(chunks4.size(), 1);  } +TEST_F(AsmParseTest, parse_op_4_error) { + ASSERT_THROW(parseAsm("add [edi], 3"), std::runtime_error); +} +  TEST_F(AsmParseTest, parse_op_5) { - std::vector<std::shared_ptr<Chunk>> 
chunks5{parseAsm("add byte ptr [edi], 3")}; + std::vector<std::shared_ptr<Chunk>> chunks5{parseAsm("add byte ptr [rdi], 3")};   ASSERT_EQ(chunks5.size(), 1);  } +TEST_F(AsmParseTest, parse_op_5_error) { + ASSERT_THROW(parseAsm("add byte ptr [edi], 3"), std::runtime_error); +} +  TEST_F(AsmParseTest, parse_op_6) { - std::vector<std::shared_ptr<Chunk>> chunks6{parseAsm("add dword ptr[edi], 3")}; + std::vector<std::shared_ptr<Chunk>> chunks6{parseAsm("add dword ptr[rdi], 3")};   ASSERT_EQ(chunks6.size(), 1);  } +TEST_F(AsmParseTest, parse_op_6_error) { + ASSERT_THROW(parseAsm("add dword ptr[al], 3"), std::runtime_error); +} +  TEST_F(AsmParseTest, parse_op_7) { - std::vector<std::shared_ptr<Chunk>> chunks7{parseAsm("add qword ptr[edi], 3")}; + std::vector<std::shared_ptr<Chunk>> chunks7{parseAsm("add qword ptr[rdi], 3")};   ASSERT_EQ(chunks7.size(), 1);  } | 
