From 6609c946f0230cd1655dc5caca112579fb991697 Mon Sep 17 00:00:00 2001 From: sschriner Date: Tue, 4 Jan 2022 15:20:16 -0500 Subject: [PATCH 1/4] Decode Infrastructure --- lib/Arch/AArch32/Decode.cpp | 8 +- lib/Arch/Thumb2/Decode.cpp | 517 ++++++++++++++++++++++++++++++++++++ 2 files changed, 521 insertions(+), 4 deletions(-) create mode 100644 lib/Arch/Thumb2/Decode.cpp diff --git a/lib/Arch/AArch32/Decode.cpp b/lib/Arch/AArch32/Decode.cpp index ad2e786c2..2c974a1a6 100644 --- a/lib/Arch/AArch32/Decode.cpp +++ b/lib/Arch/AArch32/Decode.cpp @@ -3556,14 +3556,14 @@ static TryDecode *TryDataProcessingAndMisc(uint32_t bits) { // This is the top level of the instruction encoding schema for AArch32. // Instructions are grouped into subsets based on this the top level and then // into smaller sets. -// cond op0 op1 +// cond op0 op1 // != 1111 00x Data-processing and miscellaneous instructions // != 1111 010 Load/Store Word, Unsigned Byte (immediate, literal) // != 1111 011 0 Load/Store Word, Unsigned Byte (register) // != 1111 011 1 Media instructions -// 10x Branch, branch with link, and block data transfer -// 11x System register access, Advanced SIMD, floating-point, and Supervisor call -// 1111 0xx Unconditional instructions +// 10x Branch, branch with link, and block data transfer +// 11x System register access, Advanced SIMD, floating-point, and Supervisor call +// 1111 0xx Unconditional instructions static TryDecode *TryDecodeTopLevelEncodings(uint32_t bits) { const TopLevelEncodings enc = {bits}; diff --git a/lib/Arch/Thumb2/Decode.cpp b/lib/Arch/Thumb2/Decode.cpp new file mode 100644 index 000000000..04a8f534e --- /dev/null +++ b/lib/Arch/Thumb2/Decode.cpp @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2022 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include "Arch.h" +#include "remill/BC/ABI.h" + +namespace remill { + +namespace { + +typedef bool(TryDecode)(Instruction &, uint32_t); +typedef bool(TryDecode16)(Instruction &, uint16_t); +typedef std::optional(InstEval)(uint32_t, uint32_t); + + +// Add, subtract (three low registers) +union AddSub3LowReg16 { + uint16_t flat; + struct { + uint16_t _000110 : 6; + uint16_t S : 1; + uint16_t Rm : 3; + uint16_t Rn : 3; + uint16_t Rd : 3; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AddSub3LowReg16) == 2, " "); + +// Add, subtract (two low registers and immediate) +union AddSub2LowRegImm16 { + uint16_t flat; + struct { + uint16_t _000111 : 6; + uint16_t S : 1; + uint16_t imm3 : 3; + uint16_t Rn : 3; + uint16_t Rd : 3; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AddSub2LowRegImm16) == 2, " "); + +// Add, subtract, compare, move (one low register and immediate) +union AddSubComp1LowRegImm16 { + uint16_t flat; + struct { + uint16_t _001 : 3; + uint16_t op : 2; + uint16_t Rd : 3; + uint16_t imm8 : 8; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AddSubComp1LowRegImm16) == 2, " "); + +// MOV, MOVS (register) — T2 +union MOVrT2_16 { + uint16_t flat; + struct { + uint16_t _000 : 3; + uint16_t op : 2; + uint16_t imm5 : 5; + uint16_t Rm : 3; + uint16_t Rd : 3; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(MOVrT2_16) == 2, " "); + +// Load/store word/byte (immediate offset) +union LoadStoreWordByteImm16 { + uint16_t flat; + struct { + uint16_t _011 : 3; + uint16_t B : 1; + uint16_t L : 1; + uint16_t imm5 : 5; + uint16_t Rn : 3; + uint16_t Rt : 3; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(LoadStoreWordByteImm16) == 2, " "); + +// Load/store (SP-relative) +union LoadStoreSPRelative16 { + uint16_t flat; + struct { + uint16_t _1001 : 4; + uint16_t L : 1; + uint16_t Rt : 3; + uint16_t imm8 : 8; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(LoadStoreSPRelative16) == 2, " "); + +// Add PC/SP (immediate) +union AddPCSPImm16 { + uint16_t flat; + struct { + uint16_t _1010 : 4; + uint16_t SP : 1; + uint16_t Rd : 3; + uint16_t imm8 : 8; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AddPCSPImm16) == 2, " "); + +// Miscellaneous 16-bit instructions +union Misc16 { + uint16_t flat; + struct { + uint16_t _1011 : 4; + uint16_t op0 : 4; + uint16_t op1 : 2; + uint16_t op2 : 1; + uint16_t _b4 : 1; + uint16_t op3 : 4; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(Misc16) == 2, " "); + +// B — T1 +union B_T1_16 { + uint16_t flat; + struct { + uint16_t _1101 : 4; + uint16_t cond : 4; + uint16_t imm8 : 8; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(B_T1_16) == 2, " "); + +// B — T2 +union B_T2_16 { + uint16_t flat; + struct { + uint16_t _1101 : 4; + uint16_t cond : 4; + uint16_t imm8 : 8; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(B_T2_16) == 2, " "); + +// Shift (immediate), add, subtract, move, and compare +union ShiftImmAddSubMoveComp16 { + uint16_t flat; + struct { + uint16_t _00 : 2; + uint16_t op0 : 1; + uint16_t op1 : 2; + uint16_t op2 : 1; + uint16_t _9_to_0 : 10; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(ShiftImmAddSubMoveComp16) == 2, " "); + +// Load/Store Multiple +union LoadStoreMult32 { + uint32_t flat; + struct { + uint32_t _1110100 : 7; + uint32_t opc : 2; + uint32_t _0_b22 : 1; + uint32_t W : 1; + uint32_t L : 1; + uint32_t Rn : 4; + uint32_t P : 1; + uint32_t M : 1; + uint32_t _0_b13 : 1; + uint32_t register_list : 13; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(LoadStoreMult32) == 4, " "); + +// BL, BLX (immediate) — T1 +union BLT1_32 { + uint32_t flat; + struct { + uint32_t _11110 : 5; + uint32_t S : 1; + uint32_t imm10 : 10; + uint32_t _11 : 2; + uint32_t J1 : 1; + uint32_t _1 : 1; + uint32_t J2 : 1; + uint32_t imm11 : 11; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(BLT1_32) == 4, " "); + +// BL, BLX (immediate) — T2 +union BLXT2_32 { + uint32_t flat; + struct { + uint32_t _11110 : 5; + uint32_t S : 1; + uint32_t imm10H : 10; + uint32_t _11 : 2; + uint32_t J1 : 1; + uint32_t _0 : 1; + uint32_t J2 : 1; + uint32_t imm10L : 10; + uint32_t H : 1; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(BLXT2_32) == 4, " "); + +// Branches and miscellaneous control +union BranchesMiscControl32 { + uint32_t flat; + struct { + uint32_t _11110 : 5; + uint32_t op0 : 1; + uint32_t op1 : 4; + uint32_t op2 : 2; + uint32_t _19_to_16 : 4; + uint32_t _1 : 1; + uint32_t op3 : 3; + uint32_t _b11 : 1; + uint32_t op4 : 3; + uint32_t _7_to_6 : 2; + uint32_t op5 : 1; + uint32_t _4_to_0 : 5; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(BranchesMiscControl32) == 4, " "); + +// 32-bit instructions +union Top32bit { + uint32_t flat; + struct { + uint32_t _111 : 3; + uint32_t op0 : 4; + uint32_t op1 : 5; + uint32_t _19_to_16 : 4; + uint32_t op3 : 1; + uint32_t _14_to_0 : 15; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(Top32bit) == 4, " "); + +// ------------- 16 Bit TryDecode ------------- + +// S +// 0 ADD, ADDS (register) +// 1 SUB, SUBS (register) +// Add, subtract (three low registers) TODO(sonya) +static bool TryDecode16AddSub3LowReg(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const AddSub3LowReg16 enc = {bits}; +} + +// S +// 0 ADD, ADDS (immediate) +// 1 SUB, SUBS (immediate) +// Add, subtract (two low registers and immediate) TODO(sonya) +static bool TryDecode16AddSub2LowRegImm(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const AddSub2LowRegImm16 enc = {bits}; + if (enc.S) { + inst.function = "SUB"; + } else { + inst.function = "ADD"; + } +} + +// op +// 00 MOV, MOVS (immediate) +// 01 CMP (immediate) +// 10 ADD, ADDS (immediate) +// 11 SUB, SUBS (immediate) +// Add, subtract, compare, move (one low register and immediate) TODO(sonya) +static bool TryDecode16AddSubComp1LowRegImm(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const AddSubComp1LowRegImm16 enc = {bits}; +} + +// MOV, MOVS (register) — T2 TODO(sonya) +static bool TryDecode16MOVrT2(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const MOVrT2_16 enc = {bits}; +} + +// Load/store word/byte (immediate offset) TODO(sonya) +static bool TryDecode16LoadStoreWordByteImm(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const LoadStoreWordByteImm16 enc = {bits}; +} + +// Load/store (SP-relative) TODO(sonya) +static bool TryDecode16LoadStoreSPRelative(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const LoadStoreSPRelative16 enc = {bits}; +} + +// Add PC/SP (immediate) TODO(sonya) +static bool TryDecode16AddPCSP(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const AddPCSPImm16 enc = {bits}; +} + +// CBNZ, CBZ TODO(sonya) +static bool TryDecode16CBZ(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; +} + +// ------------- End 16 Bit TryDecode ------------- + +// B — T1 TODO(sonya) +static bool TryDecode16B_T1(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const B_T1_16 enc = {bits}; +} + +// ------------- 32 Bit TryDecode ------------- + +// Load/Store Multiple TODO(sonya) +// this should become a template probably +// (see TryDecodeLoadStoreMultiple in aarch32. the semantics are identical) +static bool TryDecode32LoadStoreMult(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const LoadStoreMult32 enc = {bits}; +} + +// BL, BLX (immediate) — T1 TODO(sonya) +static bool TryDecode32BL(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const BLT1_32 enc = {bits}; +} + +// BL, BLX (immediate) — T2 TODO (sonya) +static bool TryDecode32BLX(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const BLXT2_32 enc = {bits}; +} + +// ------------- End 32 Bit TryDecode ------------- + +// op0 +// 00xxxx Shift (immediate), add, subtract, move, and compare +// 010000 Data-processing (two low registers) +// 010001 Special data instructions and branch and exchange +// 01001x LDR (literal) — T1 +// 0101xx Load/store (register offset) +// 011xxx Load/store word/byte (immediate offset) +// 1000xx Load/store halfword (immediate offset) +// 1001xx Load/store (SP-relative) +// 1010xx Add PC/SP (immediate) +// 1011xx Miscellaneous 16-bit instructions +// 1100xx Load/store multiple +// 1101xx Conditional branch, and Supervisor Call +static TryDecode16 *Try16bit(uint16_t bits) { + uint16_t op0 = bits >> 10; + + // The following constraints also apply to this encoding: op0<5:3> != 111 + if ((op0 >> 3) != 0b111) { + return nullptr; + } + + // 00xxxx Shift (immediate), add, subtract, move, and compare + if (!(op0 >> 4)) { + + // op0 op1 op2 + // 0 11 0 Add, subtract (three low registers) + // 0 11 1 Add, subtract (two low registers and immediate) + // 0 != 11 MOV, MOVS (register) — T2 + // 1 Add, subtract, compare, move (one low register and immediate) + const ShiftImmAddSubMoveComp16 enc = {bits}; + + if (enc.op0) { + return TryDecode16AddSubComp1LowRegImm; + + } else if (enc.op1 != 0b11) { + return TryDecode16MOVrT2; + + } else if (enc.op2) { + return TryDecode16AddSub2LowRegImm; + + } else { + return TryDecode16AddSub3LowReg; + + } + + // 011xxx Load/store word/byte (immediate offset) + } else if ((op0 >> 3) == 0b011) { + return TryDecode16LoadStoreWordByteImm; + + // 1001xx Load/store (SP-relative) + } else if ((op0 >> 2) == 0b1001) { + return TryDecode16LoadStoreSPRelative; + + // 1010xx Add PC/SP (immediate) + } else if ((op0 >> 2) == 0b1010) { + return TryDecode16AddPCSP; + + // 1011xx Miscellaneous 16-bit instructions + } else if ((op0 >> 2) == 0b1011) { + const Misc16 enc = {bits}; + + // op0 == x0x1 CBNZ, CBZ + if ((enc.op0 & 0b0001) && !((enc.op0 << 1) >> 3)) { + return TryDecode16CBZ; + } + + return nullptr; + + // 1101xx Conditional branch, and Supervisor Call + } else if ((op0 >> 2) == 0b1101) { + uint16_t _op0 = (bits << 4) >> 9; + + // op0 + // 111x Exception generation + // != 111x B — T1 + if (_op0 == 0b111) { + return nullptr; + } else { + return TryDecode16B_T1; + } + } + + return nullptr; +} + +// TODO(sonya): B — T2 encoding +static bool TryB_T2(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const B_T2_16 enc = {bits}; +} + + +// op0 op1 op3 +// x11x System register access, Advanced SIMD, and floating-point +// 0100 xx0xx Load/store multiple +// 0100 xx1xx Load/store dual, load/store exclusive, load-acquire/store-release, and table branch +// 0101 Data-processing (shifted register) +// 10xx 1 Branches and miscellaneous control +// 10x0 0 Data-processing (modified immediate) +// 10x1 0 Data-processing (plain binary immediate) +// 1100 1xxx0 Advanced SIMD element or structure load/store +// 1100 != 1xxx0 Load/store single +// 1101 0xxxx Data-processing (register) +// 1101 10xxx Multiply, multiply accumulate, and absolute difference +// 1101 11xxx Long multiply and divide +static TryDecode *Try32bit(uint32_t bits) { + const Top32bit enc = {bits}; + + // op0 == 0100, op1 == xx0xx, Load/store multiple + if ((enc.op0 == 0b0100) && !(enc.op1 & 0b00100)) { + return TryDecode32LoadStoreMult; + + // op0 == 10xx, op3 == 1, Branches and miscellaneous control + } else if (((enc.op0 >> 2) == 0b10) && enc.op3){ + const BranchesMiscControl32 enc = {bits}; + + if (enc.op3 >> 2) { // op3 == 1xx + if (enc.op3 & 0b001) { // op3 == 1x1 + return TryDecode32BL; + + } else { // // op3 == 1x0 + return TryDecode32BLX; + } + } + + } + return nullptr; +} + +// op0 op1 +// != 111 16-bit +// 111 00 B — T2 +// 111 != 00 32-bit +static TryDecode *TryDecodeTopLevelEncodings(uint32_t bits) { + // 16-bit instructions + if (bits >> 13 != 0b111) { + return Try16bit(uint16_t(bits >> 16)); + + // B — T2 + } else if (!((bits << 3) >> 11)) { + return TryB_T2; + + // 32-bit instructions + } else { + return Try32bit(bits); + } +} + +} // namespace +} // namespace remill + + From 3f7566a2a5383930222f2485a07a62fc25316c1d Mon Sep 17 00:00:00 2001 From: sschriner Date: Fri, 7 Jan 2022 15:28:25 -0500 Subject: [PATCH 2/4] Thumb2Decode: Some small updates --- lib/Arch/Thumb2/Decode.cpp | 150 +++++++++++++++++++++++++++---------- 1 file changed, 110 insertions(+), 40 deletions(-) diff --git a/lib/Arch/Thumb2/Decode.cpp b/lib/Arch/Thumb2/Decode.cpp index 04a8f534e..589a615c7 100644 --- a/lib/Arch/Thumb2/Decode.cpp +++ b/lib/Arch/Thumb2/Decode.cpp @@ -253,7 +253,7 @@ union Top32bit { } __attribute__((packed)); static_assert(sizeof(Top32bit) == 4, " "); -// ------------- 16 Bit TryDecode ------------- +// ------------- 16 Bit Instructions ------------- // S // 0 ADD, ADDS (register) @@ -299,6 +299,11 @@ static bool TryDecode16MOVrT2(Instruction &inst, uint16_t bits) { const MOVrT2_16 enc = {bits}; } +// B L +// 0 0 STR (immediate) +// 0 1 LDR (immediate) +// 1 0 STRB (immediate) +// 1 1 LDRB (immediate) // Load/store word/byte (immediate offset) TODO(sonya) static bool TryDecode16LoadStoreWordByteImm(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; @@ -306,6 +311,9 @@ static bool TryDecode16LoadStoreWordByteImm(Instruction &inst, uint16_t bits) { const LoadStoreWordByteImm16 enc = {bits}; } +// L +// 0 STR (immediate) +// 1 LDR (immediate) // Load/store (SP-relative) TODO(sonya) static bool TryDecode16LoadStoreSPRelative(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; @@ -313,6 +321,9 @@ static bool TryDecode16LoadStoreSPRelative(Instruction &inst, uint16_t bits) { const LoadStoreSPRelative16 enc = {bits}; } +// SP +// 0 ADR +// 1 ADD, ADDS (SP plus immediate) // Add PC/SP (immediate) TODO(sonya) static bool TryDecode16AddPCSP(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; @@ -326,19 +337,33 @@ static bool TryDecode16CBZ(Instruction &inst, uint16_t bits) { return false; } -// ------------- End 16 Bit TryDecode ------------- - -// B — T1 TODO(sonya) +// B — T1 encoding TODO(sonya) static bool TryDecode16B_T1(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; return false; const B_T1_16 enc = {bits}; } -// ------------- 32 Bit TryDecode ------------- +// B — T2 encoding TODO(sonya) +static bool TryDecode16B_T2(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; + const B_T2_16 enc = {bits}; +} +// ------------- 32 Bit Instructions ------------- + +// opc L +// 00 0 SRS, SRSDA, SRSDB, SRSIA, SRSIB — T1 +// 00 1 RFE, RFEDA, RFEDB, RFEIA, RFEIB — T1 +// 01 0 STM, STMIA, STMEA +// 01 1 LDM, LDMIA, LDMFD +// 10 0 STMDB, STMFD +// 10 1 LDMDB, LDMEA +// 11 0 SRS, SRSDA, SRSDB, SRSIA, SRSIB — T2 +// 11 1 RFE, RFEDA, RFEDB, RFEIA, RFEIB — T2 // Load/Store Multiple TODO(sonya) -// this should become a template probably +// NOTE(sonya): this should become a template probably // (see TryDecodeLoadStoreMultiple in aarch32. the semantics are identical) static bool TryDecode32LoadStoreMult(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; @@ -353,14 +378,41 @@ static bool TryDecode32BL(Instruction &inst, uint16_t bits) { const BLT1_32 enc = {bits}; } -// BL, BLX (immediate) — T2 TODO (sonya) +// BL, BLX (immediate) — T2 TODO(sonya) static bool TryDecode32BLX(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; return false; const BLXT2_32 enc = {bits}; } -// ------------- End 32 Bit TryDecode ------------- +// ----------------------------------------------- + +// op0 op1 op2 op3 +// 0000 Adjust SP (immediate) +// 0010 Extend +// 0110 00 0 SETPAN (ARMv8.1) +// 0110 00 1 UNALLOCATED +// 0110 01 Change Processor State +// 0110 1x UNALLOCATED +// 0111 UNALLOCATED +// 1000 UNALLOCATED +// 1010 10 HLT +// 1010 != 10 Reverse bytes +// 1110 BKPT +// 1111 0000 Hints +// 1111 != 0000 IT +// x0x1 CBNZ, CBZ +// x10x Push and Pop +static TryDecode16 *TryDecodeMisc16(uint16_t bits) { + const Misc16 enc = {bits}; + + // op0 == x0x1 CBNZ, CBZ + if ((enc.op0 & 0b0001) && !((enc.op0 << 1) >> 3)) { + return TryDecode16CBZ; + } + + return nullptr; +} // op0 // 00xxxx Shift (immediate), add, subtract, move, and compare @@ -375,7 +427,7 @@ static bool TryDecode32BLX(Instruction &inst, uint16_t bits) { // 1011xx Miscellaneous 16-bit instructions // 1100xx Load/store multiple // 1101xx Conditional branch, and Supervisor Call -static TryDecode16 *Try16bit(uint16_t bits) { +static TryDecode16 *Try16Bit(uint16_t bits) { uint16_t op0 = bits >> 10; // The following constraints also apply to this encoding: op0<5:3> != 111 @@ -390,7 +442,8 @@ static TryDecode16 *Try16bit(uint16_t bits) { // 0 11 0 Add, subtract (three low registers) // 0 11 1 Add, subtract (two low registers and immediate) // 0 != 11 MOV, MOVS (register) — T2 - // 1 Add, subtract, compare, move (one low register and immediate) + // 1 Add, subtract, compare, move (one low register and + // immediate) const ShiftImmAddSubMoveComp16 enc = {bits}; if (enc.op0) { @@ -421,20 +474,13 @@ static TryDecode16 *Try16bit(uint16_t bits) { // 1011xx Miscellaneous 16-bit instructions } else if ((op0 >> 2) == 0b1011) { - const Misc16 enc = {bits}; - - // op0 == x0x1 CBNZ, CBZ - if ((enc.op0 & 0b0001) && !((enc.op0 << 1) >> 3)) { - return TryDecode16CBZ; - } - - return nullptr; + return TryDecodeMisc16(bits); // 1101xx Conditional branch, and Supervisor Call } else if ((op0 >> 2) == 0b1101) { uint16_t _op0 = (bits << 4) >> 9; - // op0 + // op0 // 111x Exception generation // != 111x B — T1 if (_op0 == 0b111) { @@ -447,18 +493,50 @@ static TryDecode16 *Try16bit(uint16_t bits) { return nullptr; } -// TODO(sonya): B — T2 encoding -static bool TryB_T2(Instruction &inst, uint16_t bits) { - inst.category = Instruction::kCategoryError; - return false; - const B_T2_16 enc = {bits}; +// op0 op1 op2 op3 op4 op5 +// 0 1110 0x 0x0 0 MSR (register) +// 0 1110 0x 0x0 1 MSR (Banked register) +// 0 1110 10 0x0 000 Hints +// 0 1110 10 0x0 != 000 Change processor state +// 0 1110 11 0x0 Miscellaneous system +// 0 1111 00 0x0 BXJ +// 0 1111 01 0x0 Exception return +// 0 1111 1x 0x0 0 MRS +// 0 1111 1x 0x0 1 MRS (Banked register) +// 1 1110 00 000 DCPS +// 1 1110 00 010 UNALLOCATED +// 1 1110 01 0x0 UNALLOCATED +// 1 1110 1x 0x0 UNALLOCATED +// 1 1111 0x 0x0 UNALLOCATED +// 1 1111 1x 0x0 Exception generation +// != 111x 0x0 B — T3 +// 0x1 B — T4 +// 1x0 BL, BLX (immediate) — T2 +// 1x1 BL, BLX (immediate) — T1 +// Branches and miscellaneous control +static TryDecode *TryBranchesMiscControl32(uint32_t bits) { + const BranchesMiscControl32 enc = {bits}; + + if (enc.op3 >> 2) { // op3 == 1xx + if (enc.op3 & 0b001) { // op3 == 1x1 + return TryDecode32BL; + + } else { // // op3 == 1x0 + return TryDecode32BLX; + + } + } + + return nullptr; } // op0 op1 op3 -// x11x System register access, Advanced SIMD, and floating-point +// x11x System register access, Advanced SIMD, and +// floating-point // 0100 xx0xx Load/store multiple -// 0100 xx1xx Load/store dual, load/store exclusive, load-acquire/store-release, and table branch +// 0100 xx1xx Load/store dual, load/store exclusive, +// load-acquire/store-release, and table branch // 0101 Data-processing (shifted register) // 10xx 1 Branches and miscellaneous control // 10x0 0 Data-processing (modified immediate) @@ -468,7 +546,7 @@ static bool TryB_T2(Instruction &inst, uint16_t bits) { // 1101 0xxxx Data-processing (register) // 1101 10xxx Multiply, multiply accumulate, and absolute difference // 1101 11xxx Long multiply and divide -static TryDecode *Try32bit(uint32_t bits) { +static TryDecode *Try32Bit(uint32_t bits) { const Top32bit enc = {bits}; // op0 == 0100, op1 == xx0xx, Load/store multiple @@ -477,18 +555,10 @@ static TryDecode *Try32bit(uint32_t bits) { // op0 == 10xx, op3 == 1, Branches and miscellaneous control } else if (((enc.op0 >> 2) == 0b10) && enc.op3){ - const BranchesMiscControl32 enc = {bits}; - - if (enc.op3 >> 2) { // op3 == 1xx - if (enc.op3 & 0b001) { // op3 == 1x1 - return TryDecode32BL; - - } else { // // op3 == 1x0 - return TryDecode32BLX; - } - } + return TryBranchesMiscControl32(bits); } + return nullptr; } @@ -499,15 +569,15 @@ static TryDecode *Try32bit(uint32_t bits) { static TryDecode *TryDecodeTopLevelEncodings(uint32_t bits) { // 16-bit instructions if (bits >> 13 != 0b111) { - return Try16bit(uint16_t(bits >> 16)); + return Try16Bit(uint16_t(bits >> 16)); // B — T2 } else if (!((bits << 3) >> 11)) { - return TryB_T2; + return TryDecode16B_T2; // 32-bit instructions } else { - return Try32bit(bits); + return Try32Bit(bits); } } From d5c74715b11f945d35c1255a66cb4f49c3f11e29 Mon Sep 17 00:00:00 2001 From: sschriner Date: Wed, 23 Feb 2022 11:24:32 -0500 Subject: [PATCH 3/4] updates --- include/remill/Arch/Name.h | 1 + lib/Arch/AArch32/Arch.cpp | 18 +- lib/Arch/AArch32/CMakeLists.txt | 3 +- lib/Arch/AArch32/Decode.cpp | 29 +- lib/Arch/AArch32/Decode.h | 42 ++ .../Decode.cpp => AArch32/DecodeThumb2.cpp} | 365 ++++++++++++------ lib/Arch/AArch32/Runtime/CMakeLists.txt | 1 + lib/Arch/AArch32/Semantics/BINARY.cpp | 14 + lib/Arch/AArch32/Semantics/COND.cpp | 9 + lib/Arch/AArch32/Semantics/LOGICAL.cpp | 8 + lib/Arch/Arch.cpp | 13 +- lib/Arch/Instruction.cpp | 1 + lib/Arch/Name.cpp | 6 +- 13 files changed, 371 insertions(+), 139 deletions(-) create mode 100644 lib/Arch/AArch32/Decode.h rename lib/Arch/{Thumb2/Decode.cpp => AArch32/DecodeThumb2.cpp} (74%) diff --git a/include/remill/Arch/Name.h b/include/remill/Arch/Name.h index 3df3ebe5e..61ca68704 100644 --- a/include/remill/Arch/Name.h +++ b/include/remill/Arch/Name.h @@ -81,6 +81,7 @@ enum ArchName : uint32_t { kArchAMD64_AVX, kArchAMD64_AVX512, + kArchThumb2LittleEndian, kArchAArch32LittleEndian, kArchAArch64LittleEndian, diff --git a/lib/Arch/AArch32/Arch.cpp b/lib/Arch/AArch32/Arch.cpp index e58d9ee07..ac41a2e58 100644 --- a/lib/Arch/AArch32/Arch.cpp +++ b/lib/Arch/AArch32/Arch.cpp @@ -55,11 +55,23 @@ AArch32Arch::~AArch32Arch(void) {} // TODO(pag): Eventually handle Thumb2 and unaligned addresses. uint64_t AArch32Arch::MinInstructionAlign(void) const { - return 4; + switch (arch_name) { + case kArchAArch32LittleEndian: return 4; + case kArchThumb2LittleEndian: return 2; + default: + LOG(FATAL) << "Cannot get minimum instruction alignment for non-aarch32 " + "architecture " << GetArchName(arch_name); + } } uint64_t AArch32Arch::MinInstructionSize(void) const { - return 4; + switch (arch_name) { + case kArchAArch32LittleEndian: return 4; + case kArchThumb2LittleEndian: return 2; + default: + LOG(FATAL) << "Cannot get minimum instruction alignment for non-aarch32 " + "architecture " << GetArchName(arch_name); + } } // Maximum number of bytes in an instruction for this particular architecture. @@ -77,6 +89,7 @@ llvm::Triple AArch32Arch::Triple(void) const { auto triple = BasicTriple(); switch (arch_name) { case kArchAArch32LittleEndian: triple.setArch(llvm::Triple::arm); break; + case kArchThumb2LittleEndian: triple.setArch(llvm::Triple::thumb); break; default: LOG(FATAL) << "Cannot get triple for non-aarch32 architecture " << GetArchName(arch_name); @@ -159,6 +172,7 @@ void AArch32Arch::PopulateRegisterTable(void) const { REG(C, sr.c, u8); REG(Z, sr.z, u8); REG(V, sr.v, u8); + REG(T, sr.t, u8); } diff --git a/lib/Arch/AArch32/CMakeLists.txt b/lib/Arch/AArch32/CMakeLists.txt index 293aff642..796d63892 100644 --- a/lib/Arch/AArch32/CMakeLists.txt +++ b/lib/Arch/AArch32/CMakeLists.txt @@ -27,7 +27,8 @@ add_library(remill_arch_aarch32 STATIC Arch.cpp Decode.cpp -# Decode.h + DecodeThumb2.cpp + Decode.h # Extract.cpp ) diff --git a/lib/Arch/AArch32/Decode.cpp b/lib/Arch/AArch32/Decode.cpp index 2c974a1a6..cd55843ad 100644 --- a/lib/Arch/AArch32/Decode.cpp +++ b/lib/Arch/AArch32/Decode.cpp @@ -19,11 +19,13 @@ #include #include "Arch.h" +#include "Decode.h" #include "remill/BC/ABI.h" +#include "remill/Arch/Name.h" namespace remill { -namespace { +namespace aarch32 { // Integer Data Processing (three register, register shift) union IntDataProcessingRRRR { @@ -562,17 +564,12 @@ union SpecialRegsAndHints { static_assert(sizeof(SpecialRegsAndHints) == 4, " "); static constexpr auto kAddressSize = 32u; -static constexpr auto kPCRegNum = 15u; -static constexpr auto kLRRegNum = 14u; static const char *const kIntRegName[] = { "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"}; -typedef bool(TryDecode)(Instruction &, uint32_t); -typedef std::optional(InstEval)(uint32_t, uint32_t); - -static void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, +void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, Operand::Action action) { Operand::Register reg; reg.size = size; @@ -581,7 +578,7 @@ static void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, op.action = action; } -static void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size, +void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size, Operand::Action action) { Operand::Register reg; reg.size = size; @@ -601,8 +598,8 @@ static void AddExprOp(Instruction &inst, OperandExpression *op_expr, op.action = action; } -static void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32, - bool is_signed = false) { +void AddImmOp(Instruction &inst, uint64_t value, unsigned size, + bool is_signed) { Operand::Immediate imm; imm.val = value; imm.is_signed = is_signed; @@ -3642,7 +3639,7 @@ bool AArch32Arch::DecodeInstruction(uint64_t address, inst.has_branch_taken_delay_slot = false; inst.has_branch_not_taken_delay_slot = false; inst.arch_name = arch_name; - inst.sub_arch_name = arch_name; // TODO(pag): Thumb. + inst.sub_arch_name = arch_name; inst.arch = this; inst.category = Instruction::kCategoryInvalid; inst.operands.clear(); @@ -3662,9 +3659,13 @@ bool AArch32Arch::DecodeInstruction(uint64_t address, } const auto bytes = reinterpret_cast(inst.bytes.data()); - const auto bits = BytesToBits(bytes); + const auto bits = aarch32::BytesToBits(bytes); + + if (arch_name == kArchThumb2LittleEndian) { + return aarch32::DecodeThumb2Instruction(inst, bits); + } - auto decoder = TryDecodeTopLevelEncodings(bits); + auto decoder = aarch32::TryDecodeTopLevelEncodings(bits); if (!decoder) { LOG(ERROR) << "unhandled bits " << std::hex << bits << std::dec; return false; @@ -3672,7 +3673,7 @@ bool AArch32Arch::DecodeInstruction(uint64_t address, auto ret = decoder(inst, bits); - // LOG(ERROR) << inst.Serialize(); + LOG(ERROR) << inst.Serialize(); return ret; } diff --git a/lib/Arch/AArch32/Decode.h b/lib/Arch/AArch32/Decode.h new file mode 100644 index 000000000..d21189ef9 --- /dev/null +++ b/lib/Arch/AArch32/Decode.h @@ -0,0 +1,42 @@ +/* + * Decode.h + * + * Created on: Feb 15, 2022 + * Author: sonyaschriner + */ + +#pragma once + +#include + +namespace remill { + +class Instruction; + +namespace aarch32 { + +bool DecodeThumb2Instruction(Instruction &inst, uint32_t bits); + +typedef bool(TryDecode)(Instruction &, uint32_t); +typedef bool(TryDecode16)(Instruction &, uint16_t); + +static constexpr auto kPCRegNum = 15u; +static constexpr auto kLRRegNum = 14u; +static constexpr auto kSPRegNum = 13u; + +typedef std::optional(InstEval)(uint32_t, uint32_t); + +//bool DecodeCondition(Instruction &inst, uint32_t cond); + +void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, + Operand::Action action); + +void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size, + Operand::Action action); + +void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32, + bool is_signed = false); + +} +} + diff --git a/lib/Arch/Thumb2/Decode.cpp b/lib/Arch/AArch32/DecodeThumb2.cpp similarity index 74% rename from lib/Arch/Thumb2/Decode.cpp rename to lib/Arch/AArch32/DecodeThumb2.cpp index 589a615c7..d7784d4e6 100644 --- a/lib/Arch/Thumb2/Decode.cpp +++ b/lib/Arch/AArch32/DecodeThumb2.cpp @@ -19,26 +19,22 @@ #include #include "Arch.h" +#include "Decode.h" #include "remill/BC/ABI.h" namespace remill { -namespace { - -typedef bool(TryDecode)(Instruction &, uint32_t); -typedef bool(TryDecode16)(Instruction &, uint16_t); -typedef std::optional(InstEval)(uint32_t, uint32_t); - +namespace aarch32 { // Add, subtract (three low registers) union AddSub3LowReg16 { uint16_t flat; struct { - uint16_t _000110 : 6; - uint16_t S : 1; - uint16_t Rm : 3; - uint16_t Rn : 3; uint16_t Rd : 3; + uint16_t Rn : 3; + uint16_t Rm : 3; + uint16_t S : 1; + uint16_t _000110 : 6; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(AddSub3LowReg16) == 2, " "); @@ -47,11 +43,11 @@ static_assert(sizeof(AddSub3LowReg16) == 2, " "); union AddSub2LowRegImm16 { uint16_t flat; struct { - uint16_t _000111 : 6; - uint16_t S : 1; - uint16_t imm3 : 3; - uint16_t Rn : 3; uint16_t Rd : 3; + uint16_t Rn : 3; + uint16_t imm3 : 3; + uint16_t S : 1; + uint16_t _000111 : 6; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(AddSub2LowRegImm16) == 2, " "); @@ -60,23 +56,34 @@ static_assert(sizeof(AddSub2LowRegImm16) == 2, " "); union AddSubComp1LowRegImm16 { uint16_t flat; struct { - uint16_t _001 : 3; - uint16_t op : 2; - uint16_t Rd : 3; uint16_t imm8 : 8; + uint16_t Rd : 3; + uint16_t op : 2; + uint16_t _001 : 3; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(AddSubComp1LowRegImm16) == 2, " "); +// Adjust SP (immediate) +union AdjustSPImm16 { + uint16_t flat; + struct { + uint16_t imm7 : 7; + uint16_t S : 1; + uint16_t _10110000 : 8; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AdjustSPImm16) == 2, " "); + // MOV, MOVS (register) — T2 union MOVrT2_16 { uint16_t flat; struct { - uint16_t _000 : 3; - uint16_t op : 2; - uint16_t imm5 : 5; - uint16_t Rm : 3; uint16_t Rd : 3; + uint16_t Rm : 3; + uint16_t imm5 : 5; + uint16_t op : 2; + uint16_t _000 : 3; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(MOVrT2_16) == 2, " "); @@ -85,12 +92,12 @@ static_assert(sizeof(MOVrT2_16) == 2, " "); union LoadStoreWordByteImm16 { uint16_t flat; struct { - uint16_t _011 : 3; - uint16_t B : 1; - uint16_t L : 1; - uint16_t imm5 : 5; - uint16_t Rn : 3; uint16_t Rt : 3; + uint16_t Rn : 3; + uint16_t imm5 : 5; + uint16_t L : 1; + uint16_t B : 1; + uint16_t _011 : 3; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(LoadStoreWordByteImm16) == 2, " "); @@ -99,10 +106,10 @@ static_assert(sizeof(LoadStoreWordByteImm16) == 2, " "); union LoadStoreSPRelative16 { uint16_t flat; struct { - uint16_t _1001 : 4; - uint16_t L : 1; - uint16_t Rt : 3; uint16_t imm8 : 8; + uint16_t Rt : 3; + uint16_t L : 1; + uint16_t _1001 : 4; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(LoadStoreSPRelative16) == 2, " "); @@ -111,10 +118,10 @@ static_assert(sizeof(LoadStoreSPRelative16) == 2, " "); union AddPCSPImm16 { uint16_t flat; struct { - uint16_t _1010 : 4; - uint16_t SP : 1; - uint16_t Rd : 3; uint16_t imm8 : 8; + uint16_t Rd : 3; + uint16_t SP : 1; + uint16_t _1010 : 4; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(AddPCSPImm16) == 2, " "); @@ -123,12 +130,12 @@ static_assert(sizeof(AddPCSPImm16) == 2, " "); union Misc16 { uint16_t flat; struct { - uint16_t _1011 : 4; - uint16_t op0 : 4; - uint16_t op1 : 2; - uint16_t op2 : 1; - uint16_t _b4 : 1; uint16_t op3 : 4; + uint16_t _b4 : 1; + uint16_t op2 : 1; + uint16_t op1 : 2; + uint16_t op0 : 4; + uint16_t _1011 : 4; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(Misc16) == 2, " "); @@ -137,9 +144,9 @@ static_assert(sizeof(Misc16) == 2, " "); union B_T1_16 { uint16_t flat; struct { - uint16_t _1101 : 4; - uint16_t cond : 4; uint16_t imm8 : 8; + uint16_t cond : 4; + uint16_t _1101 : 4; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(B_T1_16) == 2, " "); @@ -148,9 +155,8 @@ static_assert(sizeof(B_T1_16) == 2, " "); union B_T2_16 { uint16_t flat; struct { - uint16_t _1101 : 4; - uint16_t cond : 4; - uint16_t imm8 : 8; + uint16_t imm11 : 11; + uint16_t _11100 : 5; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(B_T2_16) == 2, " "); @@ -159,11 +165,11 @@ static_assert(sizeof(B_T2_16) == 2, " "); union ShiftImmAddSubMoveComp16 { uint16_t flat; struct { - uint16_t _00 : 2; - uint16_t op0 : 1; - uint16_t op1 : 2; - uint16_t op2 : 1; uint16_t _9_to_0 : 10; + uint16_t op2 : 1; + uint16_t op1 : 2; + uint16_t op0 : 1; + uint16_t _00 : 2; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(ShiftImmAddSubMoveComp16) == 2, " "); @@ -172,16 +178,16 @@ static_assert(sizeof(ShiftImmAddSubMoveComp16) == 2, " "); union LoadStoreMult32 { uint32_t flat; struct { - uint32_t _1110100 : 7; - uint32_t opc : 2; - uint32_t _0_b22 : 1; - uint32_t W : 1; - uint32_t L : 1; - uint32_t Rn : 4; - uint32_t P : 1; - uint32_t M : 1; - uint32_t _0_b13 : 1; uint32_t register_list : 13; + uint32_t _0_b13 : 1; + uint32_t M : 1; + uint32_t P : 1; + uint32_t Rn : 4; + uint32_t L : 1; + uint32_t W : 1; + uint32_t _0_b22 : 1; + uint32_t opc : 2; + uint32_t _1110100 : 7; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(LoadStoreMult32) == 4, " "); @@ -190,14 +196,14 @@ static_assert(sizeof(LoadStoreMult32) == 4, " "); union BLT1_32 { uint32_t flat; struct { - uint32_t _11110 : 5; - uint32_t S : 1; - uint32_t imm10 : 10; - uint32_t _11 : 2; - uint32_t J1 : 1; - uint32_t _1 : 1; - uint32_t J2 : 1; uint32_t imm11 : 11; + uint32_t J2 : 1; + uint32_t _1 : 1; + uint32_t J1 : 1; + uint32_t _11 : 2; + uint32_t imm10 : 10; + uint32_t S : 1; + uint32_t _11110 : 5; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(BLT1_32) == 4, " "); @@ -206,15 +212,15 @@ static_assert(sizeof(BLT1_32) == 4, " "); union BLXT2_32 { uint32_t flat; struct { - uint32_t _11110 : 5; - uint32_t S : 1; - uint32_t imm10H : 10; - uint32_t _11 : 2; - uint32_t J1 : 1; - uint32_t _0 : 1; - uint32_t J2 : 1; - uint32_t imm10L : 10; uint32_t H : 1; + uint32_t imm10L : 10; + uint32_t J2 : 1; + uint32_t _0 : 1; + uint32_t J1 : 1; + uint32_t _11 : 2; + uint32_t imm10H : 10; + uint32_t S : 1; + uint32_t _11110 : 5; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(BLXT2_32) == 4, " "); @@ -243,43 +249,71 @@ static_assert(sizeof(BranchesMiscControl32) == 4, " "); union Top32bit { uint32_t flat; struct { - uint32_t _111 : 3; - uint32_t op0 : 4; - uint32_t op1 : 5; - uint32_t _19_to_16 : 4; - uint32_t op3 : 1; uint32_t _14_to_0 : 15; + uint32_t op3 : 1; + uint32_t _19_to_16 : 4; + uint32_t op1 : 5; + uint32_t op0 : 4; + uint32_t _111 : 3; } __attribute__((packed)); } __attribute__((packed)); static_assert(sizeof(Top32bit) == 4, " "); // ------------- 16 Bit Instructions ------------- +static const char *const kIdpNamesAddSubLowReg[] = { + [0b0] = "ADDL_T2", [0b1] = "SUBL_T2" +}; + // S // 0 ADD, ADDS (register) // 1 SUB, SUBS (register) -// Add, subtract (three low registers) TODO(sonya) +// Add, subtract (three low registers) static bool TryDecode16AddSub3LowReg(Instruction &inst, uint16_t bits) { - inst.category = Instruction::kCategoryError; - return false; + + // TODO(sonya) ADDS, SUBS - Decide how to handle InITBlock() + const AddSub3LowReg16 enc = {bits}; + inst.category = Instruction::kCategoryNormal; + inst.function = kIdpNamesAddSubLowReg[enc.S]; + + + // Unconditionally executed + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionWrite); + AddIntRegOp(inst, uint32_t(enc.Rn), 32u, Operand::kActionRead); + AddIntRegOp(inst, uint32_t(enc.Rm), 32u, Operand::kActionRead); + + return true; + } // S // 0 ADD, ADDS (immediate) // 1 SUB, SUBS (immediate) -// Add, subtract (two low registers and immediate) TODO(sonya) +// Add, subtract (two low registers and immediate) static bool TryDecode16AddSub2LowRegImm(Instruction &inst, uint16_t bits) { - inst.category = Instruction::kCategoryError; - return false; + + // TODO(sonya) ADDS, SUBS - Decide how to handle InITBlock() + const AddSub2LowRegImm16 enc = {bits}; - if (enc.S) { - inst.function = "SUB"; - } else { - inst.function = "ADD"; - } + inst.category = Instruction::kCategoryNormal; + inst.function = kIdpNamesAddSubLowReg[enc.S]; + + // Unconditionally executed + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionWrite); + AddIntRegOp(inst, uint32_t(enc.Rn), 32u, Operand::kActionRead); + AddImmOp(inst, uint32_t(enc.imm3)); + + return true; + } +static const char *const kIdpAddSubComp1LowRegImm[] = { + [0b00] = "MOVL_T2", [0b01] = "CMPL_T2", + [0b10] = "ADDL_T2", [0b11] = "SUBL_T2" +}; + + // op // 00 MOV, MOVS (immediate) // 01 CMP (immediate) @@ -287,16 +321,35 @@ static bool TryDecode16AddSub2LowRegImm(Instruction &inst, uint16_t bits) { // 11 SUB, SUBS (immediate) // Add, subtract, compare, move (one low register and immediate) TODO(sonya) static bool TryDecode16AddSubComp1LowRegImm(Instruction &inst, uint16_t bits) { - inst.category = Instruction::kCategoryError; - return false; + + // TODO(sonya): setflags = !InITBlock() + const AddSubComp1LowRegImm16 enc = {bits}; + inst.category = Instruction::kCategoryNormal; + inst.function = kIdpAddSubComp1LowRegImm[enc.op]; + + // Unconditionally executed + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionWrite); + if (enc.op) { + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionRead); + } + AddImmOp(inst, uint32_t(enc.imm8)); + + return true; + } // MOV, MOVS (register) — T2 TODO(sonya) static bool TryDecode16MOVrT2(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; return false; - const MOVrT2_16 enc = {bits}; + +// const MOVrT2_16 enc = {bits}; +// inst.category = Instruction::kCategoryNormal; +// inst.function = "MOVL_T2"; +// +// return true; + } // B L @@ -308,7 +361,7 @@ static bool TryDecode16MOVrT2(Instruction &inst, uint16_t bits) { static bool TryDecode16LoadStoreWordByteImm(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; return false; - const LoadStoreWordByteImm16 enc = {bits}; +// const LoadStoreWordByteImm16 enc = {bits}; } // L @@ -318,7 +371,7 @@ static bool TryDecode16LoadStoreWordByteImm(Instruction &inst, uint16_t bits) { static bool TryDecode16LoadStoreSPRelative(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; return false; - const LoadStoreSPRelative16 enc = {bits}; +// const LoadStoreSPRelative16 enc = {bits}; } // SP @@ -326,9 +379,24 @@ static bool TryDecode16LoadStoreSPRelative(Instruction &inst, uint16_t bits) { // 1 ADD, ADDS (SP plus immediate) // Add PC/SP (immediate) TODO(sonya) static bool TryDecode16AddPCSP(Instruction &inst, uint16_t bits) { + + const AddPCSPImm16 enc = {bits}; + inst.function = enc.SP ? "ADDL_T2" : "ADR"; + + // TODO(sonya): ADR + + if (enc.SP) { + inst.category = Instruction::kCategoryNormal; + + AddIntRegOp(inst, enc.Rd, 32u, Operand::kActionWrite); + AddIntRegOp(inst, kSPRegNum, 32u, Operand::kActionRead); + AddImmOp(inst, uint32_t(enc.imm8 << 2)); + + return true; + } + inst.category = Instruction::kCategoryError; return false; - const AddPCSPImm16 enc = {bits}; } // CBNZ, CBZ TODO(sonya) @@ -337,18 +405,36 @@ static bool TryDecode16CBZ(Instruction &inst, uint16_t bits) { return false; } + +// Adjust SP (immediate) +static bool TryDecode16AdjustSPImm(Instruction &inst, uint16_t bits) { + + const AdjustSPImm16 enc = {bits}; + + // TODO(sonya): setflags = !InITBlock() + + inst.category = Instruction::kCategoryNormal; + inst.function = kIdpNamesAddSubLowReg[enc.S]; + + AddIntRegOp(inst, kSPRegNum, 32u, Operand::kActionWrite); + AddIntRegOp(inst, kSPRegNum, 32u, Operand::kActionRead); + AddImmOp(inst, uint32_t(enc.imm7 << 2)); + + return true; +} + // B — T1 encoding TODO(sonya) static bool TryDecode16B_T1(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; return false; - const B_T1_16 enc = {bits}; +// const B_T1_16 enc = {bits}; } // B — T2 encoding TODO(sonya) static bool TryDecode16B_T2(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; return false; - const B_T2_16 enc = {bits}; +// const B_T2_16 enc = {bits}; } // ------------- 32 Bit Instructions ------------- @@ -365,24 +451,24 @@ static bool TryDecode16B_T2(Instruction &inst, uint16_t bits) { // Load/Store Multiple TODO(sonya) // NOTE(sonya): this should become a template probably // (see TryDecodeLoadStoreMultiple in aarch32. the semantics are identical) -static bool TryDecode32LoadStoreMult(Instruction &inst, uint16_t bits) { +static bool TryDecode32LoadStoreMult(Instruction &inst, uint32_t bits) { inst.category = Instruction::kCategoryError; return false; - const LoadStoreMult32 enc = {bits}; +// const LoadStoreMult32 enc = {bits}; } // BL, BLX (immediate) — T1 TODO(sonya) -static bool TryDecode32BL(Instruction &inst, uint16_t bits) { +static bool TryDecode32BL(Instruction &inst, uint32_t bits) { inst.category = Instruction::kCategoryError; return false; - const BLT1_32 enc = {bits}; +// const BLT1_32 enc = {bits}; } // BL, BLX (immediate) — T2 TODO(sonya) -static bool TryDecode32BLX(Instruction &inst, uint16_t bits) { +static bool TryDecode32BLX(Instruction &inst, uint32_t bits) { inst.category = Instruction::kCategoryError; return false; - const BLXT2_32 enc = {bits}; +// const BLXT2_32 enc = {bits}; } // ----------------------------------------------- @@ -406,8 +492,11 @@ static bool TryDecode32BLX(Instruction &inst, uint16_t bits) { static TryDecode16 *TryDecodeMisc16(uint16_t bits) { const Misc16 enc = {bits}; + if (!enc.op0) { + return TryDecode16AdjustSPImm; + // op0 == x0x1 CBNZ, CBZ - if ((enc.op0 & 0b0001) && !((enc.op0 << 1) >> 3)) { + } else if ((enc.op0 & 0b0001) && !((enc.op0 << 1) >> 3)) { return TryDecode16CBZ; } @@ -427,11 +516,11 @@ static TryDecode16 *TryDecodeMisc16(uint16_t bits) { // 1011xx Miscellaneous 16-bit instructions // 1100xx Load/store multiple // 1101xx Conditional branch, and Supervisor Call -static TryDecode16 *Try16Bit(uint16_t bits) { +static TryDecode16 *Try16bit(uint16_t bits) { uint16_t op0 = bits >> 10; // The following constraints also apply to this encoding: op0<5:3> != 111 - if ((op0 >> 3) != 0b111) { + if ((op0 >> 3) == 0b111) { return nullptr; } @@ -459,6 +548,11 @@ static TryDecode16 *Try16Bit(uint16_t bits) { return TryDecode16AddSub3LowReg; } + // 010001 Special data instructions and branch and exchange + } else if (op0 == 0b010001) { + // TODO(sonya): Add, subtract, compare, move (two high registers) + // -- for ADD, ADDS (register) + return nullptr; // 011xxx Load/store word/byte (immediate offset) } else if ((op0 >> 3) == 0b011) { @@ -562,26 +656,57 @@ static TryDecode *Try32Bit(uint32_t bits) { return nullptr; } -// op0 op1 -// != 111 16-bit -// 111 00 B — T2 -// 111 != 00 32-bit -static TryDecode *TryDecodeTopLevelEncodings(uint32_t bits) { - // 16-bit instructions - if (bits >> 13 != 0b111) { - return Try16Bit(uint16_t(bits >> 16)); - - // B — T2 - } else if (!((bits << 3) >> 11)) { - return TryDecode16B_T2; - - // 32-bit instructions - } else { - return Try32Bit(bits); +bool DecodeThumb2Instruction(Instruction &inst, uint32_t bits) { + bool ret; + + // op0 op1 + // != 111 16-bit + // 111 00 B — T2 + // 111 != 00 32-bit + // TODO(sonya): make adjustments to inst for a 16 bit increment + { + auto bits16 = uint16_t(bits >> 16); + + // 16-bit instructions + if (bits >> 13 != 0b111) { + inst.next_pc = inst.pc + 2ull; // Default fall-through. + //inst.bytes = inst_bytes; + + auto decoder = Try16bit(bits16); + if (!decoder) { + LOG(ERROR) << "unhandled bits " << std::hex << bits << std::dec; + LOG(ERROR) << "unhandled bits16 " << std::hex << bits16 << std::dec; + return false; + } + ret = decoder(inst, bits16); + + // B — T2 + } else if (!((bits << 3) >> 11)) { + inst.next_pc = inst.pc + 2ull; // Default fall-through. + //inst.bytes = inst_bytes; + + auto decoder = TryDecode16B_T2; + ret = decoder(inst, bits16); + + // 32-bit instructions + } else { + auto decoder = Try32Bit(bits); + + if (!decoder) { + LOG(ERROR) << "unhandled bits " << std::hex << bits << std::dec; + return false; + } + + ret = decoder(inst, bits); + + } } -} + LOG(ERROR) << inst.Serialize(); + return ret; +} } // namespace + } // namespace remill diff --git a/lib/Arch/AArch32/Runtime/CMakeLists.txt b/lib/Arch/AArch32/Runtime/CMakeLists.txt index 560e8d054..ffcbf23f1 100644 --- a/lib/Arch/AArch32/Runtime/CMakeLists.txt +++ b/lib/Arch/AArch32/Runtime/CMakeLists.txt @@ -64,3 +64,4 @@ function(add_runtime_helper target_name little_endian) endfunction() add_runtime_helper(aarch32 1) +add_runtime_helper(thumb2 1) diff --git a/lib/Arch/AArch32/Semantics/BINARY.cpp b/lib/Arch/AArch32/Semantics/BINARY.cpp index fc5e98db8..f8006365a 100644 --- a/lib/Arch/AArch32/Semantics/BINARY.cpp +++ b/lib/Arch/AArch32/Semantics/BINARY.cpp @@ -94,6 +94,12 @@ DEF_COND_SEM(SUB, R32W dst, R32 src1, I32 src2, R32W maybe_next_pc_dst) { return memory; } +DEF_SEM(SUBL_T2, R32W dst, R32 src1, I32 src2) { + auto value = Read(src2); + Write(dst, USub(Read(src1), value)); + return memory; +} + DEF_COND_SEM(SUBS, R32W dst, R32 src1, I32 src2, I8 carry_out, R32W maybe_next_pc_dst) { auto rhs = Read(src2); @@ -111,6 +117,12 @@ DEF_COND_SEM(ADD, R32W dst, R32 src1, I32 src2, R32W maybe_next_pc_dst) { return memory; } +DEF_SEM(ADDL_T2, R32W dst, R32 src1, I32 src2) { + auto value = Read(src2); + Write(dst, UAdd(Read(src1), value)); + return memory; +} + DEF_COND_SEM(ADDS, R32W dst, R32 src1, I32 src2, I8 carry_out, R32W maybe_next_pc_dst) { auto rhs = Read(src2); @@ -178,12 +190,14 @@ DEF_ISEL(ANDSrr) = ANDS; DEF_ISEL(EORrr) = EOR; DEF_ISEL(EORSrr) = EORS; DEF_ISEL(ADDrr) = ADD; +DEF_ISEL(ADDL_T2) = ADDL_T2; DEF_ISEL(ADDSrr) = ADDS; DEF_ISEL(ADCrr) = ADC; DEF_ISEL(ADCSrr) = ADCS; DEF_ISEL(RSBrr) = RSB; DEF_ISEL(RSBSrr) = RSBS; DEF_ISEL(SUBrr) = SUB; +DEF_ISEL(SUBL_T2) = SUBL_T2; DEF_ISEL(SUBSrr) = SUBS; DEF_ISEL(SBCrr) = SBC; DEF_ISEL(SBCSrr) = SBCS; diff --git a/lib/Arch/AArch32/Semantics/COND.cpp b/lib/Arch/AArch32/Semantics/COND.cpp index 7e298c0af..00f258286 100644 --- a/lib/Arch/AArch32/Semantics/COND.cpp +++ b/lib/Arch/AArch32/Semantics/COND.cpp @@ -44,6 +44,13 @@ DEF_COND_SEM(CMP, R32 src1, I32 src2, I8 carry_out) { return memory; } +DEF_SEM(CMPL_T2, R32 src1, I32 src2, I8 carry_out) { + auto rhs = Read(src2); + auto lhs = Read(src1); + AddWithCarryNZCV(state, lhs, UNot(rhs), uint32_t(1)); + return memory; +} + DEF_COND_SEM(CMN, R32 src1, I32 src2, I8 carry_out) { auto rhs = Read(src2); auto lhs = Read(src1); @@ -56,3 +63,5 @@ DEF_ISEL(TSTr) = TST; DEF_ISEL(TEQr) = TEQ; DEF_ISEL(CMPr) = CMP; DEF_ISEL(CMNr) = CMN; + +DEF_ISEL(CMPL_T2) = CMPL_T2; diff --git a/lib/Arch/AArch32/Semantics/LOGICAL.cpp b/lib/Arch/AArch32/Semantics/LOGICAL.cpp index b6c5617be..09cd32c8c 100644 --- a/lib/Arch/AArch32/Semantics/LOGICAL.cpp +++ b/lib/Arch/AArch32/Semantics/LOGICAL.cpp @@ -82,6 +82,14 @@ DEF_COND_SEM(MOVT, R32W dst, R32 src1, R32 src2) { Write(dst, result); return memory; } + +DEF_SEM(MOVL_T2, R32W dst, I32 src1) { + auto value = Read(src1); + Write(dst, value); + return memory; +} + } // namespace DEF_ISEL(MOVT) = MOVT; +DEF_ISEL(MOVL_T2) = MOVL_T2; diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp index 7cd353036..e61b5d6e5 100644 --- a/lib/Arch/Arch.cpp +++ b/lib/Arch/Arch.cpp @@ -56,6 +56,7 @@ static unsigned AddressSize(ArchName arch_name) { case kArchInvalid: LOG(FATAL) << "Cannot get address size for invalid arch."; return 0; + case kArchThumb2LittleEndian: case kArchX86: case kArchX86_AVX: case kArchX86_AVX512: @@ -146,6 +147,12 @@ auto Arch::Build(llvm::LLVMContext *context_, OSName os_name_, break; } + case kArchThumb2LittleEndian: { + DLOG(INFO) << "Using architecture: Thumb2, feature set: Little Endian"; + ret = GetAArch32(context_, os_name_, arch_name_); + break; + } + case kArchAArch32LittleEndian: { DLOG(INFO) << "Using architecture: AArch32, feature set: Little Endian"; ret = GetAArch32(context_, os_name_, arch_name_); @@ -348,7 +355,11 @@ bool Arch::IsAMD64(void) const { } bool Arch::IsAArch32(void) const { - return remill::kArchAArch32LittleEndian == arch_name; + switch (arch_name) { + case remill::kArchAArch32LittleEndian: + case remill::kArchThumb2LittleEndian: return true; + default: return false; + } } bool Arch::IsAArch64(void) const { diff --git a/lib/Arch/Instruction.cpp b/lib/Arch/Instruction.cpp index 12029fdbb..144c66592 100644 --- a/lib/Arch/Instruction.cpp +++ b/lib/Arch/Instruction.cpp @@ -656,6 +656,7 @@ std::string Instruction::Serialize(void) const { case kArchX86: case kArchX86_AVX: case kArchX86_AVX512: ss << "X86"; break; + case kArchThumb2LittleEndian: ss << "Thumb2"; break; case kArchAArch32LittleEndian: ss << "AArch32"; break; case kArchAArch64LittleEndian: ss << "AArch64"; break; case kArchSparc32: ss << "SPARC32"; break; diff --git a/lib/Arch/Name.cpp b/lib/Arch/Name.cpp index ecf3a09b8..14a471ae4 100644 --- a/lib/Arch/Name.cpp +++ b/lib/Arch/Name.cpp @@ -26,7 +26,7 @@ ArchName GetArchName(const llvm::Triple &triple) { case llvm::Triple::ArchType::x86_64: return kArchAMD64; case llvm::Triple::ArchType::aarch64: return kArchAArch64LittleEndian; case llvm::Triple::ArchType::arm: return kArchAArch32LittleEndian; - case llvm::Triple::ArchType::thumb: return kArchAArch32LittleEndian; + case llvm::Triple::ArchType::thumb: return kArchThumb2LittleEndian; case llvm::Triple::sparc: return kArchSparc32; case llvm::Triple::sparcv9: return kArchSparc64; default: return kArchInvalid; @@ -52,6 +52,9 @@ ArchName GetArchName(std::string_view arch_name) { } else if (arch_name == "amd64_avx512") { return kArchAMD64_AVX512; + } else if (arch_name == "thumb2") { + return kArchThumb2LittleEndian; + } else if (arch_name == "aarch32") { return kArchAArch32LittleEndian; @@ -79,6 +82,7 @@ static const std::string_view kArchNames[] = { [kArchAMD64] = "amd64", [kArchAMD64_AVX] = "amd64_avx", [kArchAMD64_AVX512] = "amd64_avx512", + [kArchThumb2LittleEndian] = "thumb2", [kArchAArch32LittleEndian] = "aarch32", [kArchAArch64LittleEndian] = "aarch64", [kArchSparc32] = "sparc32", From 3fed097de65dc9d247e2e80c27d86d07ddd8b747 Mon Sep 17 00:00:00 2001 From: sschriner Date: Mon, 7 Mar 2022 18:56:40 -0500 Subject: [PATCH 4/4] Load/store word/byte (immediate offset) --- lib/Arch/AArch32/Decode.cpp | 16 +++---- lib/Arch/AArch32/Decode.h | 13 ++++++ lib/Arch/AArch32/DecodeThumb2.cpp | 67 +++++++++++++++++++++++------- lib/Arch/AArch32/Semantics/MEM.cpp | 28 +++++++++++++ 4 files changed, 98 insertions(+), 26 deletions(-) diff --git a/lib/Arch/AArch32/Decode.cpp b/lib/Arch/AArch32/Decode.cpp index cd55843ad..95d77507b 100644 --- a/lib/Arch/AArch32/Decode.cpp +++ b/lib/Arch/AArch32/Decode.cpp @@ -565,10 +565,6 @@ static_assert(sizeof(SpecialRegsAndHints) == 4, " "); static constexpr auto kAddressSize = 32u; -static const char *const kIntRegName[] = { - "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", - "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"}; - void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, Operand::Action action) { Operand::Register reg; @@ -608,9 +604,9 @@ void AddImmOp(Instruction &inst, uint64_t value, unsigned size, op.size = size; } -static void AddAddrRegOp(Instruction &inst, const char *reg_name, - unsigned mem_size, Operand::Action mem_action, - unsigned disp, unsigned scale = 0) { +void AddAddrRegOp(Instruction &inst, const char *reg_name, unsigned mem_size, + Operand::Action mem_action, + unsigned disp, unsigned scale) { Operand::Address addr; addr.address_size = 32; addr.base_reg.name = reg_name; @@ -907,7 +903,7 @@ static void AddShiftImmCarryOperand(Instruction &inst, uint32_t reg_num, // (shift_t, shift_n) = DecodeImmShift(type, imm5); // (shifted, carry) = Shift_C(R[m], shift_t, shift_n, PSTATE.C); // See an instruction in Integer Data Processing (three register, immediate shift) set for an example -static void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num, +void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num, uint32_t shift_type, uint32_t shift_size, bool carry_out, bool can_shift_right_by_32) { auto is_rrx = false; @@ -1190,7 +1186,7 @@ static bool EvalPCDest(Instruction &inst, const bool s, const unsigned int rd, auto src2 = EvalOperand(inst, inst.operands[4], uses_linkreg); AddAddrRegOp(inst, kNextPCVariableName.data(), kAddressSize, - Operand::kActionWrite, 0); + Operand::kActionWrite, 0u); if (uses_linkreg) { @@ -3625,7 +3621,7 @@ static uint32_t BytesToBits(const uint8_t *bytes) { bits = (bits << 8) | static_cast(bytes[0]); return bits; } -} // namespace +} // namespace aarch32 // Decode an instruction bool AArch32Arch::DecodeInstruction(uint64_t address, diff --git a/lib/Arch/AArch32/Decode.h b/lib/Arch/AArch32/Decode.h index d21189ef9..082b6eab9 100644 --- a/lib/Arch/AArch32/Decode.h +++ b/lib/Arch/AArch32/Decode.h @@ -24,6 +24,10 @@ static constexpr auto kPCRegNum = 15u; static constexpr auto kLRRegNum = 14u; static constexpr auto kSPRegNum = 13u; +static const char *const kIntRegName[] = { + "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"}; + typedef std::optional(InstEval)(uint32_t, uint32_t); //bool DecodeCondition(Instruction &inst, uint32_t cond); @@ -34,9 +38,18 @@ void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size, Operand::Action action); +void AddAddrRegOp(Instruction &inst, const char *reg_name, unsigned mem_size, + Operand::Action mem_action, + unsigned disp, unsigned scale = 0); + void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32, bool is_signed = false); +void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num, + uint32_t shift_type, uint32_t shift_size, + bool carry_out, bool can_shift_right_by_32); + + } } diff --git a/lib/Arch/AArch32/DecodeThumb2.cpp b/lib/Arch/AArch32/DecodeThumb2.cpp index d7784d4e6..4117b5683 100644 --- a/lib/Arch/AArch32/DecodeThumb2.cpp +++ b/lib/Arch/AArch32/DecodeThumb2.cpp @@ -21,6 +21,7 @@ #include "Arch.h" #include "Decode.h" #include "remill/BC/ABI.h" +#include "remill/Arch/Name.h" namespace remill { @@ -319,7 +320,7 @@ static const char *const kIdpAddSubComp1LowRegImm[] = { // 01 CMP (immediate) // 10 ADD, ADDS (immediate) // 11 SUB, SUBS (immediate) -// Add, subtract, compare, move (one low register and immediate) TODO(sonya) +// Add, subtract, compare, move (one low register and immediate) static bool TryDecode16AddSubComp1LowRegImm(Instruction &inst, uint16_t bits) { // TODO(sonya): setflags = !InITBlock() @@ -339,37 +340,68 @@ static bool TryDecode16AddSubComp1LowRegImm(Instruction &inst, uint16_t bits) { } -// MOV, MOVS (register) — T2 TODO(sonya) +// MOV, MOVS (register) — T2 static bool TryDecode16MOVrT2(Instruction &inst, uint16_t bits) { - inst.category = Instruction::kCategoryError; - return false; -// const MOVrT2_16 enc = {bits}; -// inst.category = Instruction::kCategoryNormal; -// inst.function = "MOVL_T2"; -// -// return true; + const MOVrT2_16 enc = {bits}; + inst.category = Instruction::kCategoryNormal; + inst.function = "MOVL_T2"; + + // TODO(sonya): setflags = !InITBlock() + // if op == '00' && imm5 == '00000' && InITBlock() then UNPREDICTABLE; + + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionWrite); + + // (shift_t, shift_n) = DecodeImmShift(op, imm5); + AddShiftRegImmOperand(inst, uint32_t(enc.Rm), uint32_t(enc.op), + uint32_t(enc.imm5), false, false); + + return true; } +static const char *const kIdpLoadStoreWordByte[] = { + [0b00] = "STR_T2", [0b01] = "LDR_T2", + [0b10] = "STRB_T2", [0b11] = "LDRB_T2" +}; + // B L // 0 0 STR (immediate) // 0 1 LDR (immediate) // 1 0 STRB (immediate) // 1 1 LDRB (immediate) -// Load/store word/byte (immediate offset) TODO(sonya) +// Load/store word/byte (immediate offset) +template static bool TryDecode16LoadStoreWordByteImm(Instruction &inst, uint16_t bits) { - inst.category = Instruction::kCategoryError; - return false; -// const LoadStoreWordByteImm16 enc = {bits}; + inst.category = Instruction::kCategoryNormal; + const LoadStoreWordByteImm16 enc = {bits}; + inst.function = kIdpLoadStoreWordByte[(enc.B << 1) | enc.L]; + + AddAddrRegOp(inst, kIntRegName[enc.Rt], 32u, kRegAction, 0u); + AddAddrRegOp(inst, kIntRegName[enc.Rn], 32u, kMemAction, enc.imm5 << 2); + + return true; } +static TryDecode16 *kDecode16LoadStoreWordByteImm[] = { + [0b00] = TryDecode16LoadStoreWordByteImm, + [0b01] = TryDecode16LoadStoreWordByteImm, + [0b10] = TryDecode16LoadStoreWordByteImm, + [0b11] = TryDecode16LoadStoreWordByteImm +}; + // L // 0 STR (immediate) // 1 LDR (immediate) // Load/store (SP-relative) TODO(sonya) static bool TryDecode16LoadStoreSPRelative(Instruction &inst, uint16_t bits) { inst.category = Instruction::kCategoryError; + return false; // const LoadStoreSPRelative16 enc = {bits}; } @@ -377,7 +409,7 @@ static bool TryDecode16LoadStoreSPRelative(Instruction &inst, uint16_t bits) { // SP // 0 ADR // 1 ADD, ADDS (SP plus immediate) -// Add PC/SP (immediate) TODO(sonya) +// Add PC/SP (immediate) static bool TryDecode16AddPCSP(Instruction &inst, uint16_t bits) { const AddPCSPImm16 enc = {bits}; @@ -556,7 +588,7 @@ static TryDecode16 *Try16bit(uint16_t bits) { // 011xxx Load/store word/byte (immediate offset) } else if ((op0 >> 3) == 0b011) { - return TryDecode16LoadStoreWordByteImm; + return kDecode16LoadStoreWordByteImm[(op0 >> 1) & 0b11]; // 1001xx Load/store (SP-relative) } else if ((op0 >> 2) == 0b1001) { @@ -587,6 +619,8 @@ static TryDecode16 *Try16bit(uint16_t bits) { return nullptr; } + + // op0 op1 op2 op3 op4 op5 // 0 1110 0x 0x0 0 MSR (register) // 0 1110 0x 0x0 1 MSR (Banked register) @@ -705,7 +739,8 @@ bool DecodeThumb2Instruction(Instruction &inst, uint32_t bits) { LOG(ERROR) << inst.Serialize(); return ret; } -} // namespace + +} // namespace aarch32 } // namespace remill diff --git a/lib/Arch/AArch32/Semantics/MEM.cpp b/lib/Arch/AArch32/Semantics/MEM.cpp index bfa1a446c..004368892 100644 --- a/lib/Arch/AArch32/Semantics/MEM.cpp +++ b/lib/Arch/AArch32/Semantics/MEM.cpp @@ -507,3 +507,31 @@ DEF_ISEL(STMIB) = STMDB; DEF_ISEL(LDMIB) = LDM; // DEF_ISEL(LDMe) = LDMe; + +// Thumb2 +namespace { + + +template +DEF_SEM(STR_T2, M dst, R32 src1) { + auto src = TruncTo(Read(src1)); + WriteZExt(dst, src); + + return memory; +} + +template +DEF_SEM(LDR_T2, R32W dst, M src1) { + auto src = Read(src1); + WriteZExt(dst, src); + + return memory; +} +} // namespace + +DEF_ISEL(STR_T2) = STR_T2; +DEF_ISEL(LDR_T2) = LDR_T2; +DEF_ISEL(STRB_T2) = STR_T2; +DEF_ISEL(LDRB_T2) = LDR_T2; + +