Skip to content

Commit acb26d6

Browse files
authored
adds support for big-endian arm and thumb architectures (#1309)
The big-endian thumb and arm targets are [not supported by LLVM][1], and while [the fix][2] has been available for many years, it has still not been reviewed. I tried to [draw attention][3] to it, but with no success. Since it looks like they are not going to fix it, we need to fix it on our side. The solution is a little bit hacky, but we can keep it until we switch to Ghidra for disassembling/lifting.

Examples:

```
$ bap mc --arch=thumb --show-insn=asm --show-bil -- 1a 42
tst r2, r3
{
#1 := R2 & R3
ZF := #1 = 0
NF := extract:31:31[#1]
}
$ bap mc --arch=thumb --order=big --show-insn=asm --show-bil -- 42 1a
tst r2, r3
{
#1 := R2 & R3
ZF := #1 = 0
NF := extract:31:31[#1]
}
$ file echo
echo: ELF 32-bit MSB executable, ARM, version 1 (ARM), dynamically linked, interpreter /lib/ld-uClibc.so.0, for GNU/Linux 2.0.0, stripped
$ bap echo -dasm | grep 8c90 -A4
8c90: <main>
8c90:
8c90: e9 2d 47 f0 push {r4, r5, r6, r7, r8, r9, r10, lr}
8c94: e5 9f 33 34 ldr r3, [pc, #0x334]
8c98: e1 a0 70 01 mov r7, r1
8c9c: e1 a0 60 00 mov r6, r0
8ca0: e3 a0 00 06 mov r0, #6
$ objdump echo -d | grep 8c90 -A4
00008c90 <main@@base>:
8c90: e92d47f0 push {r4, r5, r6, r7, r8, r9, sl, lr}
8c94: e59f3334 ldr r3, [pc, #820] ; 8fd0 <main@@base+0x340>
8c98: e1a07001 mov r7, r1
8c9c: e1a06000 mov r6, r0
8ca0: e3a00006 mov r0, #6
```

Fixes #1299.

[1]: https://bugs.llvm.org/show_bug.cgi?id=38721
[2]: https://reviews.llvm.org/D48811
[3]: https://twitter.com/ivg_t/status/1384932479967055877?s=20
1 parent 2f38fb4 commit acb26d6

File tree

7 files changed

+95
-19
lines changed

7 files changed

+95
-19
lines changed

lib/arm/arm_target.ml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,9 @@ let llvm_a64 = CT.Language.declare ~package "llvm-aarch64"
388388
module Dis = Disasm_expert.Basic
389389

390390
let register ?attrs encoding triple =
391-
Dis.register encoding @@ fun _ ->
391+
Dis.register encoding @@ fun t ->
392+
let triple = if Theory.Endianness.(eb = Theory.Target.endianness t)
393+
then triple ^ "eb" else triple in
392394
Dis.create ?attrs ~backend:"llvm" triple
393395

394396
let symbol_values doc =

lib/bap_llvm/llvm_disasm.cpp

Lines changed: 84 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,22 +55,85 @@ bool ends_with(const std::string& str, const std::string &suffix) {
5555

5656

5757
class MemoryObject {
58-
memory mem;
58+
const char *data;
59+
uint64_t base;
60+
uint64_t size;
61+
uint64_t offset;
5962
public:
60-
MemoryObject(memory mem) : mem(mem) {}
63+
MemoryObject() :
64+
data(NULL),
65+
base(0),
66+
size(0),
67+
offset(0)
68+
{}
69+
explicit MemoryObject(const memory &mem) {
70+
set_memory(mem);
71+
}
72+
73+
virtual ~MemoryObject() {}
6174

6275
uint64_t getBase() const {
63-
return mem.base;
76+
return base;
77+
}
78+
79+
uint64_t getExtent() const {
80+
return size;
81+
}
82+
83+
virtual llvm::ArrayRef<uint8_t> view(uint64_t pc) {
84+
int off = pc - base;
85+
int len = size - off;
86+
return llvm::ArrayRef<uint8_t>((const uint8_t*)&data[offset+off], len);
87+
}
88+
89+
void set_memory(const memory &m) {
90+
data = m.data;
91+
base = m.base;
92+
size = m.loc.len;
93+
offset = m.loc.off;
6494
}
95+
};
96+
97+
// at any time provides a view on up to the first four bytes
98+
// of the memory, with the two bytes of each 16-bit pair swapped.
99+
// Necessary to fix #1299 until it is resolved in upstream
100+
// (see https://reviews.llvm.org/D48811 for the upstream fix).
65101

66-
uint64_t getExtent() {
67-
return mem.loc.len;
102+
class ReversingMemoryObject2 : public MemoryObject {
103+
uint8_t buf[4];
104+
public:
105+
llvm::ArrayRef<uint8_t> view(uint64_t pc) {
106+
auto data = MemoryObject::view(pc);
107+
int size = data.size();
108+
if (size > 1) {
109+
buf[0] = data[1];
110+
buf[1] = data[0];
111+
}
112+
if (size > 3) {
113+
buf[2] = data[3];
114+
buf[3] = data[2];
115+
}
116+
return llvm::ArrayRef<uint8_t>(&buf[0],std::min(4,size));
68117
}
118+
};
69119

120+
// at any time provides a view on the first four bytes
121+
// of the memory in a reversed order.
122+
class ReversingMemoryObject4 : public MemoryObject {
123+
uint8_t buf[4];
124+
public:
70125
llvm::ArrayRef<uint8_t> view(uint64_t pc) {
71-
int off = pc - this->getBase();
72-
int len = this->getExtent() - off;
73-
return llvm::ArrayRef<uint8_t>((const uint8_t*)&mem.data[mem.loc.off+off], len);
126+
auto data = MemoryObject::view(pc);
127+
int size = data.size();
128+
if (size > 3) {
129+
buf[0] = data[3];
130+
buf[1] = data[2];
131+
}
132+
if (size > 1) {
133+
buf[2] = data[1];
134+
buf[3] = data[0];
135+
}
136+
return llvm::ArrayRef<uint8_t>(&buf[0],std::min(4,size));
74137
}
75138
};
76139

@@ -256,6 +319,18 @@ class llvm_disassembler : public disassembler_interface {
256319
self->ins_tab = self->create_table(ins_info->getNumOpcodes(), ins_info);
257320
self->reg_tab = self->create_table(reg_info->getNumRegs(), reg_info);
258321
self->init_prefixes();
322+
323+
switch (t.getArch()) {
324+
case llvm::Triple::armeb:
325+
self->mem.reset(new ReversingMemoryObject4());
326+
break;
327+
case llvm::Triple::thumbeb:
328+
self->mem.reset(new ReversingMemoryObject2());
329+
break;
330+
default:
331+
self->mem.reset(new MemoryObject());
332+
};
333+
259334
return {self, {0} };
260335
}
261336

@@ -268,9 +343,8 @@ class llvm_disassembler : public disassembler_interface {
268343
return reg_tab;
269344
}
270345

271-
//! this member function will not be needed anymore
272346
void set_memory(memory m) {
273-
mem.reset(new MemoryObject(m));
347+
mem->set_memory(m);
274348
}
275349

276350
bool is_prefix() const {

oasis/arm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Library arm_plugin
3737
Path: plugins/arm
3838
FindlibName: bap-plugin-arm
3939
BuildDepends: bap, bap-core-theory, bap-abi, bap-arm, bap-c,
40-
core_kernel, bap-main, bap-api, monads
40+
core_kernel, bap-main, bap-api, monads, ppx_bap
4141
InternalModules: Arm_main, Arm_gnueabi
4242
DataFiles: semantics/*.lisp ($datadir/bap/primus/semantics)
4343
XMETADescription: provide ARM lifter

plugins/arm/arm_main.ml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,15 @@ let interworking =
1616
~doc:"Enable ARM/Thumb interworking. Defaults to (auto),
1717
i.e., to the automatic detection of interworking"
1818

19+
type arms = [
20+
| Arch.arm
21+
| Arch.armeb
22+
] [@@deriving enumerate]
1923

2024
let () = Bap_main.Extension.declare ~doc @@ fun ctxt ->
2125
let interworking = ctxt-->interworking in
2226
Arm_target.load ?interworking ();
23-
List.iter Arch.all_of_arm ~f:(fun arch ->
27+
List.iter all_of_arms ~f:(fun arch ->
2428
register_target (arch :> arch) (module ARM);
2529
Arm_gnueabi.setup ());
2630
Ok ()

plugins/arm/semantics/arm-bits.lisp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
(defpackage arm (:use core target))
2-
(declare (context (target armv4+le)))
2+
(declare (context (target arm)))
33

44
(in-package arm)
55

plugins/arm/semantics/thumb.lisp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
(require bits)
22
(require arm-bits)
33

4-
(declare (context (target armv4+le)))
4+
(declare (context (target arm)))
55

66
(defpackage thumb (:use core target arm))
77
(defpackage llvm-thumb (:use thumb))

plugins/mc/mc_main.ml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ type error =
9090
| Bad_insn of mem * int * int
9191
| Create_mem of Error.t
9292
| No_input
93-
| Unknown_arch
9493
| Invalid_base of string
9594
| Trailing_data of int
9695
| Inconsistency of KB.conflict
@@ -651,9 +650,6 @@ let string_of_failure = function
651650
KB.Conflict.pp conflict
652651
| Bad_user_input -> "Could not parse: malformed input"
653652
| No_input -> "No input was received"
654-
| Unknown_arch ->
655-
sprintf "Unknown architecture. Supported architectures:\n%s" @@
656-
String.concat ~sep:"\n" @@ List.map Arch.all ~f:Arch.to_string
657653
| Trailing_data 1 -> "the last byte wasn't disassembled"
658654
| Trailing_data left ->
659655
sprintf "%d bytes were left non disassembled" left

0 commit comments

Comments
 (0)