Skip to content

Commit 74cec63

Browse files
committed
[BOLT] Add symbolizer for AArch64 disassembler. NFCI
Add AArch64MCSymbolizer, which symbolizes MCInst operands during disassembly. The symbolization was previously done in `BinaryFunction::disassemble()`, but it is also required by `scanExternalRefs()` for "lite" mode functionality. Hence, similar to x86, I've implemented the symbolizer interface, which uses BinaryFunction relocations to properly create instruction operands. I expect the result of the disassembly to be identical after the change. So far, the only quirk of the AArch64 disassembler that I have found is that it marks `ldr` instructions as branches by setting the `IsBranch` parameter to true. The symbolizer ignores that parameter and relies on the `MCPlusBuilder` interface instead.
1 parent 92ddbbd commit 74cec63

File tree

5 files changed

+139
-20
lines changed

5 files changed

+139
-20
lines changed

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,9 +1434,8 @@ Error BinaryFunction::disassemble() {
14341434
if (BC.isAArch64())
14351435
handleAArch64IndirectCall(Instruction, Offset);
14361436
}
1437-
} else if (BC.isAArch64() || BC.isRISCV()) {
1437+
} else if (BC.isRISCV()) {
14381438
// Check if there's a relocation associated with this instruction.
1439-
bool UsedReloc = false;
14401439
for (auto Itr = Relocations.lower_bound(Offset),
14411440
ItrE = Relocations.lower_bound(Offset + Size);
14421441
Itr != ItrE; ++Itr) {
@@ -1461,24 +1460,6 @@ Error BinaryFunction::disassemble() {
14611460
Relocation.Type);
14621461
(void)Result;
14631462
assert(Result && "cannot replace immediate with relocation");
1464-
1465-
// For aarch64, if we replaced an immediate with a symbol from a
1466-
// relocation, we mark it so we do not try to further process a
1467-
// pc-relative operand. All we need is the symbol.
1468-
UsedReloc = true;
1469-
}
1470-
1471-
if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
1472-
if (auto NewE = handleErrors(
1473-
handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
1474-
[&](const BOLTError &E) -> Error {
1475-
if (E.isFatal())
1476-
return Error(std::make_unique<BOLTError>(std::move(E)));
1477-
if (!E.getMessage().empty())
1478-
E.log(BC.errs());
1479-
return Error::success();
1480-
}))
1481-
return Error(std::move(NewE));
14821463
}
14831464
}
14841465

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "AArch64MCSymbolizer.h"
1314
#include "MCTargetDesc/AArch64AddressingModes.h"
1415
#include "MCTargetDesc/AArch64FixupKinds.h"
1516
#include "MCTargetDesc/AArch64MCExpr.h"
@@ -133,6 +134,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
133134
public:
134135
using MCPlusBuilder::MCPlusBuilder;
135136

137+
std::unique_ptr<MCSymbolizer>
138+
createTargetSymbolizer(BinaryFunction &Function,
139+
bool CreateNewSymbols) const override {
140+
return std::make_unique<AArch64MCSymbolizer>(Function, CreateNewSymbols);
141+
}
142+
136143
MCPhysReg getStackPointer() const override { return AArch64::SP; }
137144
MCPhysReg getFramePointer() const override { return AArch64::FP; }
138145

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "AArch64MCSymbolizer.h"
10+
#include "bolt/Core/BinaryContext.h"
11+
#include "bolt/Core/BinaryFunction.h"
12+
#include "bolt/Core/MCPlusBuilder.h"
13+
#include "bolt/Core/Relocation.h"
14+
#include "llvm/MC/MCInst.h"
15+
#include "llvm/MC/MCRegisterInfo.h"
16+
#include "llvm/Support/Debug.h"
17+
18+
#define DEBUG_TYPE "bolt-symbolizer"
19+
20+
namespace llvm {
21+
namespace bolt {
22+
23+
// Out-of-line definition of the destructor declared in AArch64MCSymbolizer.h.
// There is nothing to release by hand, so let the compiler generate the body.
AArch64MCSymbolizer::~AArch64MCSymbolizer() = default;
24+
25+
/// Try to append a symbolic expression operand to \p Inst in place of the
/// immediate \p Value that is being decoded.
///
/// Branches and calls are never symbolized here. For any other instruction, a
/// relocation recorded in the function at the instruction's offset is used if
/// one exists; otherwise, an instruction with a PC-relative operand is
/// resolved against the address \p InstAddress + \p Value.
///
/// \returns true if an expression operand was added to \p Inst, false if the
/// instruction was left untouched.
bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
    MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
    bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
  BinaryContext &BC = Function.getBinaryContext();
  MCContext &Context = *BC.Ctx;

  // NOTE: the IsBranch flag passed in by the disassembler may be wrong, so
  // classify the instruction through MCPlusBuilder instead.
  if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
    return false;

  // TODO: add handling for linker "relaxation". At the moment, relocations
  // corresponding to "relaxed" instructions are excluded from BinaryFunction
  // relocation list.

  // Append "Symbol [+ Addend]" to the instruction, wrapped in whatever target
  // expression MCPlusBuilder produces for the given relocation type.
  auto appendSymbolRef = [&](const MCSymbol *Symbol, uint64_t Addend,
                             uint64_t RelType) {
    const MCExpr *SymExpr = MCSymbolRefExpr::create(Symbol, Context);
    if (Addend != 0)
      SymExpr = MCBinaryExpr::createAdd(
          SymExpr, MCConstantExpr::create(Addend, Context), Context);
    const MCExpr *TargetExpr =
        BC.MIB->getTargetExprFor(Inst, SymExpr, Context, RelType);
    Inst.addOperand(MCOperand::createExpr(TargetExpr));
  };

  // A relocation attached to this instruction takes priority over decoding
  // the immediate.
  const uint64_t OffsetInFunction = InstAddress - Function.getAddress();
  if (const Relocation *Rel = Function.getRelocationAt(OffsetInFunction)) {
    appendSymbolRef(Rel->Symbol, Rel->Addend, Rel->Type);
    return true;
  }

  if (!BC.MIB->hasPCRelOperand(Inst))
    return false;

  // The decoded immediate is relative to the instruction address.
  const uint64_t TargetAddress = static_cast<uint64_t>(Value) + InstAddress;

  const MCSymbol *TargetSymbol = nullptr;
  uint64_t TargetOffset = 0;
  if (CreateNewSymbols) {
    std::tie(TargetSymbol, TargetOffset) =
        BC.handleAddressRef(TargetAddress, Function, /*IsPCRel*/ true);
  } else {
    // Only reference data objects that are already known; give up otherwise.
    BinaryData *BD = BC.getBinaryDataContainingAddress(TargetAddress);
    if (!BD)
      return false;
    TargetSymbol = BD->getSymbol();
    TargetOffset = TargetAddress - BD->getAddress();
  }

  appendSymbolRef(TargetSymbol, TargetOffset, 0);
  return true;
}
80+
81+
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
82+
int64_t Value,
83+
uint64_t Address) {}
84+
85+
} // namespace bolt
86+
} // namespace llvm
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//===- bolt/Target/AArch64/AArch64MCSymbolizer.h ----------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
10+
#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
11+
12+
#include "bolt/Core/BinaryFunction.h"
13+
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
14+
15+
namespace llvm {
16+
namespace bolt {
17+
18+
class AArch64MCSymbolizer : public MCSymbolizer {
19+
protected:
20+
BinaryFunction &Function;
21+
bool CreateNewSymbols{true};
22+
23+
public:
24+
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
25+
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
26+
Function(Function), CreateNewSymbols(CreateNewSymbols) {}
27+
28+
AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete;
29+
AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete;
30+
virtual ~AArch64MCSymbolizer();
31+
32+
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
33+
int64_t Value, uint64_t Address, bool IsBranch,
34+
uint64_t Offset, uint64_t OpSize,
35+
uint64_t InstSize) override;
36+
37+
void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value,
38+
uint64_t Address) override;
39+
};
40+
41+
} // namespace bolt
42+
} // namespace llvm
43+
44+
#endif

bolt/lib/Target/AArch64/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ endif()
1818

1919
add_llvm_library(LLVMBOLTTargetAArch64
2020
AArch64MCPlusBuilder.cpp
21+
AArch64MCSymbolizer.cpp
2122

2223
NO_EXPORT
2324
DISABLE_LLVM_LINK_LLVM_DYLIB

0 commit comments

Comments
 (0)