Skip to content

Commit b971d4d

Browse files
authored
[BOLT][AArch64] Add symbolizer for AArch64 disassembler. NFCI (#127969)
Add AArch64MCSymbolizer that symbolizes `MCInst` operands during disassembly. The symbolization was previously done in `BinaryFunction::disassemble()`, but it is also required by `scanExternalRefs()` for "lite" mode functionality. Hence, similar to x86, I've implemented the symbolizer interface that uses `BinaryFunction` relocations to properly create instruction operands. I expect the result of the disassembly to be identical after the change. AArch64 disassembler was not calling `tryAddingSymbolicOperand()` for `MOV` instructions. Fix that. Additionally, the disassembler marks `ldr` instructions as branches by setting `IsBranch` parameter to true. Ignore the parameter and rely on `MCPlusBuilder` interface instead. I've modified `--check-encoding` flag to check symolization of operands of instructions that have relocations against them.
1 parent 39402cd commit b971d4d

File tree

6 files changed

+162
-23
lines changed

6 files changed

+162
-23
lines changed

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,6 +1333,22 @@ Error BinaryFunction::disassemble() {
13331333
BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
13341334
BC.errs() << '\n';
13351335
}
1336+
1337+
// Verify that we've symbolized an operand if the instruction has a
1338+
// relocation against it.
1339+
if (getRelocationInRange(Offset, Offset + Size)) {
1340+
bool HasSymbolicOp = false;
1341+
for (MCOperand &Op : Instruction) {
1342+
if (Op.isExpr()) {
1343+
HasSymbolicOp = true;
1344+
break;
1345+
}
1346+
}
1347+
if (!HasSymbolicOp)
1348+
return createFatalBOLTError(
1349+
"expected symbolized operand for instruction at 0x" +
1350+
Twine::utohexstr(AbsoluteInstrAddr));
1351+
}
13361352
}
13371353

13381354
// Special handling for AVX-512 instructions.
@@ -1440,9 +1456,8 @@ Error BinaryFunction::disassemble() {
14401456
if (BC.isAArch64())
14411457
handleAArch64IndirectCall(Instruction, Offset);
14421458
}
1443-
} else if (BC.isAArch64() || BC.isRISCV()) {
1459+
} else if (BC.isRISCV()) {
14441460
// Check if there's a relocation associated with this instruction.
1445-
bool UsedReloc = false;
14461461
for (auto Itr = Relocations.lower_bound(Offset),
14471462
ItrE = Relocations.lower_bound(Offset + Size);
14481463
Itr != ItrE; ++Itr) {
@@ -1467,24 +1482,6 @@ Error BinaryFunction::disassemble() {
14671482
Relocation.Type);
14681483
(void)Result;
14691484
assert(Result && "cannot replace immediate with relocation");
1470-
1471-
// For aarch64, if we replaced an immediate with a symbol from a
1472-
// relocation, we mark it so we do not try to further process a
1473-
// pc-relative operand. All we need is the symbol.
1474-
UsedReloc = true;
1475-
}
1476-
1477-
if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
1478-
if (auto NewE = handleErrors(
1479-
handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
1480-
[&](const BOLTError &E) -> Error {
1481-
if (E.isFatal())
1482-
return Error(std::make_unique<BOLTError>(std::move(E)));
1483-
if (!E.getMessage().empty())
1484-
E.log(BC.errs());
1485-
return Error::success();
1486-
}))
1487-
return Error(std::move(NewE));
14881485
}
14891486
}
14901487

@@ -1580,8 +1577,9 @@ bool BinaryFunction::scanExternalRefs() {
15801577
assert(FunctionData.size() == getMaxSize() &&
15811578
"function size does not match raw data size");
15821579

1583-
BC.SymbolicDisAsm->setSymbolizer(
1584-
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
1580+
if (BC.isX86())
1581+
BC.SymbolicDisAsm->setSymbolizer(
1582+
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
15851583

15861584
// Disassemble contents of the function. Detect code entry points and create
15871585
// relocations for references to code that will be moved.

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "AArch64MCSymbolizer.h"
1314
#include "MCTargetDesc/AArch64AddressingModes.h"
1415
#include "MCTargetDesc/AArch64FixupKinds.h"
1516
#include "MCTargetDesc/AArch64MCExpr.h"
@@ -134,6 +135,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
134135
public:
135136
using MCPlusBuilder::MCPlusBuilder;
136137

138+
std::unique_ptr<MCSymbolizer>
139+
createTargetSymbolizer(BinaryFunction &Function,
140+
bool CreateNewSymbols) const override {
141+
return std::make_unique<AArch64MCSymbolizer>(Function, CreateNewSymbols);
142+
}
143+
137144
MCPhysReg getStackPointer() const override { return AArch64::SP; }
138145
MCPhysReg getFramePointer() const override { return AArch64::FP; }
139146

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "AArch64MCSymbolizer.h"
10+
#include "bolt/Core/BinaryContext.h"
11+
#include "bolt/Core/BinaryFunction.h"
12+
#include "bolt/Core/MCPlusBuilder.h"
13+
#include "bolt/Core/Relocation.h"
14+
#include "llvm/MC/MCInst.h"
15+
#include "llvm/Support/Debug.h"
16+
17+
#define DEBUG_TYPE "bolt-symbolizer"
18+
19+
namespace llvm {
20+
namespace bolt {
21+
22+
AArch64MCSymbolizer::~AArch64MCSymbolizer() {}
23+
24+
bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
25+
MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
26+
bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
27+
BinaryContext &BC = Function.getBinaryContext();
28+
MCContext *Ctx = BC.Ctx.get();
29+
30+
// NOTE: the callee may incorrectly set IsBranch.
31+
if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
32+
return false;
33+
34+
// TODO: add handling for linker "relaxation". At the moment, relocations
35+
// corresponding to "relaxed" instructions are excluded from BinaryFunction
36+
// relocation list.
37+
38+
const uint64_t InstOffset = InstAddress - Function.getAddress();
39+
const Relocation *Relocation = Function.getRelocationAt(InstOffset);
40+
41+
/// Add symbolic operand to the instruction with an optional addend.
42+
auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend,
43+
uint64_t RelType) {
44+
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
45+
if (Addend)
46+
Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx),
47+
*Ctx);
48+
Inst.addOperand(MCOperand::createExpr(
49+
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
50+
};
51+
52+
if (Relocation) {
53+
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
54+
return true;
55+
}
56+
57+
if (!BC.MIB->hasPCRelOperand(Inst))
58+
return false;
59+
60+
Value += InstAddress;
61+
const MCSymbol *TargetSymbol;
62+
uint64_t TargetOffset;
63+
if (!CreateNewSymbols) {
64+
if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) {
65+
TargetSymbol = BD->getSymbol();
66+
TargetOffset = Value - BD->getAddress();
67+
} else {
68+
return false;
69+
}
70+
} else {
71+
std::tie(TargetSymbol, TargetOffset) =
72+
BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
73+
}
74+
75+
addOperand(TargetSymbol, TargetOffset, 0);
76+
77+
return true;
78+
}
79+
80+
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
81+
int64_t Value,
82+
uint64_t Address) {}
83+
84+
} // namespace bolt
85+
} // namespace llvm
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp --------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
10+
#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
11+
12+
#include "bolt/Core/BinaryFunction.h"
13+
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
14+
15+
namespace llvm {
16+
namespace bolt {
17+
18+
class AArch64MCSymbolizer : public MCSymbolizer {
19+
protected:
20+
BinaryFunction &Function;
21+
bool CreateNewSymbols{true};
22+
23+
public:
24+
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
25+
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
26+
Function(Function), CreateNewSymbols(CreateNewSymbols) {}
27+
28+
AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete;
29+
AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete;
30+
virtual ~AArch64MCSymbolizer();
31+
32+
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
33+
int64_t Value, uint64_t Address, bool IsBranch,
34+
uint64_t Offset, uint64_t OpSize,
35+
uint64_t InstSize) override;
36+
37+
void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value,
38+
uint64_t Address) override;
39+
};
40+
41+
} // namespace bolt
42+
} // namespace llvm
43+
44+
#endif

bolt/lib/Target/AArch64/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
set(LLVM_LINK_COMPONENTS
22
MC
3+
MCDisassembler
34
Support
45
AArch64Desc
56
)
@@ -18,6 +19,7 @@ endif()
1819

1920
add_llvm_library(LLVMBOLTTargetAArch64
2021
AArch64MCPlusBuilder.cpp
22+
AArch64MCSymbolizer.cpp
2123

2224
NO_EXPORT
2325
DISABLE_LLVM_LINK_LLVM_DYLIB

llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,10 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
744744
Inst.getOpcode() == AArch64::MOVKXi)
745745
Inst.addOperand(Inst.getOperand(0));
746746

747-
Inst.addOperand(MCOperand::createImm(imm));
747+
if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, /*IsBranch*/ false, 0,
748+
0, 4))
749+
Inst.addOperand(MCOperand::createImm(imm));
750+
748751
Inst.addOperand(MCOperand::createImm(shift));
749752
return Success;
750753
}

0 commit comments

Comments
 (0)