Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 20 additions & 22 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1327,6 +1327,22 @@ Error BinaryFunction::disassemble() {
BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
BC.errs() << '\n';
}

// Verify that we've symbolized an operand if the instruction has a
// relocation against it.
if (getRelocationInRange(Offset, Offset + Size)) {
bool HasSymbolicOp = false;
for (MCOperand &Op : Instruction) {
if (Op.isExpr()) {
HasSymbolicOp = true;
break;
}
}
if (!HasSymbolicOp)
return createFatalBOLTError(
"expected symbolized operand for instruction at 0x" +
Twine::utohexstr(AbsoluteInstrAddr));
}
}

// Special handling for AVX-512 instructions.
Expand Down Expand Up @@ -1434,9 +1450,8 @@ Error BinaryFunction::disassemble() {
if (BC.isAArch64())
handleAArch64IndirectCall(Instruction, Offset);
}
} else if (BC.isAArch64() || BC.isRISCV()) {
} else if (BC.isRISCV()) {
// Check if there's a relocation associated with this instruction.
bool UsedReloc = false;
for (auto Itr = Relocations.lower_bound(Offset),
ItrE = Relocations.lower_bound(Offset + Size);
Itr != ItrE; ++Itr) {
Expand All @@ -1461,24 +1476,6 @@ Error BinaryFunction::disassemble() {
Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");

// For aarch64, if we replaced an immediate with a symbol from a
// relocation, we mark it so we do not try to further process a
// pc-relative operand. All we need is the symbol.
UsedReloc = true;
}

if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
if (auto NewE = handleErrors(
handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
[&](const BOLTError &E) -> Error {
if (E.isFatal())
return Error(std::make_unique<BOLTError>(std::move(E)));
if (!E.getMessage().empty())
E.log(BC.errs());
return Error::success();
}))
return Error(std::move(NewE));
}
}

Expand Down Expand Up @@ -1578,8 +1575,9 @@ bool BinaryFunction::scanExternalRefs() {
assert(FunctionData.size() == getMaxSize() &&
"function size does not match raw data size");

BC.SymbolicDisAsm->setSymbolizer(
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
if (BC.isX86())
BC.SymbolicDisAsm->setSymbolizer(
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));

// Disassemble contents of the function. Detect code entry points and create
// relocations for references to code that will be moved.
Expand Down
7 changes: 7 additions & 0 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//

#include "AArch64MCSymbolizer.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
Expand Down Expand Up @@ -133,6 +134,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;

std::unique_ptr<MCSymbolizer>
createTargetSymbolizer(BinaryFunction &Function,
bool CreateNewSymbols) const override {
return std::make_unique<AArch64MCSymbolizer>(Function, CreateNewSymbols);
}

MCPhysReg getStackPointer() const override { return AArch64::SP; }
MCPhysReg getFramePointer() const override { return AArch64::FP; }

Expand Down
85 changes: 85 additions & 0 deletions bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64MCSymbolizer.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Core/Relocation.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "bolt-symbolizer"

namespace llvm {
namespace bolt {

AArch64MCSymbolizer::~AArch64MCSymbolizer() {}

bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
BinaryContext &BC = Function.getBinaryContext();
MCContext *Ctx = BC.Ctx.get();

// NOTE: the callee may incorrectly set IsBranch.
if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
return false;

// TODO: add handling for linker "relaxation". At the moment, relocations
// corresponding to "relaxed" instructions are excluded from BinaryFunction
// relocation list.

const uint64_t InstOffset = InstAddress - Function.getAddress();
const Relocation *Relocation = Function.getRelocationAt(InstOffset);

/// Add symbolic operand to the instruction with an optional addend.
auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend,
uint64_t RelType) {
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
if (Addend)
Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx),
*Ctx);
Inst.addOperand(MCOperand::createExpr(
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
};

if (Relocation) {
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
return true;
}

if (!BC.MIB->hasPCRelOperand(Inst))
return false;

Value += InstAddress;
const MCSymbol *TargetSymbol;
uint64_t TargetOffset;
if (!CreateNewSymbols) {
if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) {
TargetSymbol = BD->getSymbol();
TargetOffset = Value - BD->getAddress();
} else {
return false;
}
} else {
std::tie(TargetSymbol, TargetOffset) =
BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
}

addOperand(TargetSymbol, TargetOffset, 0);

return true;
}

void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
int64_t Value,
uint64_t Address) {}

} // namespace bolt
} // namespace llvm
44 changes: 44 additions & 0 deletions bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H

#include "bolt/Core/BinaryFunction.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"

namespace llvm {
namespace bolt {

class AArch64MCSymbolizer : public MCSymbolizer {
protected:
BinaryFunction &Function;
bool CreateNewSymbols{true};

public:
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
Function(Function), CreateNewSymbols(CreateNewSymbols) {}

AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete;
AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete;
virtual ~AArch64MCSymbolizer();

bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
int64_t Value, uint64_t Address, bool IsBranch,
uint64_t Offset, uint64_t OpSize,
uint64_t InstSize) override;

void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value,
uint64_t Address) override;
};

} // namespace bolt
} // namespace llvm

#endif
2 changes: 2 additions & 0 deletions bolt/lib/Target/AArch64/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set(LLVM_LINK_COMPONENTS
MC
MCDisassembler
Support
AArch64Desc
)
Expand All @@ -18,6 +19,7 @@ endif()

Copy link
Member

@paschalis-mpeis paschalis-mpeis Feb 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On AArch64 I get linking errors. I believe on LLVM_LINK_COMPONENTS (line 2) we also need MCDisassembler.

add_llvm_library(LLVMBOLTTargetAArch64
AArch64MCPlusBuilder.cpp
AArch64MCSymbolizer.cpp

NO_EXPORT
DISABLE_LLVM_LINK_LLVM_DYLIB
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,10 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
Inst.getOpcode() == AArch64::MOVKXi)
Inst.addOperand(Inst.getOperand(0));

Inst.addOperand(MCOperand::createImm(imm));
if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, /*IsBranch*/ false, 0,
0, 4))
Inst.addOperand(MCOperand::createImm(imm));

Inst.addOperand(MCOperand::createImm(shift));
return Success;
}
Expand Down