diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index bc45caf3ec8b7..0b52fb434c60d 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1327,6 +1327,22 @@ Error BinaryFunction::disassemble() { BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr); BC.errs() << '\n'; } + + // Verify that we've symbolized an operand if the instruction has a + // relocation against it. + if (getRelocationInRange(Offset, Offset + Size)) { + bool HasSymbolicOp = false; + for (MCOperand &Op : Instruction) { + if (Op.isExpr()) { + HasSymbolicOp = true; + break; + } + } + if (!HasSymbolicOp) + return createFatalBOLTError( + "expected symbolized operand for instruction at 0x" + + Twine::utohexstr(AbsoluteInstrAddr)); + } } // Special handling for AVX-512 instructions. @@ -1434,9 +1450,8 @@ Error BinaryFunction::disassemble() { if (BC.isAArch64()) handleAArch64IndirectCall(Instruction, Offset); } - } else if (BC.isAArch64() || BC.isRISCV()) { + } else if (BC.isRISCV()) { // Check if there's a relocation associated with this instruction. - bool UsedReloc = false; for (auto Itr = Relocations.lower_bound(Offset), ItrE = Relocations.lower_bound(Offset + Size); Itr != ItrE; ++Itr) { @@ -1461,24 +1476,6 @@ Error BinaryFunction::disassemble() { Relocation.Type); (void)Result; assert(Result && "cannot replace immediate with relocation"); - - // For aarch64, if we replaced an immediate with a symbol from a - // relocation, we mark it so we do not try to further process a - // pc-relative operand. All we need is the symbol. - UsedReloc = true; - } - - if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) { - if (auto NewE = handleErrors( - handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size), - [&](const BOLTError &E) -> Error { - if (E.isFatal()) - return Error(std::make_unique(std::move(E))); - if (!E.getMessage().empty()) - E.log(BC.errs()); - return Error::success(); - })) - return Error(std::move(NewE)); } } @@ -1578,8 +1575,9 @@ bool BinaryFunction::scanExternalRefs() { assert(FunctionData.size() == getMaxSize() && "function size does not match raw data size"); - BC.SymbolicDisAsm->setSymbolizer( - BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false)); + if (BC.isX86()) + BC.SymbolicDisAsm->setSymbolizer( + BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false)); // Disassemble contents of the function. Detect code entry points and create // relocations for references to code that will be moved. diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 4b21ff719b3ab..60efa25c54ffa 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "AArch64MCSymbolizer.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" @@ -133,6 +134,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { public: using MCPlusBuilder::MCPlusBuilder; + std::unique_ptr + createTargetSymbolizer(BinaryFunction &Function, + bool CreateNewSymbols) const override { + return std::make_unique(Function, CreateNewSymbols); + } + MCPhysReg getStackPointer() const override { return AArch64::SP; } MCPhysReg getFramePointer() const override { return AArch64::FP; } diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp new file mode 100644 index 0000000000000..7145e77a1edbb --- /dev/null +++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp @@ -0,0 +1,85 @@ +//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AArch64MCSymbolizer.h" +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/MCPlusBuilder.h" +#include "bolt/Core/Relocation.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "bolt-symbolizer" + +namespace llvm { +namespace bolt { + +AArch64MCSymbolizer::~AArch64MCSymbolizer() {} + +bool AArch64MCSymbolizer::tryAddingSymbolicOperand( + MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress, + bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) { + BinaryContext &BC = Function.getBinaryContext(); + MCContext *Ctx = BC.Ctx.get(); + + // NOTE: the callee may incorrectly set IsBranch. + if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) + return false; + + // TODO: add handling for linker "relaxation". At the moment, relocations + // corresponding to "relaxed" instructions are excluded from BinaryFunction + // relocation list. + + const uint64_t InstOffset = InstAddress - Function.getAddress(); + const Relocation *Relocation = Function.getRelocationAt(InstOffset); + + /// Add symbolic operand to the instruction with an optional addend. + auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend, + uint64_t RelType) { + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx); + if (Addend) + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx), + *Ctx); + Inst.addOperand(MCOperand::createExpr( + BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType))); + }; + + if (Relocation) { + addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type); + return true; + } + + if (!BC.MIB->hasPCRelOperand(Inst)) + return false; + + Value += InstAddress; + const MCSymbol *TargetSymbol; + uint64_t TargetOffset; + if (!CreateNewSymbols) { + if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) { + TargetSymbol = BD->getSymbol(); + TargetOffset = Value - BD->getAddress(); + } else { + return false; + } + } else { + std::tie(TargetSymbol, TargetOffset) = + BC.handleAddressRef(Value, Function, /*IsPCRel*/ true); + } + + addOperand(TargetSymbol, TargetOffset, 0); + + return true; +} + +void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream, + int64_t Value, + uint64_t Address) {} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h new file mode 100644 index 0000000000000..56ba4fbcaf275 --- /dev/null +++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h @@ -0,0 +1,44 @@ +//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H +#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H + +#include "bolt/Core/BinaryFunction.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" + +namespace llvm { +namespace bolt { + +class AArch64MCSymbolizer : public MCSymbolizer { +protected: + BinaryFunction &Function; + bool CreateNewSymbols{true}; + +public: + AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true) + : MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr), + Function(Function), CreateNewSymbols(CreateNewSymbols) {} + + AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete; + AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete; + virtual ~AArch64MCSymbolizer(); + + bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override; + + void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value, + uint64_t Address) override; +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/lib/Target/AArch64/CMakeLists.txt b/bolt/lib/Target/AArch64/CMakeLists.txt index 8435ea7245e7e..cb38117de659e 100644 --- a/bolt/lib/Target/AArch64/CMakeLists.txt +++ b/bolt/lib/Target/AArch64/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_LINK_COMPONENTS MC + MCDisassembler Support AArch64Desc ) @@ -18,6 +19,7 @@ endif() add_llvm_library(LLVMBOLTTargetAArch64 AArch64MCPlusBuilder.cpp + AArch64MCSymbolizer.cpp NO_EXPORT DISABLE_LLVM_LINK_LLVM_DYLIB diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 8b1c16d319b2c..f78e6926f2c7d 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -744,7 +744,10 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn, Inst.getOpcode() == AArch64::MOVKXi) Inst.addOperand(Inst.getOperand(0)); - Inst.addOperand(MCOperand::createImm(imm)); + if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, /*IsBranch*/ false, 0, + 0, 4)) + Inst.addOperand(MCOperand::createImm(imm)); + Inst.addOperand(MCOperand::createImm(shift)); return Success; }