diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 94fe4aa8aa0e5..8bec1db70e25a 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -1435,6 +1435,17 @@ class BinaryContext { bool PrintRelocations = false, StringRef Endl = "\n") const; + /// Print data when embedded in the instruction stream keeping the format + /// similar to printInstruction(). + void printData(raw_ostream &OS, ArrayRef<uint8_t> Data, + uint64_t Offset) const; + + /// Extract data from the binary corresponding to [Address, Address + Size) + /// range. Return an empty ArrayRef if the address range does not belong to + /// any section in the binary, crosses a section boundary, or falls into a + /// virtual section. + ArrayRef<uint8_t> extractData(uint64_t Address, uint64_t Size) const; + /// Print a range of instructions. template uint64_t diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index e8b2757f7db21..942840a7621fd 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -2060,6 +2060,11 @@ class BinaryFunction { return Islands ? Islands->getAlignment() : 1; } + /// If there is a constant island in the range [StartOffset, EndOffset), + /// return its offset within the function.
+ std::optional<uint64_t> getIslandInRange(uint64_t StartOffset, + uint64_t EndOffset) const; + uint64_t estimateConstantIslandSize(const BinaryFunction *OnBehalfOf = nullptr) const { if (!Islands) diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index f5e11358daaa3..1327cbc6a62ba 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1942,6 +1942,45 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, OS << " discriminator:" << Row.Discriminator; } +ArrayRef<uint8_t> BinaryContext::extractData(uint64_t Address, + uint64_t Size) const { + ArrayRef<uint8_t> Res; + + const ErrorOr Section = getSectionForAddress(Address); + if (!Section || Section->isVirtual()) + return Res; + + if (!Section->containsRange(Address, Size)) + return Res; + + auto *Bytes = + reinterpret_cast<const uint8_t *>(Section->getContents().data()); + return ArrayRef<uint8_t>(Bytes + Address - Section->getAddress(), Size); +} + +void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data, + uint64_t Offset) const { + DataExtractor DE(Data, AsmInfo->isLittleEndian(), + AsmInfo->getCodePointerSize()); + uint64_t DataOffset = 0; + // Emit whole 4-byte words first, then at most one 2-byte and one 1-byte + // value for whatever is left over. + while (DataOffset + 4 <= Data.size()) { + OS << format(" %08" PRIx64 ": \t.word\t0x", Offset + DataOffset); + const auto Word = DE.getUnsigned(&DataOffset, 4); + OS << Twine::utohexstr(Word) << '\n'; + } + if (DataOffset + 2 <= Data.size()) { + OS << format(" %08" PRIx64 ": \t.short\t0x", Offset + DataOffset); + const auto Short = DE.getUnsigned(&DataOffset, 2); + OS << Twine::utohexstr(Short) << '\n'; + } + if (DataOffset + 1 == Data.size()) { + OS << format(" %08" PRIx64 ": \t.byte\t0x%x\n", Offset + DataOffset, + Data[DataOffset]); + } +} + void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, uint64_t Offset, const BinaryFunction *Function, diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 88562a60dd50d..317ade95c51f3 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ 
b/bolt/lib/Core/BinaryFunction.cpp @@ -491,11 +491,27 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) { // Offset of the instruction in function. uint64_t Offset = 0; + // Print the data of the constant island, if any, starting in [Start, End). + auto printConstantIslandInRange = [&](uint64_t Start, uint64_t End) { + std::optional<uint64_t> IslandOffset = getIslandInRange(Start, End); + + if (!IslandOffset) + return; + + const size_t IslandSize = getSizeOfDataInCodeAt(*IslandOffset); + BC.printData(OS, BC.extractData(getAddress() + *IslandOffset, IslandSize), + *IslandOffset); + }; + if (BasicBlocks.empty() && !Instructions.empty()) { // Print before CFG was built. + uint64_t PrevOffset = 0; for (const std::pair &II : Instructions) { Offset = II.first; + // Print any constant islands in between the instructions. + printConstantIslandInRange(PrevOffset, Offset); + // Print label if exists at this offset. auto LI = Labels.find(Offset); if (LI != Labels.end()) { @@ -506,7 +522,12 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) { } BC.printInstruction(OS, II.second, Offset, this); + + PrevOffset = Offset; } + + // Print any data at the end of the function. 
+ printConstantIslandInRange(PrevOffset, getMaxSize()); } StringRef SplitPointMsg = ""; @@ -1048,6 +1069,19 @@ size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const { return getSize() - Offset; } +std::optional +BinaryFunction::getIslandInRange(uint64_t StartOffset, + uint64_t EndOffset) const { + if (!Islands) + return std::nullopt; + + auto Iter = llvm::lower_bound(Islands->DataOffsets, StartOffset); + if (Iter != Islands->DataOffsets.end() && *Iter < EndOffset) + return *Iter; + + return std::nullopt; +} + bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const { ArrayRef FunctionData = *getData(); uint64_t EndOfCode = getSize(); diff --git a/bolt/test/AArch64/data-in-code.s b/bolt/test/AArch64/data-in-code.s new file mode 100644 index 0000000000000..8d3179a0c3350 --- /dev/null +++ b/bolt/test/AArch64/data-in-code.s @@ -0,0 +1,31 @@ +## Check that llvm-bolt prints data embedded in code. + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \ +# RUN: -fuse-ld=lld -Wl,-q + +## Check disassembly of BOLT input. +# RUN: llvm-objdump %t.exe -d | FileCheck %s + +# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm | FileCheck %s + +.text +.balign 4 + +.global _start +.type _start, %function +_start: + mov x0, #0x0 + .word 0x4f82e010 + ret + .byte 0x0, 0xff, 0x42 +# CHECK-LABEL: _start +# CHECK: mov x0, #0x0 +# CHECK-NEXT: .word 0x4f82e010 +# CHECK-NEXT: ret +# CHECK-NEXT: .short 0xff00 +# CHECK-NEXT: .byte 0x42 +.size _start, .-_start + +## Force relocation mode. + .reloc 0, R_AARCH64_NONE