From 1853e089fea6522dae8caf08799bbc78e3821830 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Thu, 24 Jul 2025 11:49:53 -0700 Subject: [PATCH] [BOLT][AArch64] Compensate for missing code markers Code written in assembly can have missing code markers. In BOLT, we can compensate by recognizing that a function entry point should start a code sequence. Such code was seen in the LuaJIT library. --- bolt/lib/Rewrite/RewriteInstance.cpp | 14 +++++++++++++ bolt/test/AArch64/missing-code-marker.s | 26 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 bolt/test/AArch64/missing-code-marker.s diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 9f243a1366928..fe4a23cc01382 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -896,6 +896,20 @@ void RewriteInstance::discoverFileObjects() { continue; MarkerSymType MarkerType = BC->getMarkerType(SymInfo.Symbol); + + // Treat ST_Function as code. + Expected TypeOrError = SymInfo.Symbol.getType(); + consumeError(TypeOrError.takeError()); + if (TypeOrError && *TypeOrError == SymbolRef::ST_Function) { + if (IsData) { + Expected NameOrError = SymInfo.Symbol.getName(); + consumeError(NameOrError.takeError()); + BC->errs() << "BOLT-WARNING: function symbol " << *NameOrError + << " lacks code marker\n"; + } + MarkerType = MarkerSymType::CODE; + } + if (MarkerType != MarkerSymType::NONE) { SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType}); LastAddr = SymInfo.Address; diff --git a/bolt/test/AArch64/missing-code-marker.s b/bolt/test/AArch64/missing-code-marker.s new file mode 100644 index 0000000000000..591c9abd34c23 --- /dev/null +++ b/bolt/test/AArch64/missing-code-marker.s @@ -0,0 +1,26 @@ +## Check that llvm-bolt is able to recover a missing code marker. 
+ +# RUN: %clang %cflags %s -o %t.exe -nostdlib -fuse-ld=lld -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s + +# CHECK: BOLT-WARNING: function symbol foo lacks code marker + +.text +.balign 4 + +.word 0 + +## Function foo starts immediately after a data object and does not have +## a matching "$x" symbol to indicate the start of code. +.global foo +.type foo, %function +foo: + .word 0xd65f03c0 +.size foo, .-foo + +.global _start +.type _start, %function +_start: + bl foo + ret +.size _start, .-_start