Skip to content

Conversation

@maksfb
Copy link
Contributor

@maksfb maksfb commented Jan 25, 2025

The code for jump table detection on AArch64 asserts liberally whenever the input instruction sequence does not match the expected pattern. As a result, BOLT fails to process binaries with such sequences instead of ignoring functions with unknown control flow.

Remove asserts in analyzeIndirectBranchFragment() and mark indirect jumps as instructions with unknown control flow instead.

The code for jump table detection on AArch64 asserts liberally whenever
the input instruction sequence does not match the expected pattern. As a
result, BOLT fails to process binaries with such sequences instead of
ignoring functions with unknown control flow.

Remove asserts in analyzeIndirectBranchFragment() and mark indirect
jumps as instructions with unknown control flow instead.
@llvmbot
Copy link
Member

llvmbot commented Jan 25, 2025

@llvm/pr-subscribers-bolt

Author: Maksim Panchenko (maksfb)

Changes

The code for jump table detection on AArch64 asserts liberally whenever the input instruction sequence does not match the expected pattern. As a result, BOLT fails to process binaries with such sequences instead of ignoring functions with unknown control flow.

Remove asserts in analyzeIndirectBranchFragment() and mark indirect jumps as instructions with unknown control flow instead.


Full diff: https://github.com/llvm/llvm-project/pull/124372.diff

3 Files Affected:

  • (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+24-21)
  • (added) bolt/test/AArch64/jump-table-heuristic-fail.s (+29)
  • (modified) bolt/test/AArch64/test-indirect-branch.s (+5-4)
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index ac709c5dd063ac..c1055311141322 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -834,6 +834,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   ///                                   #  of this BB)
   ///   br      x0                      # Indirect jump instruction
   ///
+  /// Return true on successful jump table instruction sequence match, false
+  /// otherwise.
   bool analyzeIndirectBranchFragment(
       const MCInst &Inst,
       DenseMap<const MCInst *, SmallVector<MCInst *, 4>> &UDChain,
@@ -842,6 +844,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     // Expect AArch64 BR
     assert(Inst.getOpcode() == AArch64::BR && "Unexpected opcode");
 
+    JumpTable = nullptr;
+
     // Match the indirect branch pattern for aarch64
     SmallVector<MCInst *, 4> &UsesRoot = UDChain[&Inst];
     if (UsesRoot.size() == 0 || UsesRoot[0] == nullptr)
@@ -879,8 +883,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       // Parsed as ADDXrs reg:x8 reg:x8 reg:x12 imm:0
       return false;
     }
-    assert(DefAdd->getOpcode() == AArch64::ADDXrx &&
-           "Failed to match indirect branch!");
+    if (DefAdd->getOpcode() != AArch64::ADDXrx)
+      return false;
 
     // Validate ADD operands
     int64_t OperandExtension = DefAdd->getOperand(3).getImm();
@@ -897,8 +901,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       //   ldr     w7, [x6]
       //   add     x6, x6, w7, sxtw => no shift amount
       //   br      x6
-      errs() << "BOLT-WARNING: "
-                "Failed to match indirect branch: ShiftVAL != 2 \n";
+      dbgs() << "BOLT-DEBUG: "
+                "failed to match indirect branch: ShiftVAL != 2\n";
       return false;
     }
 
@@ -909,7 +913,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     else if (ExtendType == AArch64_AM::SXTW)
       ScaleValue = 4LL;
     else
-      llvm_unreachable("Failed to match indirect branch! (fragment 3)");
+      return false;
 
     // Match an ADR to load base address to be used when addressing JT targets
     SmallVector<MCInst *, 4> &UsesAdd = UDChain[DefAdd];
@@ -920,18 +924,15 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       return false;
     }
     MCInst *DefBaseAddr = UsesAdd[1];
-    assert(DefBaseAddr->getOpcode() == AArch64::ADR &&
-           "Failed to match indirect branch pattern! (fragment 3)");
+    if (DefBaseAddr->getOpcode() != AArch64::ADR)
+      return false;
 
     PCRelBase = DefBaseAddr;
     // Match LOAD to load the jump table (relative) target
     const MCInst *DefLoad = UsesAdd[2];
-    assert(mayLoad(*DefLoad) &&
-           "Failed to match indirect branch load pattern! (1)");
-    assert((ScaleValue != 1LL || isLDRB(*DefLoad)) &&
-           "Failed to match indirect branch load pattern! (2)");
-    assert((ScaleValue != 2LL || isLDRH(*DefLoad)) &&
-           "Failed to match indirect branch load pattern! (3)");
+    if (!mayLoad(*DefLoad) || (ScaleValue == 1LL && !isLDRB(*DefLoad)) ||
+        (ScaleValue == 2LL && !isLDRH(*DefLoad)))
+      return false;
 
     // Match ADD that calculates the JumpTable Base Address (not the offset)
     SmallVector<MCInst *, 4> &UsesLoad = UDChain[DefLoad];
@@ -941,7 +942,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
         isRegToRegMove(*DefJTBaseAdd, From, To)) {
       // Sometimes base address may have been defined in another basic block
       // (hoisted). Return with no jump table info.
-      JumpTable = nullptr;
       return true;
     }
 
@@ -953,24 +953,27 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       //  adr     x12, 0x247b30 <__gettextparse+0x5b0>
       //  add     x13, x12, w13, sxth #2
       //  br      x13
-      errs() << "BOLT-WARNING: Failed to match indirect branch: "
-                "nop/adr instead of adrp/add \n";
+      dbgs() << "BOLT-DEBUG: failed to match indirect branch: "
+                "nop/adr instead of adrp/add\n";
       return false;
     }
 
-    assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
-           "Failed to match jump table base address pattern! (1)");
+    if (DefJTBaseAdd->getOpcode() != AArch64::ADDXri) {
+      dbgs() << "BOLT-DEBUG: "
+                "failed to match jump table base address pattern! (1)\n";
+      return false;
+    }
 
     if (DefJTBaseAdd->getOperand(2).isImm())
       Offset = DefJTBaseAdd->getOperand(2).getImm();
     SmallVector<MCInst *, 4> &UsesJTBaseAdd = UDChain[DefJTBaseAdd];
     const MCInst *DefJTBasePage = UsesJTBaseAdd[1];
     if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) {
-      JumpTable = nullptr;
       return true;
     }
-    assert(DefJTBasePage->getOpcode() == AArch64::ADRP &&
-           "Failed to match jump table base page pattern! (2)");
+    if (DefJTBasePage->getOpcode() != AArch64::ADRP)
+      return false;
+
     if (DefJTBasePage->getOperand(1).isExpr())
       JumpTable = DefJTBasePage->getOperand(1).getExpr();
     return true;
diff --git a/bolt/test/AArch64/jump-table-heuristic-fail.s b/bolt/test/AArch64/jump-table-heuristic-fail.s
new file mode 100644
index 00000000000000..724171ac399251
--- /dev/null
+++ b/bolt/test/AArch64/jump-table-heuristic-fail.s
@@ -0,0 +1,29 @@
+## Verify that BOLT does not crash while encountering instruction sequence that
+## does not perfectly match jump table pattern.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg 2>&1 | FileCheck %s
+
+  .section .text
+  .align 4
+  .globl _start
+  .type  _start, %function
+_start:
+  sub     w0, w0, #0x4a
+## The address loaded into x22 is undefined. However, the instructions that
+## follow ldr, use the x22 address as a regular jump table.
+  ldr     x22, [x29, #0x98]
+  ldrb    w0, [x22, w0, uxtw]
+  adr     x1, #12
+  add     x0, x1, w0, sxtb #2
+  br      x0
+# CHECK: br x0 # UNKNOWN
+.L0:
+  ret
+.size _start, .-_start
+
+## Force relocation mode.
+  .reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/test-indirect-branch.s b/bolt/test/AArch64/test-indirect-branch.s
index 168e50c8f47f52..1e16e76b11530f 100644
--- a/bolt/test/AArch64/test-indirect-branch.s
+++ b/bolt/test/AArch64/test-indirect-branch.s
@@ -3,10 +3,11 @@
 
 // clang-format off
 
-// REQUIRES: system-linux
+// REQUIRES: system-linux, asserts
+
 // RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
 // RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
-// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg --strict\
+// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg --strict --debug-only=mcplus \
 // RUN:  -v=1 2>&1 | FileCheck %s
 
 // Pattern 1: there is no shift amount after the 'add' instruction.
@@ -39,7 +40,7 @@ _start:
 // svc #0
 
 // Pattern 1
-// CHECK: BOLT-WARNING: Failed to match indirect branch: ShiftVAL != 2
+// CHECK: BOLT-DEBUG: failed to match indirect branch: ShiftVAL != 2
   .globl test1
   .type  test1, %function
 test1:
@@ -57,7 +58,7 @@ test1_2:
    ret
 
 // Pattern 2
-// CHECK: BOLT-WARNING: Failed to match indirect branch: nop/adr instead of adrp/add
+// CHECK: BOLT-DEBUG: failed to match indirect branch: nop/adr instead of adrp/add
   .globl test2
   .type  test2, %function
 test2:

Copy link
Contributor

@aaupov aaupov left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG with suggestion to move debug prints under LLVM_DEBUG.

@maksfb maksfb merged commit 1b4bd4e into llvm:main Jan 25, 2025
7 checks passed
@yavtuk
Copy link
Contributor

yavtuk commented Jan 27, 2025

Issue #120782 should be covered by this commit

@maksfb maksfb deleted the llvm-fix-arm-jump-table branch March 6, 2025 02:46
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants