Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 98 additions & 16 deletions bolt/lib/Passes/PAuthGadgetScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ namespace PAuthGadgetScanner {
dbgs() << "\n";
}

// Iterates over BinaryFunction's instructions like a range-based for loop:
//
// iterateOverInstrs(BF, [&](MCInstReference Inst) {
// // loop body
// });
template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
if (BF.hasCFG()) {
for (BinaryBasicBlock &BB : BF)
for (int64_t I = 0, E = BB.size(); I < E; ++I)
Fn(MCInstInBBReference(&BB, I));
} else {
for (auto I : BF.instrs())
Fn(MCInstInBFReference(&BF, I.first));
}
}

// This class represents mapping from a set of arbitrary physical registers to
// consecutive array indexes.
class TrackedRegisters {
Expand Down Expand Up @@ -342,6 +358,29 @@ class SrcSafetyAnalysis {
return S;
}

/// Computes a reasonably pessimistic estimation of the register state when
/// the previous instruction is not known for sure. Takes the set of registers
/// which are trusted at function entry and removes all registers that can be
/// clobbered inside this function.
SrcState computePessimisticState(BinaryFunction &BF) {
BitVector ClobberedRegs(NumRegs);
iterateOverInstrs(BF, [&](MCInstReference Inst) {
BC.MIB->getClobberedRegs(Inst, ClobberedRegs);

// If this is a call instruction, no register is safe anymore, unless
// it is a tail call. Ignore tail calls for the purpose of estimating the
// worst-case scenario, assuming no instructions are executed in the
// caller after this point anyway.
if (BC.MIB->isCall(Inst) && !BC.MIB->isTailCall(Inst))
ClobberedRegs.set();
});

SrcState S = createEntryState();
S.SafeToDerefRegs.reset(ClobberedRegs);
S.TrustedRegs.reset(ClobberedRegs);
return S;
}

BitVector getClobberedRegs(const MCInst &Point) const {
BitVector Clobbered(NumRegs);
// Assume a call can clobber all registers, including callee-saved
Expand Down Expand Up @@ -545,6 +584,10 @@ class DataflowSrcSafetyAnalysis
using SrcSafetyAnalysis::BC;
using SrcSafetyAnalysis::computeNext;

// Pessimistic initial state for basic blocks without any predecessors
// (not needed for most functions, thus initialized lazily).
SrcState PessimisticState;

public:
DataflowSrcSafetyAnalysis(BinaryFunction &BF,
MCPlusBuilder::AllocatorIdTy AllocId,
Expand Down Expand Up @@ -585,6 +628,18 @@ class DataflowSrcSafetyAnalysis
if (BB.isEntryPoint())
return createEntryState();

// If a basic block without any predecessors is found in an optimized code,
// this likely means that some CFG edges were not detected. Pessimistically
// assume any register that can ever be clobbered in this function to be
// unsafe before this basic block.
// Warn about this fact in FunctionAnalysis::findUnsafeUses(), as it likely
// means imprecise CFG information.
if (BB.pred_empty()) {
if (PessimisticState.empty())
PessimisticState = computePessimisticState(*BB.getParent());
return PessimisticState;
}

return SrcState();
}

Expand Down Expand Up @@ -1344,17 +1399,6 @@ shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst,
return make_gadget_report(AuthOracleKind, Inst, *AuthReg);
}

template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
if (BF.hasCFG()) {
for (BinaryBasicBlock &BB : BF)
for (int64_t I = 0, E = BB.size(); I < E; ++I)
Fn(MCInstInBBReference(&BB, I));
} else {
for (auto I : BF.instrs())
Fn(MCInstInBFReference(&BF, I.first));
}
}

static SmallVector<MCPhysReg>
collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) {
SmallSet<MCPhysReg, 4> RegsToTrack;
Expand All @@ -1375,17 +1419,55 @@ void FunctionAnalysisContext::findUnsafeUses(
BF.dump();
});

bool UnreachableBBReported = false;
if (BF.hasCFG()) {
// Warn on basic blocks being unreachable according to BOLT (at most once
// per BinaryFunction), as this likely means the CFG reconstructed by BOLT
// is imprecise. A basic block can be
// * reachable from an entry basic block - a hopefully correct non-empty
// state is propagated to that basic block sooner or later. All basic
// blocks are expected to belong to this category under normal conditions.
// * reachable from a "directly unreachable" BB (a basic block that has no
// direct predecessors and this is not because it is an entry BB) - *some*
// non-empty state is propagated to this basic block sooner or later, as
// the initial state of directly unreachable basic blocks is
// pessimistically initialized to "all registers are unsafe"
// - a warning can be printed for the "directly unreachable" basic block
// * neither reachable from an entry nor from a "directly unreachable" BB
// (such as if this BB is in an isolated loop of basic blocks) - the final
// state is computed to be empty for this basic block
// - a warning can be printed for this basic block
for (BinaryBasicBlock &BB : BF) {
MCInst *FirstInst = BB.getFirstNonPseudoInstr();
// Skip empty basic block early for simplicity.
if (!FirstInst)
continue;

bool IsDirectlyUnreachable = BB.pred_empty() && !BB.isEntryPoint();
bool HasNoStateComputed = Analysis->getStateBefore(*FirstInst).empty();
if (!IsDirectlyUnreachable && !HasNoStateComputed)
continue;

// Arbitrarily attach the report to the first instruction of BB.
Reports.push_back(
make_generic_report(MCInstReference::get(FirstInst, BF),
"Warning: the function has unreachable basic "
"blocks (possibly incomplete CFG)"));
UnreachableBBReported = true;
break; // One warning per function.
}
}

iterateOverInstrs(BF, [&](MCInstReference Inst) {
if (BC.MIB->isCFI(Inst))
return;

const SrcState &S = Analysis->getStateBefore(Inst);

// If non-empty state was never propagated from the entry basic block
// to Inst, assume it to be unreachable and report a warning.
if (S.empty()) {
Reports.push_back(
make_generic_report(Inst, "Warning: unreachable instruction found"));
LLVM_DEBUG(
{ traceInst(BC, "Instruction has no state, skipping", Inst); });
assert(UnreachableBBReported && "Should be reported at least once");
(void)UnreachableBBReported;
return;
}

Expand Down
2 changes: 1 addition & 1 deletion bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ f_callclobbered_calleesaved:
.globl f_unreachable_instruction
.type f_unreachable_instruction,@function
f_unreachable_instruction:
// CHECK-LABEL: GS-PAUTH: Warning: unreachable instruction found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
// CHECK-LABEL: GS-PAUTH: Warning: the function has unreachable basic blocks (possibly incomplete CFG) in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: add x0, x1, x2
// CHECK-NOT: instructions that write to the affected registers after any authentication are:
b 1f
Expand Down
84 changes: 84 additions & 0 deletions bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
Original file line number Diff line number Diff line change
Expand Up @@ -1428,6 +1428,90 @@ printed_instrs_nocfg:
br x0
.size printed_instrs_nocfg, .-printed_instrs_nocfg

// Test handling of unreachable basic blocks.
//
// Basic blocks without any predecessors were observed in real-world optimized
// code. At least sometimes they were actually reachable via jump table, which
// was not detected, but the function was processed as if its CFG was
// reconstructed successfully.
//
// As a more predictable model example, let's use really unreachable code
// for testing.

.globl bad_unreachable_call
.type bad_unreachable_call,@function
bad_unreachable_call:
// CHECK-LABEL: GS-PAUTH: Warning: the function has unreachable basic blocks (possibly incomplete CFG) in function bad_unreachable_call, basic block {{[^,]+}}, at address
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, with this patch basic blocks that are not part of the CFG as reconstructed by BOLT are now also analyzed. These blocks are analyzed with a pessimistic initial state.
There are 2 possible cases why a basic block is not part of the CFG:

  1. BOLT wasn't able to reconstruct the CFG correctly.
    In this case, the pessimistic assumptions are probably going to cause more false positives in these "not-part-of-the-CFG basic blocks" than if BOLT was able to reconstruct the CFG?
    If all my assumptions above are correct, maybe the warning messages should state that more clearly, for example, something like "Warning: function {function_name} has seemingly unreachable basic blocks, possibly due to limits in how bolt can reverse engineer the CFG. This may lead to more false positives being reported in these basic blocks"
    Having said just that, I'm also thinking that maybe this is similar to the situation where BOLT cannot create a CFG at all. If so, should we (are we already?) producing a warning then too? But maybe that would produce way too many warnings?
  2. The code really contains dead code (not impossible, code generators and assembly writers are known to sometimes make mistakes, or there might be a legitimate reason for seemingly dead code to be present in the binary)
    Would it be correct to say that in this case all reports against these dead code basic blocks would be false positives?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the suggestion, I rephrased the warning message in cdc20ab, though I used less concrete wording: not "more false positives" but "the analysis quality may be degraded". Even if incomplete CFG can only result in false positives when caused by jump table not being understood by BOLT (the only reason I observed in the wild so far), this seems to indicate the analyses in BOLT are somewhat broken for the function, so I would not be sure that this cannot result in lots of false negatives under different conditions.

All the reports for dead code are probably false positives, but if the particular code is actually seemingly dead (maybe some precompiled snippet for JIT or something...), then nothing can be told for sure, but this is hardly an issue of the scanner :) Anyway, this does not look like a widespread issue worth implementing a workaround at first glance, but maybe scanning large code bases will prove the opposite...

Considering the functions without CFG at all, printing a warning for them sounds reasonable, but this turned out to break a lot of tests relying on CHECK-NOT: function_name_nocfg - added a FIXME in e14dce9 for now.

// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0
// CHECK-NOT: instructions that write to the affected registers after any authentication are:
// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_unreachable_call, basic block {{[^,]+}}, at address
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0
// CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are:
paciasp
stp x29, x30, [sp, #-16]!
mov x29, sp

b 1f
// unreachable basic block:
blr x0

1: // reachable basic block:
ldp x29, x30, [sp], #16
autiasp
ret
.size bad_unreachable_call, .-bad_unreachable_call

.globl good_unreachable_call
.type good_unreachable_call,@function
good_unreachable_call:
// CHECK-NOT: non-protected call{{.*}}good_unreachable_call
// CHECK-LABEL: GS-PAUTH: Warning: the function has unreachable basic blocks (possibly incomplete CFG) in function good_unreachable_call, basic block {{[^,]+}}, at address
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1
// CHECK-NOT: instructions that write to the affected registers after any authentication are:
// CHECK-NOT: non-protected call{{.*}}good_unreachable_call
paciasp
stp x29, x30, [sp, #-16]!
mov x29, sp

b 1f
// unreachable basic block:
autia x0, x1
blr x0 // <-- this call is definitely protected provided at least
// basic block boundaries are detected correctly

1: // reachable basic block:
ldp x29, x30, [sp], #16
autiasp
ret
.size good_unreachable_call, .-good_unreachable_call

.globl unreachable_loop_of_bbs
.type unreachable_loop_of_bbs,@function
unreachable_loop_of_bbs:
// CHECK-NOT: unreachable basic blocks{{.*}}unreachable_loop_of_bbs
// CHECK-NOT: non-protected call{{.*}}unreachable_loop_of_bbs
// CHECK-LABEL: GS-PAUTH: Warning: the function has unreachable basic blocks (possibly incomplete CFG) in function unreachable_loop_of_bbs, basic block {{[^,]+}}, at address
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0
// CHECK-NOT: unreachable basic blocks{{.*}}unreachable_loop_of_bbs
// CHECK-NOT: non-protected call{{.*}}unreachable_loop_of_bbs
paciasp
stp x29, x30, [sp, #-16]!
mov x29, sp
b .Lreachable_epilogue_bb

.Lfirst_unreachable_bb:
blr x0 // <-- this call is not analyzed
b .Lsecond_unreachable_bb
.Lsecond_unreachable_bb:
blr x1 // <-- this call is not analyzed
b .Lfirst_unreachable_bb

.Lreachable_epilogue_bb:
ldp x29, x30, [sp], #16
autiasp
ret
.size unreachable_loop_of_bbs, .-unreachable_loop_of_bbs

.globl main
.type main,@function
main:
Expand Down
Loading