Skip to content

Commit 6bc7087

Browse files
authored
merge main into amd-staging (llvm#2826)
2 parents ae94756 + daedad8 commit 6bc7087

File tree

94 files changed

+3250
-1346
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+3250
-1346
lines changed

bolt/lib/Passes/PAuthGadgetScanner.cpp

Lines changed: 108 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,22 @@ namespace PAuthGadgetScanner {
8282
dbgs() << "\n";
8383
}
8484

85+
// Iterates over BinaryFunction's instructions like a range-based for loop:
86+
//
87+
// iterateOverInstrs(BF, [&](MCInstReference Inst) {
88+
// // loop body
89+
// });
90+
template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
91+
if (BF.hasCFG()) {
92+
for (BinaryBasicBlock &BB : BF)
93+
for (int64_t I = 0, E = BB.size(); I < E; ++I)
94+
Fn(MCInstInBBReference(&BB, I));
95+
} else {
96+
for (auto I : BF.instrs())
97+
Fn(MCInstInBFReference(&BF, I.first));
98+
}
99+
}
100+
85101
// This class represents mapping from a set of arbitrary physical registers to
86102
// consecutive array indexes.
87103
class TrackedRegisters {
@@ -342,6 +358,29 @@ class SrcSafetyAnalysis {
342358
return S;
343359
}
344360

361+
/// Computes a reasonably pessimistic estimation of the register state when
362+
/// the previous instruction is not known for sure. Takes the set of registers
363+
/// which are trusted at function entry and removes all registers that can be
364+
/// clobbered inside this function.
365+
SrcState computePessimisticState(BinaryFunction &BF) {
366+
BitVector ClobberedRegs(NumRegs);
367+
iterateOverInstrs(BF, [&](MCInstReference Inst) {
368+
BC.MIB->getClobberedRegs(Inst, ClobberedRegs);
369+
370+
// If this is a call instruction, no register is safe anymore, unless
371+
// it is a tail call. Ignore tail calls for the purpose of estimating the
372+
// worst-case scenario, assuming no instructions are executed in the
373+
// caller after this point anyway.
374+
if (BC.MIB->isCall(Inst) && !BC.MIB->isTailCall(Inst))
375+
ClobberedRegs.set();
376+
});
377+
378+
SrcState S = createEntryState();
379+
S.SafeToDerefRegs.reset(ClobberedRegs);
380+
S.TrustedRegs.reset(ClobberedRegs);
381+
return S;
382+
}
383+
345384
BitVector getClobberedRegs(const MCInst &Point) const {
346385
BitVector Clobbered(NumRegs);
347386
// Assume a call can clobber all registers, including callee-saved
@@ -545,6 +584,10 @@ class DataflowSrcSafetyAnalysis
545584
using SrcSafetyAnalysis::BC;
546585
using SrcSafetyAnalysis::computeNext;
547586

587+
// Pessimistic initial state for basic blocks without any predecessors
588+
// (not needed for most functions, thus initialized lazily).
589+
SrcState PessimisticState;
590+
548591
public:
549592
DataflowSrcSafetyAnalysis(BinaryFunction &BF,
550593
MCPlusBuilder::AllocatorIdTy AllocId,
@@ -585,6 +628,18 @@ class DataflowSrcSafetyAnalysis
585628
if (BB.isEntryPoint())
586629
return createEntryState();
587630

631+
// If a basic block without any predecessors is found in an optimized code,
632+
// this likely means that some CFG edges were not detected. Pessimistically
633+
// assume any register that can ever be clobbered in this function to be
634+
// unsafe before this basic block.
635+
// Warn about this fact in FunctionAnalysis::findUnsafeUses(), as it likely
636+
// means imprecise CFG information.
637+
if (BB.pred_empty()) {
638+
if (PessimisticState.empty())
639+
PessimisticState = computePessimisticState(*BB.getParent());
640+
return PessimisticState;
641+
}
642+
588643
return SrcState();
589644
}
590645

@@ -682,19 +737,14 @@ template <typename StateTy> class CFGUnawareAnalysis {
682737
//
683738
// Then, a function can be split into a number of disjoint contiguous sequences
684739
// of instructions without labels in between. These sequences can be processed
685-
// the same way basic blocks are processed by data-flow analysis, assuming
686-
// pessimistically that all registers are unsafe at the start of each sequence.
740+
// the same way basic blocks are processed by data-flow analysis, with the same
741+
// pessimistic estimation of the initial state at the start of each sequence
742+
// (except the first instruction of the function).
687743
class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
688744
public CFGUnawareAnalysis<SrcState> {
689745
using SrcSafetyAnalysis::BC;
690746
BinaryFunction &BF;
691747

692-
/// Creates a state with all registers marked unsafe (not to be confused
693-
/// with empty state).
694-
SrcState createUnsafeState() const {
695-
return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
696-
}
697-
698748
public:
699749
CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
700750
MCPlusBuilder::AllocatorIdTy AllocId,
@@ -704,6 +754,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
704754
}
705755

706756
void run() override {
757+
const SrcState DefaultState = computePessimisticState(BF);
707758
SrcState S = createEntryState();
708759
for (auto &I : BF.instrs()) {
709760
MCInst &Inst = I.second;
@@ -718,7 +769,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
718769
LLVM_DEBUG({
719770
traceInst(BC, "Due to label, resetting the state before", Inst);
720771
});
721-
S = createUnsafeState();
772+
S = DefaultState;
722773
}
723774

724775
// Attach the state *before* this instruction executes.
@@ -1344,17 +1395,6 @@ shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst,
13441395
return make_gadget_report(AuthOracleKind, Inst, *AuthReg);
13451396
}
13461397

1347-
template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
1348-
if (BF.hasCFG()) {
1349-
for (BinaryBasicBlock &BB : BF)
1350-
for (int64_t I = 0, E = BB.size(); I < E; ++I)
1351-
Fn(MCInstInBBReference(&BB, I));
1352-
} else {
1353-
for (auto I : BF.instrs())
1354-
Fn(MCInstInBFReference(&BF, I.first));
1355-
}
1356-
}
1357-
13581398
static SmallVector<MCPhysReg>
13591399
collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) {
13601400
SmallSet<MCPhysReg, 4> RegsToTrack;
@@ -1375,17 +1415,60 @@ void FunctionAnalysisContext::findUnsafeUses(
13751415
BF.dump();
13761416
});
13771417

1418+
bool UnreachableBBReported = false;
1419+
if (BF.hasCFG()) {
1420+
// Warn on basic blocks being unreachable according to BOLT (at most once
1421+
// per BinaryFunction), as this likely means the CFG reconstructed by BOLT
1422+
// is imprecise. A basic block can be
1423+
// * reachable from an entry basic block - a hopefully correct non-empty
1424+
// state is propagated to that basic block sooner or later. All basic
1425+
// blocks are expected to belong to this category under normal conditions.
1426+
// * reachable from a "directly unreachable" BB (a basic block that has no
1427+
// direct predecessors and this is not because it is an entry BB) - *some*
1428+
// non-empty state is propagated to this basic block sooner or later, as
1429+
// the initial state of directly unreachable basic blocks is
1430+
// pessimistically initialized to "all registers are unsafe"
1431+
// - a warning can be printed for the "directly unreachable" basic block
1432+
// * neither reachable from an entry nor from a "directly unreachable" BB
1433+
// (such as if this BB is in an isolated loop of basic blocks) - the final
1434+
// state is computed to be empty for this basic block
1435+
// - a warning can be printed for this basic block
1436+
for (BinaryBasicBlock &BB : BF) {
1437+
MCInst *FirstInst = BB.getFirstNonPseudoInstr();
1438+
// Skip empty basic block early for simplicity.
1439+
if (!FirstInst)
1440+
continue;
1441+
1442+
bool IsDirectlyUnreachable = BB.pred_empty() && !BB.isEntryPoint();
1443+
bool HasNoStateComputed = Analysis->getStateBefore(*FirstInst).empty();
1444+
if (!IsDirectlyUnreachable && !HasNoStateComputed)
1445+
continue;
1446+
1447+
// Arbitrarily attach the report to the first instruction of BB.
1448+
// This is printed as "[message] in function [name], basic block ...,
1449+
// at address ..." when the issue is reported to the user.
1450+
Reports.push_back(make_generic_report(
1451+
MCInstReference::get(FirstInst, BF),
1452+
"Warning: possibly imprecise CFG, the analysis quality may be "
1453+
"degraded in this function. According to BOLT, unreachable code is "
1454+
"found" /* in function [name]... */));
1455+
UnreachableBBReported = true;
1456+
break; // One warning per function.
1457+
}
1458+
}
1459+
// FIXME: Warn the user about imprecise analysis when the function has no CFG
1460+
// information at all.
1461+
13781462
iterateOverInstrs(BF, [&](MCInstReference Inst) {
13791463
if (BC.MIB->isCFI(Inst))
13801464
return;
13811465

13821466
const SrcState &S = Analysis->getStateBefore(Inst);
1383-
1384-
// If non-empty state was never propagated from the entry basic block
1385-
// to Inst, assume it to be unreachable and report a warning.
13861467
if (S.empty()) {
1387-
Reports.push_back(
1388-
make_generic_report(Inst, "Warning: unreachable instruction found"));
1468+
LLVM_DEBUG(
1469+
{ traceInst(BC, "Instruction has no state, skipping", Inst); });
1470+
assert(UnreachableBBReported && "Should be reported at least once");
1471+
(void)UnreachableBBReported;
13891472
return;
13901473
}
13911474

bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ f_callclobbered_calleesaved:
215215
.globl f_unreachable_instruction
216216
.type f_unreachable_instruction,@function
217217
f_unreachable_instruction:
218-
// CHECK-LABEL: GS-PAUTH: Warning: unreachable instruction found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
218+
// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
219219
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: add x0, x1, x2
220220
// CHECK-NOT: instructions that write to the affected registers after any authentication are:
221221
b 1f
@@ -224,20 +224,33 @@ f_unreachable_instruction:
224224
ret
225225
.size f_unreachable_instruction, .-f_unreachable_instruction
226226

227-
// Expected false positive: without CFG, the state is reset to all-unsafe
228-
// after an unconditional branch.
227+
// Without CFG, the state is reset at labels, assuming every register that can
228+
// be clobbered in the function was actually clobbered.
229229

230-
.globl state_is_reset_after_indirect_branch_nocfg
231-
.type state_is_reset_after_indirect_branch_nocfg,@function
232-
state_is_reset_after_indirect_branch_nocfg:
233-
// CHECK-LABEL: GS-PAUTH: non-protected ret found in function state_is_reset_after_indirect_branch_nocfg, at address
234-
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: ret
230+
.globl lr_untouched_nocfg
231+
.type lr_untouched_nocfg,@function
232+
lr_untouched_nocfg:
233+
// CHECK-NOT: lr_untouched_nocfg
234+
adr x2, 1f
235+
br x2
236+
1:
237+
ret
238+
.size lr_untouched_nocfg, .-lr_untouched_nocfg
239+
240+
.globl lr_clobbered_nocfg
241+
.type lr_clobbered_nocfg,@function
242+
lr_clobbered_nocfg:
243+
// CHECK-LABEL: GS-PAUTH: non-protected ret found in function lr_clobbered_nocfg, at address
244+
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: ret
235245
// CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are:
236246
adr x2, 1f
237247
br x2
238248
1:
249+
b 2f
250+
bl g // never executed, but affects the expected worst-case scenario
251+
2:
239252
ret
240-
.size state_is_reset_after_indirect_branch_nocfg, .-state_is_reset_after_indirect_branch_nocfg
253+
.size lr_clobbered_nocfg, .-lr_clobbered_nocfg
241254

242255
/// Now do a basic sanity check on every different Authentication instruction:
243256

bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -491,10 +491,6 @@ good_address_arith_multi_bb:
491491
ret
492492
.size good_address_arith_multi_bb, .-good_address_arith_multi_bb
493493

494-
// FIXME: Most *_nocfg test cases contain paciasp+autiasp instructions even if
495-
// LR is not spilled - this is a workaround for RET instructions being
496-
// reported as non-protected, because LR state is reset at every label.
497-
498494
.globl good_ret_nocfg
499495
.type good_ret_nocfg,@function
500496
good_ret_nocfg:
@@ -541,29 +537,25 @@ good_branch_nocfg:
541537
.type good_load_other_reg_nocfg,@function
542538
good_load_other_reg_nocfg:
543539
// CHECK-NOT: good_load_other_reg_nocfg
544-
paciasp
545540
adr x2, 1f
546541
br x2
547542
1:
548543
autia x0, x1
549544
ldr x2, [x0]
550545

551-
autiasp
552546
ret
553547
.size good_load_other_reg_nocfg, .-good_load_other_reg_nocfg
554548

555549
.globl good_load_same_reg_nocfg
556550
.type good_load_same_reg_nocfg,@function
557551
good_load_same_reg_nocfg:
558552
// CHECK-NOT: good_load_same_reg_nocfg
559-
paciasp
560553
adr x2, 1f
561554
br x2
562555
1:
563556
autia x0, x1
564557
ldr x0, [x0]
565558

566-
autiasp
567559
ret
568560
.size good_load_same_reg_nocfg, .-good_load_same_reg_nocfg
569561

@@ -575,13 +567,11 @@ bad_unchecked_nocfg:
575567
// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unchecked_nocfg, at address
576568
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1
577569
// CHECK-NEXT: The 0 instructions that leak the affected registers are:
578-
paciasp
579570
adr x2, 1f
580571
br x2
581572
1:
582573
autia x0, x1
583574

584-
autiasp
585575
ret
586576
.size bad_unchecked_nocfg, .-bad_unchecked_nocfg
587577

@@ -615,15 +605,13 @@ bad_unknown_usage_read_nocfg:
615605
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1
616606
// CHECK-NEXT: The 1 instructions that leak the affected registers are:
617607
// CHECK-NEXT: 1. {{[0-9a-f]+}}: mul x3, x0, x1
618-
paciasp
619608
adr x2, 1f
620609
br x2
621610
1:
622611
autia x0, x1
623612
mul x3, x0, x1
624613
ldr x2, [x0]
625614

626-
autiasp
627615
ret
628616
.size bad_unknown_usage_read_nocfg, .-bad_unknown_usage_read_nocfg
629617

@@ -634,15 +622,13 @@ bad_unknown_usage_subreg_read_nocfg:
634622
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1
635623
// CHECK-NEXT: The 1 instructions that leak the affected registers are:
636624
// CHECK-NEXT: 1. {{[0-9a-f]+}}: mul w3, w0, w1
637-
paciasp
638625
adr x2, 1f
639626
br x2
640627
1:
641628
autia x0, x1
642629
mul w3, w0, w1
643630
ldr x2, [x0]
644631

645-
autiasp
646632
ret
647633
.size bad_unknown_usage_subreg_read_nocfg, .-bad_unknown_usage_subreg_read_nocfg
648634

@@ -653,38 +639,33 @@ bad_unknown_usage_update_nocfg:
653639
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1
654640
// CHECK-NEXT: The 1 instructions that leak the affected registers are:
655641
// CHECK-NEXT: 1. {{[0-9a-f]+}}: movk x0, #0x2a, lsl #16
656-
paciasp
657642
adr x2, 1f
658643
br x2
659644
1:
660645
autia x0, x1
661646
movk x0, #42, lsl #16 // does not overwrite x0 completely
662647
ldr x2, [x0]
663648

664-
autiasp
665649
ret
666650
.size bad_unknown_usage_update_nocfg, .-bad_unknown_usage_update_nocfg
667651

668652
.globl good_overwrite_with_constant_nocfg
669653
.type good_overwrite_with_constant_nocfg,@function
670654
good_overwrite_with_constant_nocfg:
671655
// CHECK-NOT: good_overwrite_with_constant_nocfg
672-
paciasp
673656
adr x2, 1f
674657
br x2
675658
1:
676659
autia x0, x1
677660
mov x0, #42
678661

679-
autiasp
680662
ret
681663
.size good_overwrite_with_constant_nocfg, .-good_overwrite_with_constant_nocfg
682664

683665
.globl good_address_arith_nocfg
684666
.type good_address_arith_nocfg,@function
685667
good_address_arith_nocfg:
686668
// CHECK-NOT: good_address_arith_nocfg
687-
paciasp
688669
adr x2, 1f
689670
br x2
690671
1:
@@ -698,7 +679,6 @@ good_address_arith_nocfg:
698679
mov x1, #0
699680
mov x2, #0
700681

701-
autiasp
702682
ret
703683
.size good_address_arith_nocfg, .-good_address_arith_nocfg
704684

0 commit comments

Comments
 (0)