Skip to content

Commit d5a1fba

Browse files
keesLukacma
authored and committed
[ARM][KCFI] Add backend support for Kernel Control-Flow Integrity (llvm#163698)
Implement KCFI (Kernel Control Flow Integrity) backend support for ARM32, Thumb2, and Thumb1. The Linux kernel has supported ARM KCFI via Clang's generic KCFI implementation, but this has finally started to [cause problems](ClangBuiltLinux/linux#2124), so it's time to get the KCFI operand bundle lowering working on ARM.

Supports patchable-function-prefix with adjusted load offsets. Provides a worst-case instruction size estimate of how large the KCFI bundle is, so that range-limited instructions (e.g. cbz) know how big the indirect calls can become.

ARM implementation notes:
- Four-instruction EOR sequence builds the 32-bit type ID byte-by-byte to work within ARM's modified immediate encoding constraints.
- Scratch register selection: r12 (IP) is preferred, r3 is used as a fallback when r12 holds the call target. r3 gets spilled/reloaded if it is being used as a call argument.
- UDF trap encoding: 0x8000 | (0x1F << 5) | target_reg_index, similar to aarch64's trap encoding.

Thumb2 implementation notes:
- Logically the same as ARM.
- UDF trap encoding: 0x80 | target_reg_index

Thumb1 implementation notes:
- Due to register pressure, 2 scratch registers are needed: r3 and r2, which get spilled/reloaded if they are being used as call args.
- Instead of EOR, an add/lsl sequence is used to load the immediate, followed by a compare.
- No trap encoding.

Update tests to validate all three sub targets.
1 parent 36cbc1e commit d5a1fba

File tree

15 files changed

+1262
-31
lines changed

15 files changed

+1262
-31
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
684684
PassBuilder &PB) {
685685
// If the back-end supports KCFI operand bundle lowering, skip KCFIPass.
686686
if (TargetTriple.getArch() == llvm::Triple::x86_64 ||
687-
TargetTriple.isAArch64(64) || TargetTriple.isRISCV())
687+
TargetTriple.isAArch64(64) || TargetTriple.isRISCV() ||
688+
TargetTriple.isARM() || TargetTriple.isThumb())
688689
return;
689690

690691
// Ensure we lower KCFI operand bundles with -O0.

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 435 additions & 0 deletions
Large diffs are not rendered by default.

llvm/lib/Target/ARM/ARMAsmPrinter.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,20 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
123123
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
124124
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
125125

126+
// KCFI check lowering
127+
void LowerKCFI_CHECK(const MachineInstr &MI);
128+
126129
private:
127130
void EmitSled(const MachineInstr &MI, SledKind Kind);
128131

132+
// KCFI check emission helpers
133+
void EmitKCFI_CHECK_ARM32(Register AddrReg, int64_t Type,
134+
const MachineInstr &Call, int64_t PrefixNops);
135+
void EmitKCFI_CHECK_Thumb2(Register AddrReg, int64_t Type,
136+
const MachineInstr &Call, int64_t PrefixNops);
137+
void EmitKCFI_CHECK_Thumb1(Register AddrReg, int64_t Type,
138+
const MachineInstr &Call, int64_t PrefixNops);
139+
129140
// Helpers for emitStartOfAsmFile() and emitEndOfAsmFile()
130141
void emitAttributes();
131142

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2301,6 +2301,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
23012301
for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
23022302
NewMI->addOperand(MBBI->getOperand(i));
23032303

2304+
NewMI->setCFIType(*MBB.getParent(), MI.getCFIType());
2305+
23042306
// Update call info and delete the pseudo instruction TCRETURN.
23052307
if (MI.isCandidateForAdditionalCallInfo())
23062308
MI.getMF()->moveAdditionalCallInfo(&MI, &*NewMI);

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2849,13 +2849,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
28492849
if (isTailCall) {
28502850
MF.getFrameInfo().setHasTailCall();
28512851
SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
2852+
if (CLI.CFIType)
2853+
Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
28522854
DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
28532855
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
28542856
return Ret;
28552857
}
28562858

28572859
// Returns a chain and a flag for retval copy to use.
28582860
Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
2861+
if (CLI.CFIType)
2862+
Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
28592863
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
28602864
InGlue = Chain.getValue(1);
28612865
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -12008,6 +12012,71 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
1200812012
.add(predOps(ARMCC::AL));
1200912013
}
1201012014

12015+
bool ARMTargetLowering::supportKCFIBundles() const {
12016+
// Unconditionally true: KCFI is supported in all ARM/Thumb modes.
12017+
return true;
12018+
}
12019+
12020+
// Builds the KCFI_CHECK pseudo for the call instruction at MBBI.
//
// Locates the call's target-register operand (its index depends on the call
// opcode), marks that operand non-renamable, and builds a
// KCFI_CHECK_{ARM,Thumb2,Thumb1} instruction at MBBI that carries the target
// register and the call's CFI type. The variant is chosen from the subtarget.
// Returns the newly built instruction.
MachineInstr *
12021+
ARMTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
12022+
MachineBasicBlock::instr_iterator &MBBI,
12023+
const TargetInstrInfo *TII) const {
12024+
assert(MBBI->isCall() && MBBI->getCFIType() &&
12025+
"Invalid call instruction for a KCFI check");
12026+
12027+
MachineOperand *TargetOp = nullptr;
12028+
switch (MBBI->getOpcode()) {
12029+
// ARM mode opcodes: the call target is operand 0.
12030+
case ARM::BLX:
12031+
case ARM::BLX_pred:
12032+
case ARM::BLX_noip:
12033+
case ARM::BLX_pred_noip:
12034+
case ARM::BX_CALL:
12035+
TargetOp = &MBBI->getOperand(0);
12036+
break;
12037+
case ARM::TCRETURNri:
12038+
case ARM::TCRETURNrinotr12:
12039+
case ARM::TAILJMPr:
12040+
case ARM::TAILJMPr4:
12041+
TargetOp = &MBBI->getOperand(0);
12042+
break;
12043+
// Thumb mode opcodes (Thumb1 and Thumb2)
12044+
// Note: Most Thumb call instructions have predicate operands before the
12045+
// target register. Format: tBLXr pred, predreg, target_register, ...
12046+
case ARM::tBLXr: // Thumb1/Thumb2: BLX register (requires V5T)
12047+
case ARM::tBLXr_noip: // Thumb1/Thumb2: BLX register, no IP clobber
12048+
case ARM::tBX_CALL: // Thumb1 only: BX call (push LR, BX)
12049+
TargetOp = &MBBI->getOperand(2);
12050+
break;
12051+
// Tail call instructions don't have predicates, target is operand 0
12052+
case ARM::tTAILJMPr: // Thumb1/Thumb2: Tail call via register
12053+
TargetOp = &MBBI->getOperand(0);
12054+
break;
12055+
default:
12056+
llvm_unreachable("Unexpected CFI call opcode");
12057+
}
12058+
12059+
assert(TargetOp && TargetOp->isReg() && "Invalid target operand");
12060+
// NOTE(review): presumably this keeps later passes from renaming the target
// register, so the check and the call keep referring to the same register;
// confirm against the other backends' EmitKCFICheck.
TargetOp->setIsRenamable(false);
12061+
12062+
// Select the appropriate KCFI_CHECK variant based on the instruction set
12063+
unsigned KCFICheckOpcode;
12064+
if (Subtarget->isThumb()) {
12065+
if (Subtarget->isThumb2()) {
12066+
KCFICheckOpcode = ARM::KCFI_CHECK_Thumb2;
12067+
} else {
12068+
KCFICheckOpcode = ARM::KCFI_CHECK_Thumb1;
12069+
}
12070+
} else {
12071+
KCFICheckOpcode = ARM::KCFI_CHECK_ARM;
12072+
}
12073+
12074+
// Operands of the pseudo: the call-target register, then the CFI type.
return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(KCFICheckOpcode))
12075+
.addReg(TargetOp->getReg())
12076+
.addImm(MBBI->getCFIType())
12077+
.getInstr();
12078+
}
12079+
1201112080
MachineBasicBlock *
1201212081
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1201312082
MachineBasicBlock *BB) const {

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,12 @@ class VectorType;
447447
void AdjustInstrPostInstrSelection(MachineInstr &MI,
448448
SDNode *Node) const override;
449449

450+
bool supportKCFIBundles() const override;
451+
452+
MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
453+
MachineBasicBlock::instr_iterator &MBBI,
454+
const TargetInstrInfo *TII) const override;
455+
450456
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
451457
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
452458
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6535,6 +6535,36 @@ def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPRPair:$addr_temp_out),
65356535

65366536
def : Pat<(atomic_fence (timm), 0), (MEMBARRIER)>;
65376537

6538+
//===----------------------------------------------------------------------===//
6539+
// KCFI check pseudo-instruction.
6540+
//===----------------------------------------------------------------------===//
6541+
// KCFI_CHECK pseudo-instruction for Kernel Control-Flow Integrity.
6542+
// Expands to a sequence that verifies the function pointer's type hash.
6543+
// Different sizes for different architectures due to different expansions.
6544+
6545+
// ARM-mode KCFI check pseudo. Operands: the call-target register ($ptr) and
// the expected type hash ($type).
//
// Size must be an upper bound on the expanded sequence. The base expansion
// (bic, ldr, 4x eor, beq, udf) is 8 ARM instructions = 32 bytes; when the
// scratch register has to be spilled and reloaded around the check
// (stmdb ... ldm) it grows to 10 instructions = 40 bytes.
def KCFI_CHECK_ARM
6546+
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
6547+
Sched<[]>,
6548+
Requires<[IsARM]> {
6549+
let Size = 40; // worst-case 10 instructions (spill, bic, ldr, 4x eor, reload, beq, udf)
6550+
}
6551+
6552+
// Thumb2 KCFI check pseudo. Operands: the call-target register ($ptr) and the
// expected type hash ($type).
// NOTE(review): the worst-case sequence listed below is 10 instructions, and
// several of them (bic/eor with immediate, ldr with negative offset, beq.w)
// presumably only have 32-bit encodings -- confirm that 32 bytes is really an
// upper bound for the emitted sequence.
def KCFI_CHECK_Thumb2
6553+
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
6554+
Sched<[]>,
6555+
Requires<[IsThumb2]> {
6556+
let Size =
6557+
32; // worst-case 10 instructions (push, bic, ldr, 4x eor, pop, beq.w, udf)
6558+
}
6559+
6560+
// Thumb1 KCFI check pseudo. Operands: the call-target register ($ptr) and the
// expected type hash ($type). Size is the stated worst case: 50 bytes for a
// 25-instruction expansion.
def KCFI_CHECK_Thumb1
6561+
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
6562+
Sched<[]>,
6563+
Requires<[IsThumb1Only]> {
6564+
let Size = 50; // worst-case 25 instructions (pushes, bic helper, type
6565+
// building, cmp, pops)
6566+
}
6567+
65386568
//===----------------------------------------------------------------------===//
65396569
// Instructions used for emitting unwind opcodes on Windows.
65406570
//===----------------------------------------------------------------------===//

llvm/lib/Target/ARM/ARMTargetMachine.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
111111
initializeMVELaneInterleavingPass(Registry);
112112
initializeARMFixCortexA57AES1742098Pass(Registry);
113113
initializeARMDAGToDAGISelLegacyPass(Registry);
114+
initializeKCFIPass(Registry);
114115
}
115116

116117
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -487,6 +488,9 @@ void ARMPassConfig::addPreSched2() {
487488
// proper scheduling.
488489
addPass(createARMExpandPseudoPass());
489490

491+
// Emit KCFI checks for indirect calls.
492+
addPass(createKCFIPass());
493+
490494
if (getOptLevel() != CodeGenOptLevel::None) {
491495
// When optimising for size, always run the Thumb2SizeReduction pass before
492496
// IfConversion. Otherwise, check whether IT blocks are restricted
@@ -517,9 +521,12 @@ void ARMPassConfig::addPreSched2() {
517521
void ARMPassConfig::addPreEmitPass() {
518522
addPass(createThumb2SizeReductionPass());
519523

520-
// Constant island pass work on unbundled instructions.
524+
// Unpack bundles for:
525+
// - Thumb2: Constant island pass requires unbundled instructions
526+
// - KCFI: KCFI_CHECK pseudo instructions need to be unbundled for AsmPrinter
521527
addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
522-
return MF.getSubtarget<ARMSubtarget>().isThumb2();
528+
return MF.getSubtarget<ARMSubtarget>().isThumb2() ||
529+
MF.getFunction().getParent()->getModuleFlag("kcfi");
523530
}));
524531

525532
// Don't optimize barriers or block placement at -O0.
@@ -530,6 +537,7 @@ void ARMPassConfig::addPreEmitPass() {
530537
}
531538

532539
void ARMPassConfig::addPreEmitPass2() {
540+
533541
// Inserts fixup instructions before unsafe AES operations. Instructions may
534542
// be inserted at the start of blocks and at within blocks so this pass has to
535543
// come before those below.

llvm/test/CodeGen/ARM/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@
166166
; CHECK-NEXT: ARM Execution Domain Fix
167167
; CHECK-NEXT: BreakFalseDeps
168168
; CHECK-NEXT: ARM pseudo instruction expansion pass
169+
; CHECK-NEXT: Insert KCFI indirect call checks
169170
; CHECK-NEXT: Thumb2 instruction size reduce pass
170171
; CHECK-NEXT: MachineDominator Tree Construction
171172
; CHECK-NEXT: Machine Natural Loop Construction

llvm/test/CodeGen/ARM/kcfi-arm.ll

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=ASM
3+
; RUN: llc -mtriple=armv7-linux-gnueabi -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck %s --check-prefixes=MIR,ISEL
4+
; RUN: llc -mtriple=armv7-linux-gnueabi -verify-machineinstrs -stop-after=kcfi < %s | FileCheck %s --check-prefixes=MIR,KCFI
5+
6+
; MIR checks for all functions (grouped here to prevent update_llc_test_checks.py from removing them)
7+
8+
; MIR-LABEL: name: f1
9+
; MIR: body:
10+
11+
; ISEL: BLX %0, csr_aapcs,{{.*}} cfi-type 12345678
12+
13+
; KCFI: BUNDLE{{.*}} {
14+
; KCFI-NEXT: KCFI_CHECK_ARM $r0, 12345678
15+
; KCFI-NEXT: BLX killed $r0, csr_aapcs,{{.*}}
16+
; KCFI-NEXT: }
17+
18+
; MIR-LABEL: name: f2
19+
; MIR: body:
20+
21+
; ISEL: TCRETURNri %0, 0, csr_aapcs, implicit $sp, cfi-type 12345678
22+
23+
; KCFI: BUNDLE{{.*}} {
24+
; KCFI-NEXT: KCFI_CHECK_ARM $r0, 12345678
25+
; KCFI-NEXT: TAILJMPr killed $r0, csr_aapcs, implicit $sp, implicit $sp
26+
; KCFI-NEXT: }
27+
28+
; ASM: .long 12345678
29+
define void @f1(ptr noundef %x) !kcfi_type !1 {
30+
; ASM-LABEL: f1:
31+
; ASM: @ %bb.0:
32+
; ASM-NEXT: .save {r11, lr}
33+
; ASM-NEXT: push {r11, lr}
34+
; ASM-NEXT: bic r12, r0, #1
35+
; ASM-NEXT: ldr r12, [r12, #-4]
36+
; ASM-NEXT: eor r12, r12, #78
37+
; ASM-NEXT: eor r12, r12, #24832
38+
; ASM-NEXT: eor r12, r12, #12320768
39+
; ASM-NEXT: eors r12, r12, #0
40+
; ASM-NEXT: beq .Ltmp0
41+
; ASM-NEXT: udf #33760
42+
; ASM-NEXT: .Ltmp0:
43+
; ASM-NEXT: blx r0
44+
; ASM-NEXT: pop {r11, pc}
45+
46+
call void %x() [ "kcfi"(i32 12345678) ]
47+
ret void
48+
}
49+
50+
; Test with tail call
51+
define void @f2(ptr noundef %x) !kcfi_type !1 {
52+
; ASM-LABEL: f2:
53+
; ASM: @ %bb.0:
54+
; ASM-NEXT: bic r12, r0, #1
55+
; ASM-NEXT: ldr r12, [r12, #-4]
56+
; ASM-NEXT: eor r12, r12, #78
57+
; ASM-NEXT: eor r12, r12, #24832
58+
; ASM-NEXT: eor r12, r12, #12320768
59+
; ASM-NEXT: eors r12, r12, #0
60+
; ASM-NEXT: beq .Ltmp1
61+
; ASM-NEXT: udf #33760
62+
; ASM-NEXT: .Ltmp1:
63+
; ASM-NEXT: bx r0
64+
65+
tail call void %x() [ "kcfi"(i32 12345678) ]
66+
ret void
67+
}
68+
69+
; Test r3 spill/reload when target is r12 and r3 is a call argument.
70+
; With 5+ arguments (target + 4 args), r0-r3 are all used for arguments,
71+
; forcing r3 to be spilled when we need it as scratch register.
72+
define void @f3_r3_spill(ptr noundef %target, i32 %a, i32 %b, i32 %c, i32 %d) !kcfi_type !1 {
73+
; ASM-LABEL: f3_r3_spill:
74+
; ASM: @ %bb.0:
75+
; ASM-NEXT: .save {r11, lr}
76+
; ASM-NEXT: push {r11, lr}
77+
; ASM-NEXT: mov lr, r3
78+
; ASM-NEXT: ldr r3, [sp, #8]
79+
; ASM-NEXT: mov r12, r0
80+
; ASM-NEXT: mov r0, r1
81+
; ASM-NEXT: mov r1, r2
82+
; ASM-NEXT: mov r2, lr
83+
; ASM-NEXT: stmdb sp!, {r3}
84+
; ASM-NEXT: bic r3, r12, #1
85+
; ASM-NEXT: ldr r3, [r3, #-4]
86+
; ASM-NEXT: eor r3, r3, #78
87+
; ASM-NEXT: eor r3, r3, #24832
88+
; ASM-NEXT: eor r3, r3, #12320768
89+
; ASM-NEXT: eors r3, r3, #0
90+
; ASM-NEXT: ldm sp!, {r3}
91+
; ASM-NEXT: beq .Ltmp2
92+
; ASM-NEXT: udf #33772
93+
; ASM-NEXT: .Ltmp2:
94+
; ASM-NEXT: blx r12
95+
; ASM-NEXT: pop {r11, pc}
96+
; Arguments: r0=%target, r1=%a, r2=%b, r3=%c, [sp]=%d
97+
; Call needs: r0=%a, r1=%b, r2=%c, r3=%d, target in r12
98+
; Compiler shuffles arguments into place, saving r3 (c) in lr, loading d from stack
99+
; r3 is live as 4th argument, so push it before KCFI check
100+
; Restore r3 immediately after comparison, before branch
101+
call void %target(i32 %a, i32 %b, i32 %c, i32 %d) [ "kcfi"(i32 12345678) ]
102+
ret void
103+
}
104+
105+
; Test with 3 arguments - r3 is not a call argument, so the target is placed in r3 and r12 is used as scratch without spilling
106+
define void @f4_r3_unused(ptr noundef %target, i32 %a, i32 %b) !kcfi_type !1 {
107+
; ASM-LABEL: f4_r3_unused:
108+
; ASM: @ %bb.0:
109+
; ASM-NEXT: .save {r11, lr}
110+
; ASM-NEXT: push {r11, lr}
111+
; ASM-NEXT: mov r3, r0
112+
; ASM-NEXT: mov r0, r1
113+
; ASM-NEXT: mov r1, r2
114+
; ASM-NEXT: bic r12, r3, #1
115+
; ASM-NEXT: ldr r12, [r12, #-4]
116+
; ASM-NEXT: eor r12, r12, #78
117+
; ASM-NEXT: eor r12, r12, #24832
118+
; ASM-NEXT: eor r12, r12, #12320768
119+
; ASM-NEXT: eors r12, r12, #0
120+
; ASM-NEXT: beq .Ltmp3
121+
; ASM-NEXT: udf #33763
122+
; ASM-NEXT: .Ltmp3:
123+
; ASM-NEXT: blx r3
124+
; ASM-NEXT: pop {r11, pc}
125+
; Only 3 arguments total, so r3 is not used as call argument
126+
; Compiler puts target→r3, a→r0, b→r1
127+
; r3 is the target, so we use r12 as scratch (no spill needed)
128+
call void %target(i32 %a, i32 %b) [ "kcfi"(i32 12345678) ]
129+
ret void
130+
}
131+
132+
!llvm.module.flags = !{!0}
133+
!0 = !{i32 4, !"kcfi", i32 1}
134+
!1 = !{i32 12345678}
135+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
136+
; ISEL: {{.*}}
137+
; KCFI: {{.*}}
138+
; MIR: {{.*}}

0 commit comments

Comments
 (0)