From 65f1f6225e0128a5fccacfa915025d1b4cbfa4b2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 11 Mar 2025 12:24:47 -0700 Subject: [PATCH 1/2] [BPF] Handle traps with kfunc call __bpf_trap NOTE: not working as the symbol is generated at selectiondag stage Currently, middle-end generates 'unreachable' insn if the compiler feels the code is indeed unreachable or the code becomes invalid due to some optimizaiton (e.g. code optimization with uninitialized variables). Right now BPF backend ignores 'unreachable' insn during selectiondag lowering. For cases where 'unreachable' is due to invalid code transformation, such a signal will be missed. Later on, users needs some effort to debug it which impacts developer productivity. This patch enabled selectiondag lowering for 'unreachable' insn. Previous attempt ([1]) tries to have a backend IR pass to filter out 'unreachable' insns in a number of cases. But such pattern matching may misalign with future middle-end optimization with 'unreachable' insns. This patch takes a different approach. The 'unreachable' insn is lowered with special encoding in bpf object file and verifier will do proper verification for the bpf prog. More specifically, the 'unreachable' insn is replaced by a __bpf_trap() function. This function will be a kfunc (in ".ksyms" section) with a weak attribute, but does not have definition. The actual kfunc definition is expected to be in kernel. The __bpf_trap() extern function is also encoded in BTF. The name __bpf_trap() is chosen to satisfy reserved identifier requirement. Besides the uninitialized variable case, the builtin function '__builtin_trap' can also generate kfunc __bpf_trap(). For example in [3], we have # define __bpf_unreachable() __builtin_trap() If the compiler didn't remove __builtin_trap() during middle-end optimization, compilation will fail. With this case, compilation will not fail and __builtin_trap() is converted to __bpf_trap() kfunc. The eventual failure will be in verifier instead of llvm compilation. I tested this patch on bpf selftests and all tests are passed. I also tried original example in [2] and the code looks like below: ; { 0: bf 16 00 00 00 00 00 00 r6 = r1 ; bpf_printk("Start"); 1: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll 0000000000000008: R_BPF_64_64 .rodata 3: b4 02 00 00 06 00 00 00 w2 = 0x6 4: 85 00 00 00 06 00 00 00 call 0x6 ; DEFINE_FUNC_CTX_POINTER(data) 5: 61 61 4c 00 00 00 00 00 w1 = *(u32 *)(r6 + 0x4c) ; bpf_printk("pre ipv6_hdrlen_offset"); 6: 18 01 00 00 06 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x6 ll 0000000000000030: R_BPF_64_64 .rodata 8: b4 02 00 00 17 00 00 00 w2 = 0x17 9: 85 00 00 00 06 00 00 00 call 0x6 10: 85 10 00 00 ff ff ff ff call -0x1 0000000000000050: R_BPF_64_32 __bpf_trap 11: 95 00 00 00 00 00 00 00 exit Eventually kernel verifier will emit the following logs: 10: (85) call __bpf_trap#74479 unexpected __bpf_trap() due to uninitialized variable? In another internal sched-ext bpf prog, with the patch we have bpf code: Disassembly of section .text: 0000000000000000 : ; { 0: bc 13 00 00 00 00 00 00 w3 = w1 1: b4 01 00 00 00 00 00 00 w1 = 0x0 ; const u32 zero = 0; ... 0000000000003a80 : ; { 1872: bc 16 00 00 00 00 00 00 w6 = w1 ; bpf_printk("dom_id %d", dom_id); 1873: 18 01 00 00 3f 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x3f ll 0000000000003a88: R_BPF_64_64 .rodata 1875: b4 02 00 00 0a 00 00 00 w2 = 0xa 1876: bc 63 00 00 00 00 00 00 w3 = w6 1877: 85 00 00 00 06 00 00 00 call 0x6 ; ret = scx_bpf_create_dsq(dom_id, 0); 1878: bc 61 00 00 00 00 00 00 w1 = w6 1879: b4 02 00 00 00 00 00 00 w2 = 0x0 1880: 85 10 00 00 ff ff ff ff call -0x1 0000000000003ac0: R_BPF_64_32 scx_bpf_create_dsq ; domc->node_cpumask = node_data[node_id]; 1881: 85 10 00 00 ff ff ff ff call -0x1 0000000000003ac8: R_BPF_64_32 __bpf_trap 1882: 95 00 00 00 00 00 00 00 exit The verifier can easily report the error too. A bpf flag `-bpf-disable-trap-unreachable` is introduced to disable trapping for 'unreachable' or __builtin_trap. [1] https://github.com/llvm/llvm-project/pull/126858 [2] https://github.com/msune/clang_bpf/blob/main/Makefile#L3 [3] https://github.com/libbpf/libbpf/blob/master/src/bpf_helpers.h --- llvm/lib/Target/BPF/BPF.h | 2 + llvm/lib/Target/BPF/BPFISelLowering.cpp | 62 +++++++++++++++++++++-- llvm/lib/Target/BPF/BPFISelLowering.h | 1 + llvm/lib/Target/BPF/BPFMIPeephole.cpp | 16 ++++++ llvm/lib/Target/BPF/BPFTargetMachine.cpp | 9 ++++ llvm/test/CodeGen/BPF/BTF/builtin_trap.ll | 39 ++++++++++++++ llvm/test/CodeGen/BPF/BTF/unreachable.ll | 53 +++++++++++++++++++ 7 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/BTF/builtin_trap.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/unreachable.ll diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h index 68166e574f35e..5d49949ddea25 100644 --- a/llvm/lib/Target/BPF/BPF.h +++ b/llvm/lib/Target/BPF/BPF.h @@ -22,6 +22,8 @@ class BPFTargetMachine; class InstructionSelector; class PassRegistry; +static const char *BPF_TRAP = "__bpf_trap"; + ModulePass *createBPFCheckAndAdjustIR(); FunctionPass *createBPFISelDag(BPFTargetMachine &TM); diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 6c196309d2d1a..f4f414d192df0 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -21,8 +21,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -68,6 +70,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::TRAP, MVT::Other, Custom); + setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); @@ -326,6 +330,8 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG); + case ISD::TRAP: + return LowerTRAP(Op, DAG); } } @@ -521,10 +527,12 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT, G->getOffset(), 0); } else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) { - Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); - fail(CLI.DL, DAG, - Twine("A call to built-in function '" + StringRef(E->getSymbol()) + - "' is not supported.")); + if (StringRef(E->getSymbol()) != BPF_TRAP) { + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); + fail(CLI.DL, DAG, + Twine("A call to built-in function '" + StringRef(E->getSymbol()) + + "' is not supported.")); + } } // Returns a chain & a flag for retval copy to use. @@ -726,6 +734,52 @@ SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op, return Op; } +static Function *createBPFUnreachable(Module *M) { + if (auto *Fn = M->getFunction(BPF_TRAP)) + return Fn; + + FunctionType *FT = FunctionType::get(Type::getVoidTy(M->getContext()), false); + Function *NewF = + Function::Create(FT, GlobalValue::ExternalWeakLinkage, BPF_TRAP, M); + NewF->setDSOLocal(true); + NewF->setCallingConv(CallingConv::C); + NewF->setSection(".ksyms"); + + if (M->debug_compile_units().empty()) + return NewF; + + DIBuilder DBuilder(*M); + DITypeRefArray ParamTypes = + DBuilder.getOrCreateTypeArray({nullptr /*void return*/}); + DISubroutineType *FuncType = DBuilder.createSubroutineType(ParamTypes); + DICompileUnit *CU = *M->debug_compile_units_begin(); + DISubprogram *SP = + DBuilder.createFunction(CU, BPF_TRAP, BPF_TRAP, nullptr, 0, FuncType, 0, + DINode::FlagZero, DISubprogram::SPFlagZero); + NewF->setSubprogram(SP); + return NewF; +} + +SDValue BPFTargetLowering::LowerTRAP(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + TargetLowering::CallLoweringInfo CLI(DAG); + SmallVector InVals; + SDNode *N = Op.getNode(); + SDLoc DL(N); + + Function *Fn = createBPFUnreachable(MF.getFunction().getParent()); + auto PtrVT = getPointerTy(MF.getDataLayout()); + CLI.Callee = DAG.getTargetGlobalAddress(Fn, DL, PtrVT); + CLI.Chain = N->getOperand(0); + CLI.IsTailCall = false; + CLI.CallConv = CallingConv::C; + CLI.IsVarArg = false; + CLI.DL = DL; + CLI.NoMerge = false; + CLI.DoesNotReturn = true; + return LowerCall(CLI, InVals); +} + const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((BPFISD::NodeType)Opcode) { case BPFISD::FIRST_NUMBER: diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h index 8104895cb7f14..23cbce7094e6b 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -80,6 +80,7 @@ class BPFTargetLowering : public TargetLowering { SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRAP(SDValue Op, SelectionDAG &DAG) const; template SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index 4febf3042fdd9..6275d5b7721c6 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -320,6 +320,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass { bool adjustBranch(); bool insertMissingCallerSavedSpills(); bool removeMayGotoZero(); + bool addExitAfterUnreachable(); public: @@ -336,6 +337,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass { Changed = adjustBranch() || Changed; Changed |= insertMissingCallerSavedSpills(); Changed |= removeMayGotoZero(); + Changed |= addExitAfterUnreachable(); return Changed; } }; @@ -734,6 +736,20 @@ bool BPFMIPreEmitPeephole::removeMayGotoZero() { return Changed; } +// If the last insn in a funciton is 'JAL &bpf_unreachable', let us add an +// 'exit' insn after that insn. This will ensure no fallthrough at the last +// insn, making kernel verification easier. +bool BPFMIPreEmitPeephole::addExitAfterUnreachable() { + MachineBasicBlock &MBB = MF->back(); + MachineInstr &MI = MBB.back(); + if (MI.getOpcode() != BPF::JAL || !MI.getOperand(0).isGlobal() || + MI.getOperand(0).getGlobal()->getName() != BPF_TRAP) + return false; + + BuildMI(&MBB, MI.getDebugLoc(), TII->get(BPF::RET)); + return true; +} + } // end default namespace INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole", diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index 46ba758b55223..0c3f61fdfedd6 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -37,6 +37,10 @@ static cl:: opt DisableMIPeephole("disable-bpf-peephole", cl::Hidden, cl::desc("Disable machine peepholes for BPF")); +static cl::opt + DisableCheckUnreachable("bpf-disable-trap-unreachable", cl::Hidden, + cl::desc("Disable Trap Unreachable for BPF")); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() { // Register the target. RegisterTargetMachine X(getTheBPFleTarget()); @@ -77,6 +81,11 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT, getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()), Subtarget(TT, std::string(CPU), std::string(FS), *this) { + if (!DisableCheckUnreachable) { + this->Options.TrapUnreachable = true; + this->Options.NoTrapAfterNoreturn = true; + } + initAsmInfo(); BPFMCAsmInfo *MAI = diff --git a/llvm/test/CodeGen/BPF/BTF/builtin_trap.ll b/llvm/test/CodeGen/BPF/BTF/builtin_trap.ll new file mode 100644 index 0000000000000..846b1264cc1ad --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/builtin_trap.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s +; RUN: llc -mtriple=bpfel -mcpu=v3 < %s | FileCheck -check-prefixes=CHECK %s + +; BPFTargetMachine Options.NoTrapAfterNoreturn has been set to true, +; so in below code, 'unreachable' will become a noop and +; llvm.trap() will become 'call __bpf_trap' after selectiondag. +define dso_local void @foo(i32 noundef %0) { + tail call void @llvm.trap() + unreachable +} + +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call __bpf_trap +; CHECK-NEXT: exit +; CHECK-NEXT: .Lfunc_end0: + +; CHECK-BTF: [1] FUNC_PROTO '(anon)' ret_type_id=0 vlen=0 +; CHECK-BTF: [2] FUNC '__bpf_trap' type_id=1 linkage=extern +; CHECK-BTF: [3] DATASEC '.ksyms' size=0 vlen=1 +; CHECK-BTF: type_id=2 offset=0 size=0 + +declare void @llvm.trap() #1 + +attributes #1 = {noreturn} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "test_trap.c", directory: "/some/dir") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"frame-pointer", i32 2} +!6 = !{i32 7, !"debug-info-assignment-tracking", i1 true} diff --git a/llvm/test/CodeGen/BPF/BTF/unreachable.ll b/llvm/test/CodeGen/BPF/BTF/unreachable.ll new file mode 100644 index 0000000000000..5f53a74454331 --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/unreachable.ll @@ -0,0 +1,53 @@ +; RUN: llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s +; RUN: llc -mtriple=bpfel -mcpu=v3 < %s | FileCheck -check-prefixes=CHECK %s + +define void @foo() { +entry: + tail call void @bar() + unreachable +} + +; CHECK: foo: +; CHECK-NEXT: .Lfunc_begin0: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call bar +; CHECK-NEXT: call __bpf_trap +; CHECK-NEXT: exit +; CHECK-NEXT: .Lfunc_end0: + +define void @buz() #0 { +entry: + tail call void asm sideeffect "r0 = r1; exit;", ""() + unreachable +} + +; CHECK: buz: +; CHECK-NEXT: .Lfunc_begin1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: exit +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .Lfunc_end1: + +; CHECK-BTF: [1] FUNC_PROTO '(anon)' ret_type_id=0 vlen=0 +; CHECK-BTF: [2] FUNC '__bpf_trap' type_id=1 linkage=extern +; CHECK-BTF: [3] DATASEC '.ksyms' size=0 vlen=1 +; CHECK-BTF: type_id=2 offset=0 size=0 + +declare dso_local void @bar() + +attributes #0 = { naked } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "test.c", directory: "/some/dir") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} From ca6b648b828ada398493740f47dd402d959ad753 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 24 May 2025 10:19:38 -0700 Subject: [PATCH 2/2] Fix issues in BTFDebug for potential missing __bpf_trap btf The funciton __bpf_trap is created during selectiondag lowering. But the function usage could be removed during later machine-level optimization. In such cases, we will generate btf for __bpf_trap in endModule(). --- llvm/lib/Target/BPF/BTFDebug.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 8f2274eb75da8..6c6686390615e 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -1622,6 +1622,13 @@ void BTFDebug::endModule() { // Collect global types/variables except MapDef globals. processGlobals(false); + // In case that BPF_TRAP usage is removed during machine-level optimization, + // generate btf for BPF_TRAP function here. + for (const Function &F : *MMI->getModule()) { + if (F.getName() == BPF_TRAP) + processFuncPrototypes(&F); + } + for (auto &DataSec : DataSecEntries) addType(std::move(DataSec.second));