From 383bd60ab7d2b1f58fc59805fc7829703d357052 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Mon, 7 Jul 2025 16:19:59 -0700 Subject: [PATCH 01/31] [IR] llvm.reloc.none intrinsic for no-op symbol references This intrinsic emits a BFD_RELOC_NONE relocation at the point of call, which allows optimizations and languages to explicitly pull in symbols from static libraries without there being any code or data that has an effectual relocation against such a symbol. See issue #146159 for context. --- llvm/docs/LangRef.rst | 32 +++++++++++++++++++ llvm/include/llvm/CodeGen/ISDOpcodes.h | 3 ++ llvm/include/llvm/CodeGen/SelectionDAGISel.h | 1 + llvm/include/llvm/IR/Intrinsics.td | 3 ++ llvm/include/llvm/Support/TargetOpcodes.def | 3 ++ llvm/include/llvm/Target/Target.td | 5 +++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 14 ++++++++ .../SelectionDAG/SelectionDAGBuilder.cpp | 13 ++++++++ .../SelectionDAG/SelectionDAGDumper.cpp | 2 ++ .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 8 +++++ 10 files changed, 84 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2ec1cb51eda89..44032a5db4965 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -30668,6 +30668,38 @@ This intrinsic does nothing, but optimizers must consider it a use of its single operand and should try to preserve the intrinsic and its position in the function. +.. _llvm_reloc_none: + +'``llvm.reloc.none``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.reloc.none(ptrty %ptr) + +Overview: +""""""""" + +The ``llvm.reloc.none`` intrinsic emits a no-op relocation against a given +operand symbol. This can bring the symbol +definition into the link without emitting any code or data to the binary for +that purpose. + +Arguments: +"""""""""" + +The ``llvm.fake.use`` intrinsic takes one argument, which may be any global +value. 
+ +Semantics: +"""""""""" + +This intrinsic emits a no-op relocation at the location of the intrinsic call +for the symbol that corresponds to the global value argument. + Stack Map Intrinsics -------------------- diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index c76c83d84b3c7..9ad6e6fc00fad 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1531,6 +1531,9 @@ enum NodeType { #define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID, #include "llvm/IR/VPIntrinsics.def" + // Issue a no-op relocation against a given symbol at the current location. + RELOC_NONE, + // The `llvm.experimental.convergence.*` intrinsics. CONVERGENCECTRL_ANCHOR, CONVERGENCECTRL_ENTRY, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 5241a51dd8cd8..c29a902be1be7 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -473,6 +473,7 @@ class SelectionDAGISel { void Select_WRITE_REGISTER(SDNode *Op); void Select_UNDEF(SDNode *N); void Select_FAKE_USE(SDNode *N); + void Select_RELOC_NONE(SDNode *N); void CannotYetSelect(SDNode *N); void Select_FREEZE(SDNode *N); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 8e2e0604cb3af..d62ccceb76b62 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1913,6 +1913,9 @@ def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatch def int_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; +def int_reloc_none : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], + [IntrHasSideEffects, IntrInaccessibleMemOnly, IntrWillReturn]>; + //===---------------- Vector Predication Intrinsics --------------===// // Memory Intrinsics def int_vp_store : DefaultAttrsIntrinsic<[], diff --git a/llvm/include/llvm/Support/TargetOpcodes.def 
b/llvm/include/llvm/Support/TargetOpcodes.def index b905576b61791..aca7e6232df62 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -233,6 +233,9 @@ HANDLE_TARGET_OPCODE(MEMBARRIER) // using. HANDLE_TARGET_OPCODE(JUMP_TABLE_DEBUG_INFO) +// Issue a no-op relocation against a given symbol at the current location. +HANDLE_TARGET_OPCODE(RELOC_NONE) + HANDLE_TARGET_OPCODE(CONVERGENCECTRL_ENTRY) HANDLE_TARGET_OPCODE(CONVERGENCECTRL_ANCHOR) HANDLE_TARGET_OPCODE(CONVERGENCECTRL_LOOP) diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 38c3b6064d267..c0133af130654 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1528,6 +1528,11 @@ def JUMP_TABLE_DEBUG_INFO : StandardPseudoInstruction { let Size = 0; let isMeta = true; } +def RELOC_NONE : StandardPseudoInstruction { + let OutOperandList = (outs); + let InOperandList = (ins unknown:$symbol); + let hasSideEffects = true; +} let hasSideEffects = false, isMeta = true, isConvergent = true in { def CONVERGENCECTRL_ANCHOR : StandardPseudoInstruction { diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index cd14a4f57f760..7b4d62806d3d4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2037,6 +2037,20 @@ void AsmPrinter::emitFunctionBody() { // This is only used to influence register allocation behavior, no // actual initialization is needed. break; + case TargetOpcode::RELOC_NONE: { + // Generate a temporary label for the current PC. 
+ MCSymbol *Sym = OutContext.createTempSymbol("reloc_none"); + OutStreamer->emitLabel(Sym); + const MCExpr *Dot = MCSymbolRefExpr::create(Sym, OutContext); + + assert(MI.getNumOperands() == 1 && + "RELOC_NONE can only have one operand"); + const MCExpr *Value = MCSymbolRefExpr::create( + getSymbol(MI.getOperand(0).getGlobal()), OutContext); + OutStreamer->emitRelocDirective(*Dot, "BFD_RELOC_NONE", Value, SMLoc(), + *STI); + break; + } default: emitInstruction(&MI); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 76c7e9d1ff94e..3cfe4e4c81e08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7750,6 +7750,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } + case Intrinsic::reloc_none: { + SDValue V = getValue(I.getArgOperand(0)); + auto *GA = dyn_cast(V); + if (!GA) + report_fatal_error("llvm.reloc.none operand must be a GlobalValue"); + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = DAG.getTargetGlobalAddress(GA->getGlobal(), sdl, V.getValueType(), + GA->getOffset()); + DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops)); + return; + } + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { // Get the exception pointer vreg, copy from it, and resize it to fit. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 4b2a00c2e2cfa..4e86c124a5384 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -471,6 +471,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LIFETIME_END: return "lifetime.end"; case ISD::FAKE_USE: return "fake_use"; + case ISD::RELOC_NONE: + return "reloc_none"; case ISD::PSEUDO_PROBE: return "pseudoprobe"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ece50ed95fc49..f92961aa82cc5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2521,6 +2521,11 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) { N->getOperand(1), N->getOperand(0)); } +void SelectionDAGISel::Select_RELOC_NONE(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::RELOC_NONE, N->getValueType(0), + N->getOperand(1), N->getOperand(0)); +} + void SelectionDAGISel::Select_FREEZE(SDNode *N) { // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now. 
// If FREEZE instruction is added later, the code below must be changed as @@ -3296,6 +3301,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::FAKE_USE: Select_FAKE_USE(NodeToMatch); return; + case ISD::RELOC_NONE: + Select_RELOC_NONE(NodeToMatch); + return; case ISD::FREEZE: Select_FREEZE(NodeToMatch); return; From cd9446389816ec3a2f43347043e9e1143c63ffff Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 8 Jul 2025 14:59:30 -0700 Subject: [PATCH 02/31] fake.use -> reloc.none --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 44032a5db4965..dcd508b72cf6e 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -30691,7 +30691,7 @@ that purpose. Arguments: """""""""" -The ``llvm.fake.use`` intrinsic takes one argument, which may be any global +The ``llvm.reloc.none`` intrinsic takes one argument, which may be any global value. Semantics: From a0456268ab25c2c92f817cdf5893566877000018 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Mon, 21 Jul 2025 14:01:01 -0700 Subject: [PATCH 03/31] Take symbol name by metadata arg rather than ptr to GlobalValue --- llvm/docs/LangRef.rst | 8 ++++---- llvm/include/llvm/IR/Intrinsics.td | 2 +- .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 +++++++------ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index dcd508b72cf6e..6a283f16ae96a 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -30691,14 +30691,14 @@ that purpose. Arguments: """""""""" -The ``llvm.reloc.none`` intrinsic takes one argument, which may be any global -value. +The ``llvm.reloc.none`` intrinsic takes the symbol as a metadata string +argument. Semantics: """""""""" -This intrinsic emits a no-op relocation at the location of the intrinsic call -for the symbol that corresponds to the global value argument. 
+This intrinsic emits a no-op relocation for the symbol the location of the +intrinsic call. Stack Map Intrinsics diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index d62ccceb76b62..75369f80d67fe 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1913,7 +1913,7 @@ def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatch def int_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; -def int_reloc_none : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], +def int_reloc_none : DefaultAttrsIntrinsic<[], [llvm_metadata_ty], [IntrHasSideEffects, IntrInaccessibleMemOnly, IntrWillReturn]>; //===---------------- Vector Predication Intrinsics --------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3cfe4e4c81e08..13b894d95bbe1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7751,14 +7751,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::reloc_none: { - SDValue V = getValue(I.getArgOperand(0)); - auto *GA = dyn_cast(V); - if (!GA) - report_fatal_error("llvm.reloc.none operand must be a GlobalValue"); + Metadata *MD = cast(I.getArgOperand(0))->getMetadata(); + StringRef SymbolName = cast(MD)->getString(); + auto *M = const_cast(I.getModule()); + auto *RelocSymbol = cast( + M->getOrInsertGlobal(SymbolName, StructType::create(M->getContext()))); SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getTargetGlobalAddress(GA->getGlobal(), sdl, V.getValueType(), - GA->getOffset()); + Ops[1] = DAG.getTargetGlobalAddress( + RelocSymbol, sdl, TLI.getPointerTy(DAG.getDataLayout()), 0); DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops)); return; } From f439cb694cb0b8b377a285835757139ec5750daf Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 23 
Jul 2025 15:01:09 -0700 Subject: [PATCH 04/31] Add a generic reloc_none test --- llvm/test/CodeGen/Generic/reloc_none.ll | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 llvm/test/CodeGen/Generic/reloc_none.ll diff --git a/llvm/test/CodeGen/Generic/reloc_none.ll b/llvm/test/CodeGen/Generic/reloc_none.ll new file mode 100644 index 0000000000000..0c8b7a57aca83 --- /dev/null +++ b/llvm/test/CodeGen/Generic/reloc_none.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s | FileCheck %s + +; CHECK: .reloc {{.*}}, BFD_RELOC_NONE, foo + +define void @test_reloc_none() { + call void @llvm.reloc.none(metadata !"foo") + ret void +} + +declare void @llvm.reloc.none(metadata) From 312f6881d034994936d34256602b9cb423934059 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Fri, 25 Jul 2025 14:09:45 -0700 Subject: [PATCH 05/31] IR verifier check and test --- llvm/lib/IR/Verifier.cpp | 6 ++++++ llvm/test/Verifier/reloc_none.ll | 13 +++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 llvm/test/Verifier/reloc_none.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 48007be924bda..c9d47d945f77c 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5933,6 +5933,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Check(cast(Call.getArgOperand(3))->getZExtValue() < 2, "cache type argument to llvm.prefetch must be 0-1", Call); break; + case Intrinsic::reloc_none: { + Check(isa( + cast(Call.getArgOperand(0))->getMetadata()), + "llvm.reloc.none argument must be a metadata string", &Call); + break; + } case Intrinsic::stackprotector: Check(isa(Call.getArgOperand(1)->stripPointerCasts()), "llvm.stackprotector parameter #2 must resolve to an alloca.", Call); diff --git a/llvm/test/Verifier/reloc_none.ll b/llvm/test/Verifier/reloc_none.ll new file mode 100644 index 0000000000000..55c470a6a5fe6 --- /dev/null +++ b/llvm/test/Verifier/reloc_none.ll @@ -0,0 +1,13 @@ +; RUN: not opt -S -passes=verify 2>&1 < %s | 
FileCheck %s + +; CHECK: llvm.reloc.none argument must be a metadata string +; CHECK-NEXT: call void @llvm.reloc.none(metadata !0) + +define void @test_reloc_none_bad_arg() { + call void @llvm.reloc.none(metadata !0) + ret void +} + +declare void @llvm.reloc.none(metadata) + +!0 = !{} From 5d2bb5e8bc5c631a48cb28a118678106bc977519 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Mon, 28 Jul 2025 14:58:01 -0700 Subject: [PATCH 06/31] Remove unneeded assertion from AsmPrinter --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 7b4d62806d3d4..353678ea98c1e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2042,9 +2042,6 @@ void AsmPrinter::emitFunctionBody() { MCSymbol *Sym = OutContext.createTempSymbol("reloc_none"); OutStreamer->emitLabel(Sym); const MCExpr *Dot = MCSymbolRefExpr::create(Sym, OutContext); - - assert(MI.getNumOperands() == 1 && - "RELOC_NONE can only have one operand"); const MCExpr *Value = MCSymbolRefExpr::create( getSymbol(MI.getOperand(0).getGlobal()), OutContext); OutStreamer->emitRelocDirective(*Dot, "BFD_RELOC_NONE", Value, SMLoc(), From f67a868608eb4bc30d5fa1587c2c1369f6e9eddd Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Mon, 28 Jul 2025 14:59:56 -0700 Subject: [PATCH 07/31] Use llvm-as for test --- llvm/test/Verifier/reloc_none.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Verifier/reloc_none.ll b/llvm/test/Verifier/reloc_none.ll index 55c470a6a5fe6..9c96799a36a36 100644 --- a/llvm/test/Verifier/reloc_none.ll +++ b/llvm/test/Verifier/reloc_none.ll @@ -1,4 +1,4 @@ -; RUN: not opt -S -passes=verify 2>&1 < %s | FileCheck %s +; RUN: not llvm-as -disable-output 2>&1 %s | FileCheck %s ; CHECK: llvm.reloc.none argument must be a metadata string ; CHECK-NEXT: call void @llvm.reloc.none(metadata !0) From 
eac2e300b022aaa40e543e611aa1541dee2344d1 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 29 Jul 2025 14:19:17 -0700 Subject: [PATCH 08/31] Rename reloc_none.ll to reloc-none.ll --- llvm/test/CodeGen/Generic/{reloc_none.ll => reloc-none.ll} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/Generic/{reloc_none.ll => reloc-none.ll} (100%) diff --git a/llvm/test/CodeGen/Generic/reloc_none.ll b/llvm/test/CodeGen/Generic/reloc-none.ll similarity index 100% rename from llvm/test/CodeGen/Generic/reloc_none.ll rename to llvm/test/CodeGen/Generic/reloc-none.ll From d4b53168ee192cff9d2df5b201f01d8c7e51b3b7 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 29 Jul 2025 14:19:48 -0700 Subject: [PATCH 09/31] Lower reloc.none in Global ISel --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 10 ++++++++++ llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll | 14 ++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 541269ab6bfce..dbf13b134a38f 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2668,6 +2668,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::experimental_convergence_entry: case Intrinsic::experimental_convergence_loop: return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder); + case Intrinsic::reloc_none: { + Metadata *MD = cast(CI.getArgOperand(0))->getMetadata(); + StringRef SymbolName = cast(MD)->getString(); + auto *M = const_cast(CI.getModule()); + auto *RelocSymbol = cast( + M->getOrInsertGlobal(SymbolName, StructType::create(M->getContext()))); + MIRBuilder.buildInstr(TargetOpcode::RELOC_NONE) + .addGlobalAddress(RelocSymbol); + return true; + } } return false; } diff --git a/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll 
b/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll new file mode 100644 index 0000000000000..841c9a6d62d9e --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK + +define void @test_reloc_none() { +; CHECK-LABEL: test_reloc_none: +; CHECK: # %bb.0: +; CHECK-NEXT: .Lreloc_none0: +; CHECK-NEXT: .reloc .Lreloc_none0, BFD_RELOC_NONE, foo +; CHECK-NEXT: retq + call void @llvm.reloc.none(metadata !"foo") + ret void +} + +declare void @llvm.reloc.none(metadata) From 2d891ba68bc06a6216f1863741bc872ddf893ee8 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 29 Jul 2025 14:36:29 -0700 Subject: [PATCH 10/31] Remove arg from emitRelocDirective call --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 353678ea98c1e..822bfc58c3fe8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2044,8 +2044,7 @@ void AsmPrinter::emitFunctionBody() { const MCExpr *Dot = MCSymbolRefExpr::create(Sym, OutContext); const MCExpr *Value = MCSymbolRefExpr::create( getSymbol(MI.getOperand(0).getGlobal()), OutContext); - OutStreamer->emitRelocDirective(*Dot, "BFD_RELOC_NONE", Value, SMLoc(), - *STI); + OutStreamer->emitRelocDirective(*Dot, "BFD_RELOC_NONE", Value, SMLoc()); break; } default: From 984b060954aa293c3bee7244354497db536c6865 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 29 Jul 2025 15:01:47 -0700 Subject: [PATCH 11/31] Update tests --- .../CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir | 4 ++-- .../CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir | 4 ++-- .../TableGen/GlobalISelCombinerEmitter/match-table-cxx.td | 2 +- 
3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 040f97f96ee21..98522801680e1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -70,11 +70,11 @@ # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # -# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices +# DEBUG-NEXT: G_ABDS (opcode 66): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # -# DEBUG-NEXT: G_ABDU (opcode 66): 1 type index, 0 imm indices +# DEBUG-NEXT: G_ABDU (opcode 67): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index 9d68a6d72c486..b0a69eab190f1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -72,11 +72,11 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # -# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices +# DEBUG-NEXT: G_ABDS (opcode 66): 1 type index, 0 imm indices # DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT:.. 
imm index coverage check SKIPPED: no rules defined # -# DEBUG-NEXT:G_ABDU (opcode 66): 1 type index, 0 imm indices +# DEBUG-NEXT:G_ABDU (opcode 67): 1 type index, 0 imm indices # DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td index ce4f0108b4843..9a7e21f0a9b07 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td @@ -96,7 +96,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(99), GIMT_Encode2(210), /*)*//*default:*//*Label 5*/ GIMT_Encode4(520), +// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(100), GIMT_Encode2(211), /*)*//*default:*//*Label 5*/ GIMT_Encode4(520), // CHECK-NEXT: /* 10 */ /*TargetOpcode::G_STORE*//*Label 0*/ GIMT_Encode4(454), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /* 182 */ /*TargetOpcode::G_SEXT*//*Label 1*/ GIMT_Encode4(472), GIMT_Encode4(0), 
// CHECK-NEXT: /* 190 */ /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(484), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), From ccafe0c783259e2b9992477c8d254046e38e9519 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 20 Aug 2025 16:24:34 -0700 Subject: [PATCH 12/31] Take symbol name by GlobalValue again to avoid modifying Module --- llvm/docs/LangRef.rst | 8 ++++---- llvm/include/llvm/IR/Intrinsics.td | 2 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 7 +------ .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 11 ++++------- llvm/lib/IR/Verifier.cpp | 5 ++--- llvm/test/CodeGen/Generic/reloc-none.ll | 7 +++++-- llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll | 7 +++++-- llvm/test/Verifier/reloc-none.ll | 13 +++++++++++++ llvm/test/Verifier/reloc_none.ll | 13 ------------- 9 files changed, 35 insertions(+), 38 deletions(-) create mode 100644 llvm/test/Verifier/reloc-none.ll delete mode 100644 llvm/test/Verifier/reloc_none.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 6a283f16ae96a..dcd508b72cf6e 100644 --- a/llvm/docs/LangRef.rst +++ 
b/llvm/docs/LangRef.rst @@ -30691,14 +30691,14 @@ that purpose. Arguments: """""""""" -The ``llvm.reloc.none`` intrinsic takes the symbol as a metadata string -argument. +The ``llvm.reloc.none`` intrinsic takes one argument, which may be any global +value. Semantics: """""""""" -This intrinsic emits a no-op relocation for the symbol the location of the -intrinsic call. +This intrinsic emits a no-op relocation at the location of the intrinsic call +for the symbol that corresponds to the global value argument. Stack Map Intrinsics diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 75369f80d67fe..d62ccceb76b62 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1913,7 +1913,7 @@ def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatch def int_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; -def int_reloc_none : DefaultAttrsIntrinsic<[], [llvm_metadata_ty], +def int_reloc_none : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [IntrHasSideEffects, IntrInaccessibleMemOnly, IntrWillReturn]>; //===---------------- Vector Predication Intrinsics --------------===// diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index dbf13b134a38f..fa6d953f7040d 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2669,13 +2669,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::experimental_convergence_loop: return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder); case Intrinsic::reloc_none: { - Metadata *MD = cast(CI.getArgOperand(0))->getMetadata(); - StringRef SymbolName = cast(MD)->getString(); - auto *M = const_cast(CI.getModule()); - auto *RelocSymbol = cast( - M->getOrInsertGlobal(SymbolName, StructType::create(M->getContext()))); MIRBuilder.buildInstr(TargetOpcode::RELOC_NONE) - 
.addGlobalAddress(RelocSymbol); + .addGlobalAddress(cast(CI.getArgOperand(0))); return true; } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 13b894d95bbe1..c6ddb81aac983 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7751,15 +7751,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::reloc_none: { - Metadata *MD = cast(I.getArgOperand(0))->getMetadata(); - StringRef SymbolName = cast(MD)->getString(); - auto *M = const_cast(I.getModule()); - auto *RelocSymbol = cast( - M->getOrInsertGlobal(SymbolName, StructType::create(M->getContext()))); + SDValue V = getValue(I.getArgOperand(0)); + const auto *GA = cast(V); SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getTargetGlobalAddress( - RelocSymbol, sdl, TLI.getPointerTy(DAG.getDataLayout()), 0); + Ops[1] = DAG.getTargetGlobalAddress(GA->getGlobal(), sdl, V.getValueType(), + GA->getOffset()); DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops)); return; } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index c9d47d945f77c..5003596b14d64 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5934,9 +5934,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "cache type argument to llvm.prefetch must be 0-1", Call); break; case Intrinsic::reloc_none: { - Check(isa( - cast(Call.getArgOperand(0))->getMetadata()), - "llvm.reloc.none argument must be a metadata string", &Call); + Check(isa(Call.getArgOperand(0)), + "llvm.reloc.none argument must be a global value", &Call); break; } case Intrinsic::stackprotector: diff --git a/llvm/test/CodeGen/Generic/reloc-none.ll b/llvm/test/CodeGen/Generic/reloc-none.ll index 0c8b7a57aca83..e87d81b6d0463 100644 --- a/llvm/test/CodeGen/Generic/reloc-none.ll +++ b/llvm/test/CodeGen/Generic/reloc-none.ll @@ -2,9 +2,12 @@ ; 
CHECK: .reloc {{.*}}, BFD_RELOC_NONE, foo +%1 = type opaque +@foo = external global %1 + define void @test_reloc_none() { - call void @llvm.reloc.none(metadata !"foo") + call void @llvm.reloc.none(ptr @foo) ret void } -declare void @llvm.reloc.none(metadata) +declare void @llvm.reloc.none(ptr) diff --git a/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll b/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll index 841c9a6d62d9e..247d9bd798bcb 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll @@ -1,14 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK +%1 = type opaque +@foo = external global %1 + define void @test_reloc_none() { ; CHECK-LABEL: test_reloc_none: ; CHECK: # %bb.0: ; CHECK-NEXT: .Lreloc_none0: ; CHECK-NEXT: .reloc .Lreloc_none0, BFD_RELOC_NONE, foo ; CHECK-NEXT: retq - call void @llvm.reloc.none(metadata !"foo") + call void @llvm.reloc.none(ptr @foo) ret void } -declare void @llvm.reloc.none(metadata) +declare void @llvm.reloc.none(ptr) diff --git a/llvm/test/Verifier/reloc-none.ll b/llvm/test/Verifier/reloc-none.ll new file mode 100644 index 0000000000000..f025b7a7c9cb7 --- /dev/null +++ b/llvm/test/Verifier/reloc-none.ll @@ -0,0 +1,13 @@ +; RUN: not llvm-as -disable-output 2>&1 %s | FileCheck %s + +; CHECK: llvm.reloc.none argument must be a global value +; CHECK-NEXT: call void @llvm.reloc.none(ptr %foo) + +define void @test_reloc_none_bad_arg(ptr %foo) { + call void @llvm.reloc.none(ptr %foo) + ret void +} + +declare void @llvm.reloc.none(ptr) + +!0 = !{} diff --git a/llvm/test/Verifier/reloc_none.ll b/llvm/test/Verifier/reloc_none.ll deleted file mode 100644 index 9c96799a36a36..0000000000000 --- a/llvm/test/Verifier/reloc_none.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: not llvm-as -disable-output 2>&1 %s | FileCheck %s - -; CHECK: 
llvm.reloc.none argument must be a metadata string -; CHECK-NEXT: call void @llvm.reloc.none(metadata !0) - -define void @test_reloc_none_bad_arg() { - call void @llvm.reloc.none(metadata !0) - ret void -} - -declare void @llvm.reloc.none(metadata) - -!0 = !{} From bad31c7a7f60af8e1c79ffb4080b52b8cb2bccc9 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Thu, 21 Aug 2025 15:08:45 -0700 Subject: [PATCH 13/31] Update big-filter.td --- llvm/test/TableGen/FixedLenDecoderEmitter/big-filter.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/big-filter.td b/llvm/test/TableGen/FixedLenDecoderEmitter/big-filter.td index 7e2cda1bae9ed..56fc14d0fe775 100644 --- a/llvm/test/TableGen/FixedLenDecoderEmitter/big-filter.td +++ b/llvm/test/TableGen/FixedLenDecoderEmitter/big-filter.td @@ -15,10 +15,10 @@ class I : Instruction { // CHECK-NEXT: MCD::OPC_ExtractField, 0, 64, // CHECK-NEXT: MCD::OPC_FilterValue, 1, 8, 0, // CHECK-NEXT: MCD::OPC_CheckFieldOrFail, 127, 1, 1, -// CHECK-NEXT: MCD::OPC_Decode, 187, 2, 0, +// CHECK-NEXT: MCD::OPC_Decode, 188, 2, 0, // CHECK-NEXT: MCD::OPC_FilterValueOrFail, 255, 255, 255, 255, 255, 255, 255, 255, 255, 1, // CHECK-NEXT: MCD::OPC_CheckFieldOrFail, 127, 1, 0, -// CHECK-NEXT: MCD::OPC_Decode, 186, 2, 0, +// CHECK-NEXT: MCD::OPC_Decode, 187, 2, 0, // CHECK-NEXT: }; def I1 : I { From 1848a8e57031030d963095daaa4f2546eceee320 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 2 Apr 2025 16:24:57 -0700 Subject: [PATCH 14/31] [IR] "modular-format" attribute for functions using format strings A new InstCombine transform uses this attribute to rewrite calls to a modular version of the implementation along with llvm.reloc.none relocations against aspects of the implementation needed by the call. This change only adds support for the 'float' aspect, but it also builds the structure needed for others. 
See issue #146159 --- llvm/docs/LangRef.rst | 17 +++++ .../InstCombine/InstCombineCalls.cpp | 62 +++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index dcd508b72cf6e..39fa6a82f1eb6 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2637,6 +2637,23 @@ For example: This attribute indicates that outlining passes should not modify the function. +``"modular_format"=",,,,"`` + This attribute indicates that the implementation is modular on a particular + format string argument . When the argument for a given call is constant, the + compiler may redirect the call to a modular implementation function + instead. + + The compiler also emits relocations to report various aspects of the format + string and arguments that were present. The compiler reports an aspect by + issing a relocation for the symbol `_``. This arranges + for code and data needed to support the aspect of the implementation to be + brought into the link to satisfy weak references in the modular + implemenation function. 
+ + The following aspects are currently supported: + + - ``float``: The call has a floating point argument + Call Site Attributes ---------------------- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 42b65dde67255..bcd4aa76696c5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" @@ -4001,6 +4002,63 @@ Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { return visitCallBase(CBI); } +static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { + if (!CI->hasFnAttr("modular-format")) + return nullptr; + + SmallVector Args( + llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ',')); + // TODO: Examine the format argument in Args[0]. 
+ // TODO: Error handling + unsigned FirstArgIdx; + if (!llvm::to_integer(Args[1], FirstArgIdx)) + return nullptr; + if (FirstArgIdx == 0) + return nullptr; + --FirstArgIdx; + StringRef FnName = Args[2]; + StringRef ImplName = Args[3]; + DenseSet Aspects(llvm::from_range, + ArrayRef(Args).drop_front(4)); + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + FunctionCallee ModularFn = + M->getOrInsertFunction(FnName, Callee->getFunctionType(), + Callee->getAttributes().removeFnAttribute( + M->getContext(), "modular-format")); + CallInst *New = cast(CI->clone()); + New->setCalledFunction(ModularFn); + New->removeFnAttr("modular-format"); + B.Insert(New); + + const auto ReferenceAspect = [&](StringRef Aspect) { + SmallString<20> Name = ImplName; + Name += '_'; + Name += Aspect; + Constant *Sym = + M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext())); + Function *RelocNoneFn = + Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none); + B.CreateCall(RelocNoneFn, {Sym}); + }; + + if (Aspects.contains("float")) { + Aspects.erase("float"); + if (llvm::any_of( + llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx), + CI->arg_end()), + [](Value *V) { return V->getType()->isFloatingPointTy(); })) + ReferenceAspect("float"); + } + + SmallVector UnknownAspects(Aspects.begin(), Aspects.end()); + llvm::sort(UnknownAspects); + for (StringRef Request : UnknownAspects) + ReferenceAspect(Request); + + return New; +} + Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { if (!CI->getCalledFunction()) return nullptr; @@ -4022,6 +4080,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { ++NumSimplified; return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With); } + if (Value *With = optimizeModularFormat(CI, Builder)) { + ++NumSimplified; + return CI->use_empty() ? 
CI : replaceInstUsesWith(*CI, With); + } return nullptr; } From f6e5d881a01b22347f69dfb1a69137fbec35032d Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 8 Jul 2025 15:11:42 -0700 Subject: [PATCH 15/31] issing -> issuing --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 39fa6a82f1eb6..b9e57edadca9b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2645,7 +2645,7 @@ For example: The compiler also emits relocations to report various aspects of the format string and arguments that were present. The compiler reports an aspect by - issing a relocation for the symbol `_``. This arranges + issuing a relocation for the symbol `_``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function. From 4d10a1d10a5ab380487e193cdb8cf4dae68baac1 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Mon, 21 Jul 2025 15:09:58 -0700 Subject: [PATCH 16/31] Emit reloc.none intrinsic with metadata string arg --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bcd4aa76696c5..e11d9f91a3a4c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4021,11 +4021,12 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { DenseSet Aspects(llvm::from_range, ArrayRef(Args).drop_front(4)); Module *M = CI->getModule(); + LLVMContext &Ctx = M->getContext(); Function *Callee = CI->getCalledFunction(); FunctionCallee ModularFn = M->getOrInsertFunction(FnName, Callee->getFunctionType(), Callee->getAttributes().removeFnAttribute( - M->getContext(), "modular-format")); + Ctx, 
"modular-format")); CallInst *New = cast(CI->clone()); New->setCalledFunction(ModularFn); New->removeFnAttr("modular-format"); @@ -4035,11 +4036,10 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { SmallString<20> Name = ImplName; Name += '_'; Name += Aspect; - Constant *Sym = - M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext())); Function *RelocNoneFn = Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none); - B.CreateCall(RelocNoneFn, {Sym}); + B.CreateCall(RelocNoneFn, + {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))}); }; if (Aspects.contains("float")) { From 2f49b8c8d7da25dd8a6bf823c9b4a7ab653b0780 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 13:24:20 -0700 Subject: [PATCH 17/31] Correct modular_format to modular-format in docs --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index b9e57edadca9b..07c63ef96db07 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2637,7 +2637,7 @@ For example: This attribute indicates that outlining passes should not modify the function. -``"modular_format"=",,,,"`` +``"modular-format"=",,,,"`` This attribute indicates that the implementation is modular on a particular format string argument . 
When the argument for a given call is constant, the compiler may redirect the call to a modular implementation function From d510d67d28e93c24f51f4c80a7efe28e9ebf1cad Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 13:26:20 -0700 Subject: [PATCH 18/31] Describe the semantics of the arguments copied from C format attr --- llvm/docs/LangRef.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 07c63ef96db07..0a981214983fe 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2650,6 +2650,9 @@ For example: brought into the link to satisfy weak references in the modular implemenation function. + The first two arguments have the same semantics as the arguments to the C + ``format`` attribute. + The following aspects are currently supported: - ``float``: The call has a floating point argument From ca8b788eb3875731365e6581de3de6cfaffd92dd Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 13:29:09 -0700 Subject: [PATCH 19/31] Add a type arg --- llvm/docs/LangRef.rst | 6 ++++-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 0a981214983fe..3132a3b84f1b7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2637,7 +2637,7 @@ For example: This attribute indicates that outlining passes should not modify the function. -``"modular-format"=",,,,"`` +``"modular-format"=",,,,,"`` This attribute indicates that the implementation is modular on a particular format string argument . When the argument for a given call is constant, the compiler may redirect the call to a modular implementation function @@ -2650,13 +2650,15 @@ For example: brought into the link to satisfy weak references in the modular implemenation function. 
- The first two arguments have the same semantics as the arguments to the C + The first three arguments have the same semantics as the arguments to the C ``format`` attribute. The following aspects are currently supported: - ``float``: The call has a floating point argument + + Call Site Attributes ---------------------- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e11d9f91a3a4c..bedc57014d215 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4008,18 +4008,18 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { SmallVector Args( llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ',')); - // TODO: Examine the format argument in Args[0]. + // TODO: Make use of the first two arguments // TODO: Error handling unsigned FirstArgIdx; - if (!llvm::to_integer(Args[1], FirstArgIdx)) + if (!llvm::to_integer(Args[2], FirstArgIdx)) return nullptr; if (FirstArgIdx == 0) return nullptr; --FirstArgIdx; - StringRef FnName = Args[2]; - StringRef ImplName = Args[3]; + StringRef FnName = Args[3]; + StringRef ImplName = Args[4]; DenseSet Aspects(llvm::from_range, - ArrayRef(Args).drop_front(4)); + ArrayRef(Args).drop_front(5)); Module *M = CI->getModule(); LLVMContext &Ctx = M->getContext(); Function *Callee = CI->getCalledFunction(); From 34aeb6424bf5ec2458797c2e0e26050cfc950f26 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Thu, 21 Aug 2025 14:49:14 -0700 Subject: [PATCH 20/31] llvm.reloc.none takes a GlobalValue again This avoids modifying Module in ISel --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bedc57014d215..b70ac950ed8fc 100644 --- 
a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4021,12 +4021,11 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { DenseSet Aspects(llvm::from_range, ArrayRef(Args).drop_front(5)); Module *M = CI->getModule(); - LLVMContext &Ctx = M->getContext(); Function *Callee = CI->getCalledFunction(); FunctionCallee ModularFn = M->getOrInsertFunction(FnName, Callee->getFunctionType(), Callee->getAttributes().removeFnAttribute( - Ctx, "modular-format")); + M->getContext(), "modular-format")); CallInst *New = cast(CI->clone()); New->setCalledFunction(ModularFn); New->removeFnAttr("modular-format"); @@ -4036,10 +4035,11 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { SmallString<20> Name = ImplName; Name += '_'; Name += Aspect; + Constant *Sym = + M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext())); Function *RelocNoneFn = Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none); - B.CreateCall(RelocNoneFn, - {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))}); + B.CreateCall(RelocNoneFn, {Sym}); }; if (Aspects.contains("float")) { From 9dff0b30f8d1a43bef141dd54dd69b475306ade0 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 26 Aug 2025 14:28:34 -0700 Subject: [PATCH 21/31] Add test cases and polish change --- llvm/docs/LangRef.rst | 21 ++-- llvm/lib/IR/Verifier.cpp | 14 +++ .../InstCombine/InstCombineCalls.cpp | 8 +- .../Transforms/InstCombine/modular-format.ll | 104 ++++++++++++++++++ llvm/test/Verifier/modular-format.ll | 41 +++++++ 5 files changed, 172 insertions(+), 16 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/modular-format.ll create mode 100644 llvm/test/Verifier/modular-format.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3132a3b84f1b7..2c1ee150aea9a 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2637,18 +2637,17 @@ For example: This attribute indicates that 
outlining passes should not modify the function. -``"modular-format"=",,,,,"`` +``"modular-format"=",,,,,"`` This attribute indicates that the implementation is modular on a particular - format string argument . When the argument for a given call is constant, the - compiler may redirect the call to a modular implementation function - instead. - - The compiler also emits relocations to report various aspects of the format - string and arguments that were present. The compiler reports an aspect by - issuing a relocation for the symbol `_``. This arranges - for code and data needed to support the aspect of the implementation to be - brought into the link to satisfy weak references in the modular - implemenation function. + format string argument. If the compiler can determine that not all aspects + of the implementation are needed, it can report which aspects were needed + and redirect the call to a modular implementation function instead. + + The compiler reports that an implementation aspect is needed by issuing a + relocation for the symbol `_``. This arranges for code + and data needed to support the aspect of the implementation to be brought + into the link to satisfy weak references in the modular implemenation + function. The first three arguments have the same semantics as the arguments to the C ``format`` attribute. 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 5003596b14d64..8836f2672a239 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2522,6 +2522,20 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, CheckFailed("invalid value for 'denormal-fp-math-f32' attribute: " + S, V); } + + if (auto A = Attrs.getFnAttr("modular-format"); A.isValid()) { + StringRef S = A.getValueAsString(); + SmallVector Args; + S.split(Args, ','); + Check(Args.size() >= 5, + "modular-format attribute requires at least 5 arguments", V); + unsigned FirstArgIdx; + Check(!Args[2].getAsInteger(10, FirstArgIdx), + "modular-format attribute first arg index is not an integer", V); + unsigned UpperBound = FT->getNumParams() + (FT->isVarArg() ? 1 : 0); + Check(FirstArgIdx > 0 && FirstArgIdx <= UpperBound, + "modular-format attribute first arg index is out of bounds", V); + } } void Verifier::verifyFunctionMetadata( diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index b70ac950ed8fc..f8c27ed7d23b5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4009,12 +4009,10 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { SmallVector Args( llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ',')); // TODO: Make use of the first two arguments - // TODO: Error handling unsigned FirstArgIdx; - if (!llvm::to_integer(Args[2], FirstArgIdx)) - return nullptr; - if (FirstArgIdx == 0) - return nullptr; + [[maybe_unused]] bool Error; + Error = Args[2].getAsInteger(10, FirstArgIdx); + assert(!Error && "invalid first arg index"); --FirstArgIdx; StringRef FnName = Args[3]; StringRef ImplName = Args[4]; diff --git a/llvm/test/Transforms/InstCombine/modular-format.ll b/llvm/test/Transforms/InstCombine/modular-format.ll new file mode 100644 index 0000000000000..9b1e60bbab4f8 --- 
/dev/null +++ b/llvm/test/Transforms/InstCombine/modular-format.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Test that the modular format string library call simplifier works correctly. +; +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@.str.int = constant [3 x i8] c"%d\00" +@.str.float = constant [3 x i8] c"%f\00" +@.str.multi = constant [6 x i8] c"%f %d\00" +@.str.multifp = constant [6 x i8] c"%f %f\00" +@.str.noargs = constant [1 x i8] c"\00" + +; Basic Transformation +define void @test_basic(i32 %arg) { +; CHECK-LABEL: @test_basic( +; CHECK-NEXT: call void (ptr, ...) @basic_mod(ptr nonnull @.str.int, i32 [[ARG:%.*]]) +; CHECK-NEXT: ret void +; + call void (ptr, ...) @basic(ptr @.str.int, i32 %arg) + ret void +} + +declare void @basic(ptr, ...) "modular-format"="printf,1,2,basic_mod,basic_impl" +; "float" Aspect - Present +define void @test_float_present(double %arg) { +; CHECK-LABEL: @test_float_present( +; CHECK-NEXT: call void (ptr, ...) @float_present_mod(ptr nonnull @.str.float, double [[ARG:%.*]]) +; CHECK-NEXT: call void @llvm.reloc.none(ptr nonnull @basic_impl_float) +; CHECK-NEXT: ret void +; + call void (ptr, ...) @float_present(ptr @.str.float, double %arg) + ret void +} + +declare void @float_present(ptr, ...) #0 + +; Unknown Aspects +define void @test_unknown_aspects(i32 %arg) { +; CHECK-LABEL: @test_unknown_aspects( +; CHECK-NEXT: call void (ptr, ...) @unknown_aspects_mod(ptr nonnull @.str.int, i32 [[ARG:%.*]]) +; CHECK-NEXT: call void @llvm.reloc.none(ptr nonnull @basic_impl_unknown1) +; CHECK-NEXT: call void @llvm.reloc.none(ptr nonnull @basic_impl_unknown2) +; CHECK-NEXT: ret void +; + call void (ptr, ...) @unknown_aspects(ptr @.str.int, i32 %arg) + ret void +} + +declare void @unknown_aspects(ptr, ...) 
"modular-format"="printf,1,2,unknown_aspects_mod,basic_impl,unknown1,unknown2" + +; Multiple Aspects +define void @test_multiple_aspects(double %arg1, i32 %arg2) { +; CHECK-LABEL: @test_multiple_aspects( +; CHECK-NEXT: call void (ptr, ...) @multiple_aspects_mod(ptr nonnull @.str.multi, double [[ARG1:%.*]], i32 [[ARG2:%.*]]) +; CHECK-NEXT: call void @llvm.reloc.none(ptr nonnull @basic_impl_float) +; CHECK-NEXT: call void @llvm.reloc.none(ptr nonnull @basic_impl_unknown) +; CHECK-NEXT: ret void +; + call void (ptr, ...) @multiple_aspects(ptr @.str.multi, double %arg1, i32 %arg2) + ret void +} + +declare void @multiple_aspects(ptr, ...) "modular-format"="printf,1,2,multiple_aspects_mod,basic_impl,float,unknown" + +; Multiple Floating-Point Arguments +define void @test_multiple_fp_args(double %arg1, float %arg2) { +; CHECK-LABEL: @test_multiple_fp_args( +; CHECK-NEXT: call void (ptr, ...) @float_present_mod(ptr nonnull @.str.multifp, double [[ARG1:%.*]], float [[ARG2:%.*]]) +; CHECK-NEXT: call void @llvm.reloc.none(ptr nonnull @basic_impl_float) +; CHECK-NEXT: ret void +; + call void (ptr, ...) @multiple_fp_args(ptr @.str.multifp, double %arg1, float %arg2) + ret void +} + +declare void @multiple_fp_args(ptr, ...) #0 + +; No Arguments to Check +define void @test_no_args_to_check() { +; CHECK-LABEL: @test_no_args_to_check( +; CHECK-NEXT: call void (ptr, ...) @float_present_mod(ptr nonnull @.str.noargs) +; CHECK-NEXT: ret void +; + call void (ptr, ...) @no_args_to_check(ptr @.str.noargs) + ret void +} + +declare void @no_args_to_check(ptr, ...) #0 + +; First argument index != 2 +define void @test_first_arg_idx(i32 %ignored, double %arg) { +; CHECK-LABEL: @test_first_arg_idx( +; CHECK-NEXT: call void (i32, ptr, ...) @first_arg_idx_mod(i32 [[IGNORED:%.*]], ptr nonnull @.str.float, double [[ARG:%.*]]) +; CHECK-NEXT: call void @llvm.reloc.none(ptr nonnull @basic_impl_float) +; CHECK-NEXT: ret void +; + call void (i32, ptr, ...) 
@first_arg_idx(i32 %ignored, ptr @.str.float, double %arg) + ret void +} + +declare void @first_arg_idx(i32, ptr, ...) "modular-format"="printf,2,3,first_arg_idx_mod,basic_impl,float" + +attributes #0 = { "modular-format"="printf,1,2,float_present_mod,basic_impl,float" } diff --git a/llvm/test/Verifier/modular-format.ll b/llvm/test/Verifier/modular-format.ll new file mode 100644 index 0000000000000..abdd73d098be1 --- /dev/null +++ b/llvm/test/Verifier/modular-format.ll @@ -0,0 +1,41 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +define void @test_too_few_arguments(i32 %arg, ...) "modular-format"="printf,1,2,basic_mod" { + ret void +} +; CHECK: modular-format attribute requires at least 5 arguments +; CHECK-NEXT: ptr @test_too_few_arguments + +define void @test_first_arg_index_not_integer(i32 %arg, ...) "modular-format"="printf,1,foo,basic_mod,basic_impl" { + ret void +} +; CHECK: modular-format attribute first arg index is not an integer +; CHECK-NEXT: ptr @test_first_arg_index_not_integer + +define void @test_first_arg_index_zero(i32 %arg) "modular-format"="printf,1,0,basic_mod,basic_impl" { + ret void +} +; CHECK: modular-format attribute first arg index is out of bounds +; CHECK-NEXT: ptr @test_first_arg_index_zero + +define void @test_first_arg_index_out_of_bounds(i32 %arg) "modular-format"="printf,1,2,basic_mod,basic_impl" { + ret void +} +; CHECK: modular-format attribute first arg index is out of bounds +; CHECK-NEXT: ptr @test_first_arg_index_out_of_bounds + +define void @test_first_arg_index_out_of_bounds_varargs(i32 %arg, ...) 
"modular-format"="printf,1,3,basic_mod,basic_impl" { + ret void +} +; CHECK: modular-format attribute first arg index is out of bounds +; CHECK-NEXT: ptr @test_first_arg_index_out_of_bounds_varargs + +; CHECK-NOT: ptr @test_first_arg_index_in_bounds +define void @test_first_arg_index_in_bounds(i32 %arg) "modular-format"="printf,1,1,basic_mod,basic_impl" { + ret void +} + +; CHECK-NOT: ptr @test_first_arg_index_in_bounds_varargs +define void @test_first_arg_index_in_bounds_varargs(i32 %arg, ...) "modular-format"="printf,1,2,basic_mod,basic_impl" { + ret void +} From 7adc2bd41f4df5a53c404659639ed52d4b90e8ca Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 10 Jun 2025 14:06:53 -0700 Subject: [PATCH 22/31] [clang] "modular_format" attribute for functions using format strings This provides a C language version of the new IR modular-format attribute. This, in concert with the format attribute, allows a library function to declare that a modular version of its implementation is available. See issue #146159 for context. 
--- clang/include/clang/Basic/Attr.td | 11 +++++++++++ clang/include/clang/Basic/AttrDocs.td | 25 +++++++++++++++++++++++++ clang/lib/CodeGen/CGCall.cpp | 12 ++++++++++++ clang/lib/Sema/SemaDeclAttr.cpp | 27 +++++++++++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 29364c5903d31..1c7cb5dba5419 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -5240,3 +5240,14 @@ def NonString : InheritableAttr { let Subjects = SubjectList<[Var, Field]>; let Documentation = [NonStringDocs]; } + +def ModularFormat : InheritableAttr { + let Spellings = [Clang<"modular_format">]; + let Args = [ + IdentifierArgument<"ModularImplFn">, + StringArgument<"ImplName">, + VariadicStringArgument<"Aspects"> + ]; + let Subjects = SubjectList<[Function]>; + let Documentation = [ModularFormatDocs]; +} diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2504841f6df33..47d655444fa49 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9605,3 +9605,28 @@ silence diagnostics with code like: __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed }]; } + +def ModularFormatDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``modular_format`` attribute can be applied to a function that bears the +``format`` attribute to indicate that the implementation is modular on the +format string argument. When the format argument for a given call is constant, +the compiler may redirect the call to the symbol given as the first argument to +the attribute (the modular implementation function). + +The second argument is a implementation name, and the remaining arguments are +aspects of the format string for the compiler to report. If the compiler does +not understand a aspect, it must summarily report that the format string has +that aspect. 
+ +The compiler reports an aspect by issing a relocation for the symbol +`_``. This arranges for code and data needed to support the +aspect of the implementation to be brought into the link to satisfy weak +references in the modular implemenation function. + +The following aspects are currently supported: + +- ``float``: The call has a floating point argument + }]; +} diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a94a7ed51521c..bf1ddb7bf8fe9 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2560,6 +2560,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (TargetDecl->hasAttr()) FuncAttrs.addAttribute("aarch64_pstate_sm_body"); + + if (auto *ModularFormat = TargetDecl->getAttr()) { + // TODO: Error checking + FormatAttr *Format = TargetDecl->getAttr(); + std::string FormatIdx = std::to_string(Format->getFormatIdx()); + std::string FirstArg = std::to_string(Format->getFirstArg()); + SmallVector Args = { + FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(), + ModularFormat->getImplName()}; + llvm::append_range(Args, ModularFormat->aspects()); + FuncAttrs.addAttribute("modular-format", llvm::join(Args, ",")); + } } // Attach "no-builtins" attributes to: diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 3ded60cd8b073..32a5e895a675c 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6751,6 +6751,29 @@ static void handleVTablePointerAuthentication(Sema &S, Decl *D, CustomDiscriminationValue)); } +static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) { + StringRef ImplName; + if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName)) + return; + SmallVector Aspects; + for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) { + StringRef Aspect; + if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect)) + return; + Aspects.push_back(Aspect); + } + + // Store aspects sorted and without duplicates. 
+ llvm::sort(Aspects); + Aspects.erase(llvm::unique(Aspects), Aspects.end()); + + // TODO: Type checking on identifier + // TODO: Merge attributes + D->addAttr(::new (S.Context) ModularFormatAttr( + S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName, + Aspects.data(), Aspects.size())); +} + //===----------------------------------------------------------------------===// // Top Level Sema Entry Points //===----------------------------------------------------------------------===// @@ -7681,6 +7704,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_VTablePointerAuthentication: handleVTablePointerAuthentication(S, D, AL); break; + + case ParsedAttr::AT_ModularFormat: + handleModularFormat(S, D, AL); + break; } } From 78a37cc3f116af211f538cc8476d4ac94dc8ce2a Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 15 Jul 2025 11:28:20 -0700 Subject: [PATCH 23/31] Update docs to account for clang inferring format attribute --- clang/include/clang/Basic/AttrDocs.td | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 47d655444fa49..51af9eca2c512 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9610,10 +9610,11 @@ def ModularFormatDocs : Documentation { let Category = DocCatFunction; let Content = [{ The ``modular_format`` attribute can be applied to a function that bears the -``format`` attribute to indicate that the implementation is modular on the -format string argument. When the format argument for a given call is constant, -the compiler may redirect the call to the symbol given as the first argument to -the attribute (the modular implementation function). +``format`` attribute (or standard library functions) to indicate that the +implementation is modular on the format string argument. 
When the format string +for a given call is constant, the compiler may redirect the call to the symbol +given as the first argument to the attribute (the modular implementation +function). The second argument is a implementation name, and the remaining arguments are aspects of the format string for the compiler to report. If the compiler does From f5681b3910c49938e305f9f3564c00a78f2118dc Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 16 Jul 2025 15:19:37 -0700 Subject: [PATCH 24/31] Add an example to clang attr doc --- clang/include/clang/Basic/AttrDocs.td | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 51af9eca2c512..a30935df3dcf9 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9622,10 +9622,18 @@ not understand a aspect, it must summarily report that the format string has that aspect. The compiler reports an aspect by issing a relocation for the symbol -`_``. This arranges for code and data needed to support the +``_``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function. +For example, say ``printf`` is annotated with +``modular_format(__modular_printf, __printf, float)``. Then, a call to +``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would +become a call to ``__modular_printf`` with the same arguments, as would +``printf("%f", 42.0)``. The latter would be accompanied with a strong +relocation against the symbol ``__printf_float``, which would bring floating +point support for ``printf`` into the link. 
+ The following aspects are currently supported: - ``float``: The call has a floating point argument From aaeb8a95da15d411e9cf730cbfa8654273c0b7c1 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 13:35:46 -0700 Subject: [PATCH 25/31] Emit the new type arg from format attr --- clang/lib/CodeGen/CGCall.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index bf1ddb7bf8fe9..e50ed1c2f273c 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2564,10 +2564,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (auto *ModularFormat = TargetDecl->getAttr()) { // TODO: Error checking FormatAttr *Format = TargetDecl->getAttr(); + StringRef Type = Format->getType()->getName(); std::string FormatIdx = std::to_string(Format->getFormatIdx()); std::string FirstArg = std::to_string(Format->getFirstArg()); SmallVector Args = { - FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(), + Type, FormatIdx, FirstArg, + ModularFormat->getModularImplFn()->getName(), ModularFormat->getImplName()}; llvm::append_range(Args, ModularFormat->aspects()); FuncAttrs.addAttribute("modular-format", llvm::join(Args, ",")); From 3ed12cdd97660ea871d30a3b43aff1caaad217bb Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 15:01:56 -0700 Subject: [PATCH 26/31] Correct typos --- clang/include/clang/Basic/AttrDocs.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index a30935df3dcf9..e2b910c058e72 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9618,10 +9618,10 @@ function). The second argument is a implementation name, and the remaining arguments are aspects of the format string for the compiler to report. 
If the compiler does -not understand a aspect, it must summarily report that the format string has +not understand an aspect, it must summarily report that the format string has that aspect. -The compiler reports an aspect by issing a relocation for the symbol +The compiler reports an aspect by issuing a relocation for the symbol ``_``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function. From 5498b26a5ac6f98042573ff254d12379ca2d677c Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Thu, 19 Dec 2024 11:57:27 -0800 Subject: [PATCH 27/31] [libc] Modular printf option (float only) This adds LIBC_CONF_PRINTF_MODULAR, which causes floating point support (later, others) to be weakly linked into the implementation. __printf_modular becomes the main entry point of the implementation, and printf itself wraps __printf_modular. printf also contains a BFD_RELOC_NONE relocation to bring in the float aspect. See issue #146159 for context. 
--- libc/config/config.json | 4 ++ libc/docs/configure.rst | 1 + libc/src/stdio/generic/CMakeLists.txt | 7 ++- libc/src/stdio/generic/printf_modular.cpp | 40 +++++++++++++ libc/src/stdio/printf.h | 1 + libc/src/stdio/printf_core/CMakeLists.txt | 7 ++- .../stdio/printf_core/float_dec_converter.h | 25 +++++++-- .../printf_core/float_dec_converter_limited.h | 24 ++++++-- .../stdio/printf_core/float_hex_converter.h | 10 +++- libc/src/stdio/printf_core/float_impl.cpp | 41 ++++++++++++++ libc/src/stdio/printf_core/parser.h | 56 ++++++++++++++----- libc/src/stdio/printf_core/printf_config.h | 7 +++ libc/src/stdio/printf_core/printf_main.h | 13 ++++- .../src/stdio/printf_core/vfprintf_internal.h | 13 ++++- 14 files changed, 216 insertions(+), 33 deletions(-) create mode 100644 libc/src/stdio/generic/printf_modular.cpp create mode 100644 libc/src/stdio/printf_core/float_impl.cpp diff --git a/libc/config/config.json b/libc/config/config.json index cfbe9a43948ea..b73fc85758a3b 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -51,6 +51,10 @@ "LIBC_CONF_PRINTF_RUNTIME_DISPATCH": { "value": true, "doc": "Use dynamic dispatch for the output mechanism to reduce code size." + }, + "LIBC_CONF_PRINTF_MODULAR": { + "value": true, + "doc": "Split printf implementation into modules that can be lazily linked in." } }, "scanf": { diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index e23fc824ce7c8..ed383c6f668a4 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -45,6 +45,7 @@ to learn about the defaults for your platform and target. - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for faster and smaller but less accurate printf doubles. - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative printf float implementation based on 320-bit floats - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large table for better printf long double performance. 
+ - ``LIBC_CONF_PRINTF_MODULAR``: Split printf implementation into modules that can be lazily linked in. - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the output mechanism to reduce code size. * **"pthread" options** - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a mutex is in contention (default to 100). diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt index 6361822b61999..41b18bc7195ca 100644 --- a/libc/src/stdio/generic/CMakeLists.txt +++ b/libc/src/stdio/generic/CMakeLists.txt @@ -412,10 +412,15 @@ if(LLVM_LIBC_FULL_BUILD) ) endif() +set(printf_srcs printf.cpp) +if (LIBC_CONF_PRINTF_MODULAR) + list(APPEND printf_srcs printf_modular.cpp) +endif() + add_generic_entrypoint_object( printf SRCS - printf.cpp + ${printf_srcs} HDRS ../printf.h DEPENDS diff --git a/libc/src/stdio/generic/printf_modular.cpp b/libc/src/stdio/generic/printf_modular.cpp new file mode 100644 index 0000000000000..3a6a580002062 --- /dev/null +++ b/libc/src/stdio/generic/printf_modular.cpp @@ -0,0 +1,40 @@ +//===-- Implementation of printf_modular-----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/printf.h" + +#include "src/__support/File/file.h" +#include "src/__support/arg_list.h" +#include "src/__support/macros/config.h" +#include "src/stdio/printf_core/vfprintf_internal.h" + +#include "hdr/types/FILE.h" +#include + +#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE +#define PRINTF_STDOUT LIBC_NAMESPACE::stdout +#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE +#define PRINTF_STDOUT ::stdout +#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, __printf_modular, + (const char *__restrict format, ...)) { + va_list vlist; + va_start(vlist, format); + internal::ArgList args(vlist); // This holder class allows for easier copying + // and pointer semantics, as well as handling + // destruction automatically. + va_end(vlist); + int ret_val = printf_core::vfprintf_internal_modular( + reinterpret_cast<::FILE *>(PRINTF_STDOUT), format, args); + return ret_val; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf.h b/libc/src/stdio/printf.h index 9e47ad8680f9c..81b7d866a6a59 100644 --- a/libc/src/stdio/printf.h +++ b/libc/src/stdio/printf.h @@ -15,6 +15,7 @@ namespace LIBC_NAMESPACE_DECL { int printf(const char *__restrict format, ...); +int __printf_modular(const char *__restrict format, ...); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 76eb0a2fdaaa5..0b4affa8e1cf8 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -28,6 +28,9 @@ endif() if(LIBC_CONF_PRINTF_RUNTIME_DISPATCH) list(APPEND printf_config_copts "-DLIBC_COPT_PRINTF_RUNTIME_DISPATCH") endif() +if(LIBC_CONF_PRINTF_MODULAR) + list(APPEND printf_config_copts "-DLIBC_COPT_PRINTF_MODULAR") +endif() if(printf_config_copts) list(PREPEND 
printf_config_copts "COMPILE_OPTIONS") endif() @@ -114,10 +117,12 @@ add_header_library( libc.src.__support.StringUtil.error_to_string ) -add_header_library( +add_object_library( printf_main HDRS printf_main.h + SRCS + float_impl.cpp DEPENDS .parser .converter diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h index ed004f9a26a13..deeb566bd4092 100644 --- a/libc/src/stdio/printf_core/float_dec_converter.h +++ b/libc/src/stdio/printf_core/float_dec_converter.h @@ -1122,11 +1122,23 @@ LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer, } } +template +LIBC_PRINTF_MODULAR_DECL int +convert_float_decimal(Writer *writer, const FormatSection &to_conv); +template +LIBC_PRINTF_MODULAR_DECL int +convert_float_dec_exp(Writer *writer, const FormatSection &to_conv); +template +LIBC_PRINTF_MODULAR_DECL int +convert_float_dec_auto(Writer *writer, + const FormatSection &to_conv); + +#ifdef LIBC_PRINTF_DEFINE_MODULAR // TODO: unify the float converters to remove the duplicated checks for inf/nan. 
template -LIBC_INLINE int convert_float_decimal(Writer *writer, - const FormatSection &to_conv) { +int convert_float_decimal(Writer *writer, + const FormatSection &to_conv) { if (to_conv.length_modifier == LengthModifier::L) { fputil::FPBits::StorageType float_raw = to_conv.conv_val_raw; fputil::FPBits float_bits(float_raw); @@ -1147,8 +1159,8 @@ LIBC_INLINE int convert_float_decimal(Writer *writer, } template -LIBC_INLINE int convert_float_dec_exp(Writer *writer, - const FormatSection &to_conv) { +int convert_float_dec_exp(Writer *writer, + const FormatSection &to_conv) { if (to_conv.length_modifier == LengthModifier::L) { fputil::FPBits::StorageType float_raw = to_conv.conv_val_raw; fputil::FPBits float_bits(float_raw); @@ -1169,8 +1181,8 @@ LIBC_INLINE int convert_float_dec_exp(Writer *writer, } template -LIBC_INLINE int convert_float_dec_auto(Writer *writer, - const FormatSection &to_conv) { +int convert_float_dec_auto(Writer *writer, + const FormatSection &to_conv) { if (to_conv.length_modifier == LengthModifier::L) { fputil::FPBits::StorageType float_raw = to_conv.conv_val_raw; fputil::FPBits float_bits(float_raw); @@ -1189,6 +1201,7 @@ LIBC_INLINE int convert_float_dec_auto(Writer *writer, return convert_inf_nan(writer, to_conv); } +#endif } // namespace printf_core } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/float_dec_converter_limited.h b/libc/src/stdio/printf_core/float_dec_converter_limited.h index f468dbc8e2ae8..9804a38964be0 100644 --- a/libc/src/stdio/printf_core/float_dec_converter_limited.h +++ b/libc/src/stdio/printf_core/float_dec_converter_limited.h @@ -676,22 +676,34 @@ LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer, } template -LIBC_INLINE int convert_float_decimal(Writer *writer, - const FormatSection &to_conv) { +LIBC_PRINTF_MODULAR_DECL int convert_float_decimal(Writer *writer, + const FormatSection &to_conv); +template +LIBC_PRINTF_MODULAR_DECL int convert_float_dec_exp(Writer *writer, + const 
FormatSection &to_conv); +template +LIBC_PRINTF_MODULAR_DECL int convert_float_dec_auto(Writer *writer, + const FormatSection &to_conv); + +#ifdef LIBC_PRINTF_DEFINE_MODULAR +template +int convert_float_decimal(Writer *writer, + const FormatSection &to_conv) { return convert_float_outer(writer, to_conv, ConversionType::F); } template -LIBC_INLINE int convert_float_dec_exp(Writer *writer, - const FormatSection &to_conv) { +int convert_float_dec_exp(Writer *writer, + const FormatSection &to_conv) { return convert_float_outer(writer, to_conv, ConversionType::E); } template -LIBC_INLINE int convert_float_dec_auto(Writer *writer, - const FormatSection &to_conv) { +int convert_float_dec_auto(Writer *writer, + const FormatSection &to_conv) { return convert_float_outer(writer, to_conv, ConversionType::G); } +#endif } // namespace printf_core } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index 16592e7bac932..fa724066813d7 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -26,8 +26,13 @@ namespace LIBC_NAMESPACE_DECL { namespace printf_core { template -LIBC_INLINE int convert_float_hex_exp(Writer *writer, - const FormatSection &to_conv) { +LIBC_PRINTF_MODULAR_DECL int convert_float_hex_exp(Writer *writer, + const FormatSection &to_conv); + +#ifdef LIBC_PRINTF_DEFINE_MODULAR +template +int convert_float_hex_exp(Writer *writer, + const FormatSection &to_conv) { using LDBits = fputil::FPBits; using StorageType = LDBits::StorageType; @@ -254,6 +259,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, } return WRITE_OK; } +#endif } // namespace printf_core } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/float_impl.cpp b/libc/src/stdio/printf_core/float_impl.cpp new file mode 100644 index 0000000000000..e7c9ba39aa148 --- /dev/null +++ b/libc/src/stdio/printf_core/float_impl.cpp 
@@ -0,0 +1,41 @@ +#ifdef LIBC_COPT_PRINTF_MODULAR +#include "src/__support/arg_list.h" + +#define LIBC_PRINTF_DEFINE_MODULAR +#include "src/stdio/printf_core/float_dec_converter.h" +#include "src/stdio/printf_core/float_hex_converter.h" +#include "src/stdio/printf_core/parser.h" + +namespace LIBC_NAMESPACE_DECL { +namespace printf_core { +template class Parser; +template class Parser>; +template class Parser>; +template class Parser>; +template class Parser>; + +#define INSTANTIATE_CONVERT_FN(NAME) \ + template int NAME( \ + Writer * writer, \ + const FormatSection &to_conv); \ + template int NAME( \ + Writer * writer, \ + const FormatSection &to_conv); \ + template int NAME( \ + Writer * writer, \ + const FormatSection &to_conv); \ + template int NAME( \ + Writer * writer, \ + const FormatSection &to_conv) + +INSTANTIATE_CONVERT_FN(convert_float_decimal); +INSTANTIATE_CONVERT_FN(convert_float_dec_exp); +INSTANTIATE_CONVERT_FN(convert_float_dec_auto); +INSTANTIATE_CONVERT_FN(convert_float_hex_exp); + +} // namespace printf_core +} // namespace LIBC_NAMESPACE_DECL + +// Bring this file into the link if __printf_float is referenced. 
+extern "C" void __printf_float() {} +#endif diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h index cef9b1ae58fa0..5a1eea36b603f 100644 --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -236,11 +236,7 @@ template class Parser { case ('A'): case ('g'): case ('G'): - if (lm != LengthModifier::L) { - WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, double, conv_index); - } else { - WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long double, conv_index); - } + write_float_arg_val(section, lm, conv_index); break; #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT @@ -299,6 +295,12 @@ template class Parser { return section; } + LIBC_PRINTF_MODULAR_DECL void write_float_arg_val(FormatSection §ion, + LengthModifier lm, + size_t conv_index); + LIBC_PRINTF_MODULAR_DECL TypeDesc float_type_desc(LengthModifier lm); + LIBC_PRINTF_MODULAR_DECL bool advance_arg_if_float(TypeDesc cur_type_desc); + private: // parse_flags parses the flags inside a format string. It assumes that // str[*local_pos] is inside a format specifier, and parses any flags it @@ -474,10 +476,9 @@ template class Parser { args_cur.template next_var(); #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT // Floating point numbers are stored separately from the other arguments. 
- else if (cur_type_desc == type_desc_from_type()) - args_cur.template next_var(); - else if (cur_type_desc == type_desc_from_type()) - args_cur.template next_var(); + else if (&Parser::advance_arg_if_float && + advance_arg_if_float(cur_type_desc)) + ; #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT // Floating point numbers may be stored separately from the other @@ -630,10 +631,7 @@ template class Parser { case ('A'): case ('g'): case ('G'): - if (lm != LengthModifier::L) - conv_size = type_desc_from_type(); - else - conv_size = type_desc_from_type(); + conv_size = float_type_desc(lm); break; #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT @@ -682,6 +680,38 @@ template class Parser { #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE }; +#ifdef LIBC_PRINTF_DEFINE_MODULAR +template +void Parser::write_float_arg_val(FormatSection §ion, + LengthModifier lm, + size_t conv_index) { + if (lm != LengthModifier::L) { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, double, conv_index); + } else { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long double, conv_index); + } +} + +template +TypeDesc Parser::float_type_desc(LengthModifier lm) { + if (lm != LengthModifier::L) + return type_desc_from_type(); + else + return type_desc_from_type(); +} + +template +bool Parser::advance_arg_if_float(TypeDesc cur_type_desc) { + if (cur_type_desc == type_desc_from_type()) + args_cur.template next_var(); + else if (cur_type_desc == type_desc_from_type()) + args_cur.template next_var(); + else + return false; + return true; +} +#endif + } // namespace printf_core } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/printf_config.h b/libc/src/stdio/printf_core/printf_config.h index 8a48abdd170ec..8f6ae8b41bc92 100644 --- a/libc/src/stdio/printf_core/printf_config.h +++ b/libc/src/stdio/printf_core/printf_config.h @@ -48,4 +48,11 @@ // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS +#ifdef 
LIBC_COPT_PRINTF_MODULAR +#define LIBC_PRINTF_MODULAR_DECL [[gnu::weak]] +#else +#define LIBC_PRINTF_MODULAR_DECL LIBC_INLINE +#define LIBC_PRINTF_DEFINE_MODULAR +#endif + #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PRINTF_CONFIG_H diff --git a/libc/src/stdio/printf_core/printf_main.h b/libc/src/stdio/printf_core/printf_main.h index 57f29858d5298..c77922bb4f044 100644 --- a/libc/src/stdio/printf_core/printf_main.h +++ b/libc/src/stdio/printf_core/printf_main.h @@ -22,8 +22,8 @@ namespace LIBC_NAMESPACE_DECL { namespace printf_core { template -int printf_main(Writer *writer, const char *__restrict str, - internal::ArgList &args) { +int printf_main_modular(Writer *writer, const char *__restrict str, + internal::ArgList &args) { Parser parser(str, args); int result = 0; for (FormatSection cur_section = parser.get_next_section(); @@ -41,6 +41,15 @@ int printf_main(Writer *writer, const char *__restrict str, return writer->get_chars_written(); } +template +int printf_main(Writer *writer, const char *__restrict str, + internal::ArgList &args) { +#ifdef LIBC_COPT_PRINTF_MODULAR + __asm__ __volatile__ (".reloc ., BFD_RELOC_NONE, __printf_float"); +#endif + return printf_main_modular(writer, str, args); +} + } // namespace printf_core } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/vfprintf_internal.h b/libc/src/stdio/printf_core/vfprintf_internal.h index 630de9d9d43dd..c9d6fce458409 100644 --- a/libc/src/stdio/printf_core/vfprintf_internal.h +++ b/libc/src/stdio/printf_core/vfprintf_internal.h @@ -67,7 +67,7 @@ LIBC_INLINE int file_write_hook(cpp::string_view new_str, void *fp) { return WRITE_OK; } -LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, +LIBC_INLINE int vfprintf_internal_modular(::FILE *__restrict stream, const char *__restrict format, internal::ArgList &args) { constexpr size_t BUFF_SIZE = 1024; @@ -76,7 +76,7 @@ LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, buffer, BUFF_SIZE, &file_write_hook, 
reinterpret_cast(stream)); Writer writer(wb); internal::flockfile(stream); - int retval = printf_main(&writer, format, args); + int retval = printf_main_modular(&writer, format, args); int flushval = wb.overflow_write(""); if (flushval != WRITE_OK) retval = flushval; @@ -84,6 +84,15 @@ LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, return retval; } +LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, + const char *__restrict format, + internal::ArgList &args) { +#ifdef LIBC_COPT_PRINTF_SPLIT + __asm__ __volatile__(".reloc ., BFD_RELOC_NONE, __printf_float"); +#endif + return vfprintf_internal_modular(stream, format, args); +} + } // namespace printf_core } // namespace LIBC_NAMESPACE_DECL From 662a265f4d064b90c00601cdfa657254f8948660 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 15 Jul 2025 15:20:40 -0700 Subject: [PATCH 28/31] Fix missed SPLIT->MODULAR rename --- libc/src/stdio/printf_core/vfprintf_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/stdio/printf_core/vfprintf_internal.h b/libc/src/stdio/printf_core/vfprintf_internal.h index c9d6fce458409..921eea9e0a4ba 100644 --- a/libc/src/stdio/printf_core/vfprintf_internal.h +++ b/libc/src/stdio/printf_core/vfprintf_internal.h @@ -87,7 +87,7 @@ LIBC_INLINE int vfprintf_internal_modular(::FILE *__restrict stream, LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, const char *__restrict format, internal::ArgList &args) { -#ifdef LIBC_COPT_PRINTF_SPLIT +#ifdef LIBC_COPT_PRINTF_MODULAR __asm__ __volatile__(".reloc ., BFD_RELOC_NONE, __printf_float"); #endif return vfprintf_internal_modular(stream, format, args); From 191b1dda574a141fabc5fbfc7aad6083f52daf83 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 15 Jul 2025 15:24:53 -0700 Subject: [PATCH 29/31] Add missing file header --- libc/src/stdio/printf_core/float_impl.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git 
a/libc/src/stdio/printf_core/float_impl.cpp b/libc/src/stdio/printf_core/float_impl.cpp index e7c9ba39aa148..2215ac101f47d 100644 --- a/libc/src/stdio/printf_core/float_impl.cpp +++ b/libc/src/stdio/printf_core/float_impl.cpp @@ -1,3 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file instantiates the functionality needed for supporting floating +/// point arguments in modular printf builds. Non-modular printf builds +/// implicitly instantiate these functions. +/// +//===----------------------------------------------------------------------===// + #ifdef LIBC_COPT_PRINTF_MODULAR #include "src/__support/arg_list.h" From 05a3088190231ef48cb93434d77e5d139d049813 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 16 Jul 2025 13:53:44 -0700 Subject: [PATCH 30/31] Use LIBC_INLINE_ASM macro --- libc/src/stdio/printf_core/printf_main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/stdio/printf_core/printf_main.h b/libc/src/stdio/printf_core/printf_main.h index c77922bb4f044..af2fd1f09cd99 100644 --- a/libc/src/stdio/printf_core/printf_main.h +++ b/libc/src/stdio/printf_core/printf_main.h @@ -45,7 +45,7 @@ template int printf_main(Writer *writer, const char *__restrict str, internal::ArgList &args) { #ifdef LIBC_COPT_PRINTF_MODULAR - __asm__ __volatile__ (".reloc ., BFD_RELOC_NONE, __printf_float"); + LIBC_INLINE_ASM(".reloc ., BFD_RELOC_NONE, __printf_float"); #endif return printf_main_modular(writer, str, args); } From 40cbed774962681fa25460c8950f70fbf8547c44 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 16 Jul 2025 13:58:01 -0700 Subject: [PATCH 31/31] 
LIBC_INLINE for modular fn definitions --- libc/src/stdio/printf_core/float_dec_converter.h | 12 ++++++------ .../stdio/printf_core/float_dec_converter_limited.h | 12 ++++++------ libc/src/stdio/printf_core/float_hex_converter.h | 4 ++-- libc/src/stdio/printf_core/parser.h | 11 ++++++----- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h index deeb566bd4092..1b0a02ed426c1 100644 --- a/libc/src/stdio/printf_core/float_dec_converter.h +++ b/libc/src/stdio/printf_core/float_dec_converter.h @@ -1137,8 +1137,8 @@ convert_float_dec_auto(Writer *writer, // TODO: unify the float converters to remove the duplicated checks for inf/nan. template -int convert_float_decimal(Writer *writer, - const FormatSection &to_conv) { +LIBC_INLINE int convert_float_decimal(Writer *writer, + const FormatSection &to_conv) { if (to_conv.length_modifier == LengthModifier::L) { fputil::FPBits::StorageType float_raw = to_conv.conv_val_raw; fputil::FPBits float_bits(float_raw); @@ -1159,8 +1159,8 @@ int convert_float_decimal(Writer *writer, } template -int convert_float_dec_exp(Writer *writer, - const FormatSection &to_conv) { +LIBC_INLINE int convert_float_dec_exp(Writer *writer, + const FormatSection &to_conv) { if (to_conv.length_modifier == LengthModifier::L) { fputil::FPBits::StorageType float_raw = to_conv.conv_val_raw; fputil::FPBits float_bits(float_raw); @@ -1181,8 +1181,8 @@ int convert_float_dec_exp(Writer *writer, } template -int convert_float_dec_auto(Writer *writer, - const FormatSection &to_conv) { +LIBC_INLINE int convert_float_dec_auto(Writer *writer, + const FormatSection &to_conv) { if (to_conv.length_modifier == LengthModifier::L) { fputil::FPBits::StorageType float_raw = to_conv.conv_val_raw; fputil::FPBits float_bits(float_raw); diff --git a/libc/src/stdio/printf_core/float_dec_converter_limited.h b/libc/src/stdio/printf_core/float_dec_converter_limited.h index 
9804a38964be0..d9af10dcc6c00 100644 --- a/libc/src/stdio/printf_core/float_dec_converter_limited.h +++ b/libc/src/stdio/printf_core/float_dec_converter_limited.h @@ -687,20 +687,20 @@ LIBC_PRINTF_MODULAR_DECL int convert_float_dec_auto(Writer *writer, #ifdef LIBC_PRINTF_DEFINE_MODULAR template -int convert_float_decimal(Writer *writer, - const FormatSection &to_conv) { +LIBC_INLINE int convert_float_decimal(Writer *writer, + const FormatSection &to_conv) { return convert_float_outer(writer, to_conv, ConversionType::F); } template -int convert_float_dec_exp(Writer *writer, - const FormatSection &to_conv) { +LIBC_INLINE int convert_float_dec_exp(Writer *writer, + const FormatSection &to_conv) { return convert_float_outer(writer, to_conv, ConversionType::E); } template -int convert_float_dec_auto(Writer *writer, - const FormatSection &to_conv) { +LIBC_INLINE int convert_float_dec_auto(Writer *writer, + const FormatSection &to_conv) { return convert_float_outer(writer, to_conv, ConversionType::G); } #endif diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index fa724066813d7..f1aa556fe2820 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -31,8 +31,8 @@ LIBC_PRINTF_MODULAR_DECL int convert_float_hex_exp(Writer *writer, #ifdef LIBC_PRINTF_DEFINE_MODULAR template -int convert_float_hex_exp(Writer *writer, - const FormatSection &to_conv) { +LIBC_INLINE int convert_float_hex_exp(Writer *writer, + const FormatSection &to_conv) { using LDBits = fputil::FPBits; using StorageType = LDBits::StorageType; diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h index 5a1eea36b603f..f25e761fa6cd3 100644 --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -682,9 +682,9 @@ template class Parser { #ifdef LIBC_PRINTF_DEFINE_MODULAR template -void Parser::write_float_arg_val(FormatSection 
§ion, - LengthModifier lm, - size_t conv_index) { +LIBC_INLINE void Parser::write_float_arg_val(FormatSection §ion, + LengthModifier lm, + size_t conv_index) { if (lm != LengthModifier::L) { WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, double, conv_index); } else { @@ -693,7 +693,7 @@ void Parser::write_float_arg_val(FormatSection §ion, } template -TypeDesc Parser::float_type_desc(LengthModifier lm) { +LIBC_INLINE TypeDesc Parser::float_type_desc(LengthModifier lm) { if (lm != LengthModifier::L) return type_desc_from_type(); else @@ -701,7 +701,8 @@ TypeDesc Parser::float_type_desc(LengthModifier lm) { } template -bool Parser::advance_arg_if_float(TypeDesc cur_type_desc) { +LIBC_INLINE bool +Parser::advance_arg_if_float(TypeDesc cur_type_desc) { if (cur_type_desc == type_desc_from_type()) args_cur.template next_var(); else if (cur_type_desc == type_desc_from_type())