From c1b0977d0ba3d80b5d5e896307a98a816f4efcd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= Date: Tue, 24 Jun 2025 23:24:40 +0200 Subject: [PATCH 1/2] [analysis] fix unknown source locations in KernelInfo The fix provides meaningful source locations by falling back to the containing function's subprogram information instead of showing unknown locations. --- llvm/lib/Analysis/KernelInfo.cpp | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Analysis/KernelInfo.cpp b/llvm/lib/Analysis/KernelInfo.cpp index 93dd7cecb32e1..33a84452d1527 100644 --- a/llvm/lib/Analysis/KernelInfo.cpp +++ b/llvm/lib/Analysis/KernelInfo.cpp @@ -77,6 +77,14 @@ class KernelInfo { } // end anonymous namespace +static DiagnosticLocation getRemarkLocation(const Instruction &I) { + if (DebugLoc DL = I.getDebugLoc()) + return DiagnosticLocation(DL); + if (auto *SP = I.getFunction()->getSubprogram()) + return DiagnosticLocation(SP); + return DiagnosticLocation(); +} + static void identifyCallee(OptimizationRemark &R, const Module *M, const Value *V, StringRef Kind = "") { SmallString<100> Name; // might be function name or asm expression @@ -105,16 +113,19 @@ static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller, TypeSize::ScalarTy StaticSize) { ORE.emit([&] { StringRef DbgName; - DebugLoc Loc; + DebugLoc DL; bool Artificial = false; auto DVRs = findDVRDeclares(&const_cast(Alloca)); if (!DVRs.empty()) { const DbgVariableRecord &DVR = **DVRs.begin(); DbgName = DVR.getVariable()->getName(); - Loc = DVR.getDebugLoc(); + DL = DVR.getDebugLoc(); Artificial = DVR.Variable->isArtificial(); } - OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc), + + OptimizationRemark R(DEBUG_TYPE, "Alloca", + DL ? 
DiagnosticLocation(DL) + : getRemarkLocation(Alloca), Alloca.getParent()); R << "in "; identifyFunction(R, Caller); @@ -142,7 +153,8 @@ static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller, const CallBase &Call, StringRef CallKind, StringRef RemarkKind) { ORE.emit([&] { - OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call); + OptimizationRemark R(DEBUG_TYPE, RemarkKind, getRemarkLocation(Call), + Call.getParent()); R << "in "; identifyFunction(R, Caller); R << ", " << CallKind << ", callee is "; @@ -155,7 +167,8 @@ static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE, const Function &Caller, const Instruction &Inst) { ORE.emit([&] { - OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst); + OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", + getRemarkLocation(Inst), Inst.getParent()); R << "in "; identifyFunction(R, Caller); if (const IntrinsicInst *II = dyn_cast(&Inst)) { @@ -265,7 +278,11 @@ void KernelInfo::updateForBB(const BasicBlock &BB, static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F, StringRef Name, int64_t Value) { ORE.emit([&] { - OptimizationRemark R(DEBUG_TYPE, Name, &F); + DiagnosticLocation DL = F.getSubprogram() + ? DiagnosticLocation(F.getSubprogram()) + : DiagnosticLocation(); + OptimizationRemark R(DEBUG_TYPE, Name, DL, + !F.empty() ? &F.front() : nullptr); R << "in "; identifyFunction(R, F); R << ", " << Name << " = " << itostr(Value); From 9f77d27b0608f3ec8e8acebf02506372e7fcaf09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= Date: Wed, 25 Jun 2025 02:37:38 +0200 Subject: [PATCH 2/2] [Target][KernelInfo] Move kernel-info pass to OptimizerLastEPCallback for YAML remark output The kernel-info pass was registered using FullLinkTimeOptimizationLastEPCallback, which runs after the optimization record YAML files have been finalized. 
This caused kernel-info remarks to appear in terminal output but not in YAML files when using -fsave-optimization-record. Move kernel-info registration to OptimizerLastEPCallback, which runs during the LTO optimization pipeline while the remark streamer is still active. This ensures kernel-info remarks (including NVVM GPU intrinsics like @llvm.nvvm.read.ptx.sreg.tid.x) are captured in both terminal output and YAML optimization record files. Affects NVPTX and AMDGPU targets. Note that this patch also restores llvm/lib/Analysis/KernelInfo.cpp to its state before PATCH 1/2, removing getRemarkLocation and the subprogram-based location fallback introduced there. --- llvm/lib/Analysis/KernelInfo.cpp | 29 ++++--------------- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 17 +++++++---- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 13 +++++---- 3 files changed, 25 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Analysis/KernelInfo.cpp b/llvm/lib/Analysis/KernelInfo.cpp index 33a84452d1527..93dd7cecb32e1 100644 --- a/llvm/lib/Analysis/KernelInfo.cpp +++ b/llvm/lib/Analysis/KernelInfo.cpp @@ -77,14 +77,6 @@ class KernelInfo { } // end anonymous namespace -static DiagnosticLocation getRemarkLocation(const Instruction &I) { - if (DebugLoc DL = I.getDebugLoc()) - return DiagnosticLocation(DL); - if (auto *SP = I.getFunction()->getSubprogram()) - return DiagnosticLocation(SP); - return DiagnosticLocation(); -} - static void identifyCallee(OptimizationRemark &R, const Module *M, const Value *V, StringRef Kind = "") { SmallString<100> Name; // might be function name or asm expression @@ -113,19 +105,16 @@ static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller, TypeSize::ScalarTy StaticSize) { ORE.emit([&] { StringRef DbgName; - DebugLoc DL; + DebugLoc Loc; bool Artificial = false; auto DVRs = findDVRDeclares(&const_cast(Alloca)); if (!DVRs.empty()) { const DbgVariableRecord &DVR = **DVRs.begin(); DbgName = DVR.getVariable()->getName(); - DL = DVR.getDebugLoc(); + Loc = DVR.getDebugLoc(); Artificial = DVR.Variable->isArtificial(); } - - OptimizationRemark R(DEBUG_TYPE, "Alloca", - DL ? 
DiagnosticLocation(DL) - : getRemarkLocation(Alloca), + OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc), Alloca.getParent()); R << "in "; identifyFunction(R, Caller); @@ -153,8 +142,7 @@ static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller, const CallBase &Call, StringRef CallKind, StringRef RemarkKind) { ORE.emit([&] { - OptimizationRemark R(DEBUG_TYPE, RemarkKind, getRemarkLocation(Call), - Call.getParent()); + OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call); R << "in "; identifyFunction(R, Caller); R << ", " << CallKind << ", callee is "; @@ -167,8 +155,7 @@ static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE, const Function &Caller, const Instruction &Inst) { ORE.emit([&] { - OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", - getRemarkLocation(Inst), Inst.getParent()); + OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst); R << "in "; identifyFunction(R, Caller); if (const IntrinsicInst *II = dyn_cast(&Inst)) { @@ -278,11 +265,7 @@ void KernelInfo::updateForBB(const BasicBlock &BB, static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F, StringRef Name, int64_t Value) { ORE.emit([&] { - DiagnosticLocation DL = F.getSubprogram() - ? DiagnosticLocation(F.getSubprogram()) - : DiagnosticLocation(); - OptimizationRemark R(DEBUG_TYPE, Name, DL, - !F.empty() ? 
&F.front() : nullptr); + OptimizationRemark R(DEBUG_TYPE, Name, &F); R << "in "; identifyFunction(R, F); R << ", " << Name << " = " << itostr(Value); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index d2e4825cf3c81..3306077ac1440 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -941,13 +941,20 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { *this, Opt, ThinOrFullLTOPhase::FullLTOPostLink)); } } - if (!NoKernelInfoEndLTO) { - FunctionPassManager FPM; - FPM.addPass(KernelInfoPrinter(this)); - PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } }); + // Add kernel-info pass using OptimizerLastEPCallback to run during LTO + // while remark streamer is still active + if (!NoKernelInfoEndLTO) { + PB.registerOptimizerLastEPCallback([this](ModulePassManager &PM, + OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { + FunctionPassManager FPM; + FPM.addPass(KernelInfoPrinter(this)); + PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + }); + } + PB.registerRegClassFilterParsingCallback( [](StringRef FilterName) -> RegAllocFilterFunc { if (FilterName == "sgpr") diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index ef310e5828f22..b9b6aa55cc875 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -260,12 +260,13 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { }); if (!NoKernelInfoEndLTO) { - PB.registerFullLinkTimeOptimizationLastEPCallback( - [this](ModulePassManager &PM, OptimizationLevel Level) { - FunctionPassManager FPM; - FPM.addPass(KernelInfoPrinter(this)); - PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - }); + PB.registerOptimizerLastEPCallback([this](ModulePassManager &PM, + OptimizationLevel Level, + ThinOrFullLTOPhase 
Phase) { + FunctionPassManager FPM; + FPM.addPass(KernelInfoPrinter(this)); + PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + }); } }