Skip to content

Commit 9f77d27

Browse files
committed
[Target][KernelInfo] Move kernel-info pass to OptimizerLastEPCallback for YAML remark output
The kernel-info pass was registered via FullLinkTimeOptimizationLastEPCallback, which runs after the optimization record YAML files have been finalized. As a result, kernel-info remarks appeared in terminal output but were missing from the YAML files produced by -fsave-optimization-record. Move the kernel-info registration to OptimizerLastEPCallback, which runs during the LTO optimization pipeline while the remark streamer is still active, so that kernel-info remarks (including those for NVVM GPU intrinsics such as @llvm.nvvm.read.ptx.sreg.tid.x) are captured in both the terminal output and the YAML optimization record files. This change also removes the getRemarkLocation helper from KernelInfo.cpp; the call, flat-address-space-access, and property remarks are now constructed directly from the instruction or function. Affects the NVPTX and AMDGPU targets.
1 parent c1b0977 commit 9f77d27

File tree

3 files changed

+25
-34
lines changed

3 files changed

+25
-34
lines changed

llvm/lib/Analysis/KernelInfo.cpp

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,6 @@ class KernelInfo {
7777

7878
} // end anonymous namespace
7979

80-
static DiagnosticLocation getRemarkLocation(const Instruction &I) {
81-
if (DebugLoc DL = I.getDebugLoc())
82-
return DiagnosticLocation(DL);
83-
if (auto *SP = I.getFunction()->getSubprogram())
84-
return DiagnosticLocation(SP);
85-
return DiagnosticLocation();
86-
}
87-
8880
static void identifyCallee(OptimizationRemark &R, const Module *M,
8981
const Value *V, StringRef Kind = "") {
9082
SmallString<100> Name; // might be function name or asm expression
@@ -113,19 +105,16 @@ static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
113105
TypeSize::ScalarTy StaticSize) {
114106
ORE.emit([&] {
115107
StringRef DbgName;
116-
DebugLoc DL;
108+
DebugLoc Loc;
117109
bool Artificial = false;
118110
auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
119111
if (!DVRs.empty()) {
120112
const DbgVariableRecord &DVR = **DVRs.begin();
121113
DbgName = DVR.getVariable()->getName();
122-
DL = DVR.getDebugLoc();
114+
Loc = DVR.getDebugLoc();
123115
Artificial = DVR.Variable->isArtificial();
124116
}
125-
126-
OptimizationRemark R(DEBUG_TYPE, "Alloca",
127-
DL ? DiagnosticLocation(DL)
128-
: getRemarkLocation(Alloca),
117+
OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
129118
Alloca.getParent());
130119
R << "in ";
131120
identifyFunction(R, Caller);
@@ -153,8 +142,7 @@ static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
153142
const CallBase &Call, StringRef CallKind,
154143
StringRef RemarkKind) {
155144
ORE.emit([&] {
156-
OptimizationRemark R(DEBUG_TYPE, RemarkKind, getRemarkLocation(Call),
157-
Call.getParent());
145+
OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
158146
R << "in ";
159147
identifyFunction(R, Caller);
160148
R << ", " << CallKind << ", callee is ";
@@ -167,8 +155,7 @@ static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
167155
const Function &Caller,
168156
const Instruction &Inst) {
169157
ORE.emit([&] {
170-
OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess",
171-
getRemarkLocation(Inst), Inst.getParent());
158+
OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
172159
R << "in ";
173160
identifyFunction(R, Caller);
174161
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -278,11 +265,7 @@ void KernelInfo::updateForBB(const BasicBlock &BB,
278265
static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
279266
StringRef Name, int64_t Value) {
280267
ORE.emit([&] {
281-
DiagnosticLocation DL = F.getSubprogram()
282-
? DiagnosticLocation(F.getSubprogram())
283-
: DiagnosticLocation();
284-
OptimizationRemark R(DEBUG_TYPE, Name, DL,
285-
!F.empty() ? &F.front() : nullptr);
268+
OptimizationRemark R(DEBUG_TYPE, Name, &F);
286269
R << "in ";
287270
identifyFunction(R, F);
288271
R << ", " << Name << " = " << itostr(Value);

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -941,13 +941,20 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
941941
*this, Opt, ThinOrFullLTOPhase::FullLTOPostLink));
942942
}
943943
}
944-
if (!NoKernelInfoEndLTO) {
945-
FunctionPassManager FPM;
946-
FPM.addPass(KernelInfoPrinter(this));
947-
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
948-
}
949944
});
950945

946+
// Add the kernel-info pass using OptimizerLastEPCallback to run during LTO
947+
// while the remark streamer is still active.
948+
if (!NoKernelInfoEndLTO) {
949+
PB.registerOptimizerLastEPCallback([this](ModulePassManager &PM,
950+
OptimizationLevel Level,
951+
ThinOrFullLTOPhase Phase) {
952+
FunctionPassManager FPM;
953+
FPM.addPass(KernelInfoPrinter(this));
954+
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
955+
});
956+
}
957+
951958
PB.registerRegClassFilterParsingCallback(
952959
[](StringRef FilterName) -> RegAllocFilterFunc {
953960
if (FilterName == "sgpr")

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -260,12 +260,13 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
260260
});
261261

262262
if (!NoKernelInfoEndLTO) {
263-
PB.registerFullLinkTimeOptimizationLastEPCallback(
264-
[this](ModulePassManager &PM, OptimizationLevel Level) {
265-
FunctionPassManager FPM;
266-
FPM.addPass(KernelInfoPrinter(this));
267-
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
268-
});
263+
PB.registerOptimizerLastEPCallback([this](ModulePassManager &PM,
264+
OptimizationLevel Level,
265+
ThinOrFullLTOPhase Phase) {
266+
FunctionPassManager FPM;
267+
FPM.addPass(KernelInfoPrinter(this));
268+
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
269+
});
269270
}
270271
}
271272

0 commit comments

Comments
 (0)