Skip to content

Commit 8711cea

Browse files
committed
[DAGCombiner] Preserve debug location of original load in fold (conv (load x))
This patch fixes a loss of debug information when a conversion (e.g., a bitcast) is combined with a load into a new load: `fold (conv (load x)) -> (load (conv*)x)`. The newly created load node incorrectly used the debug location (`SDLoc`) of the conversion operation (the `conv` node, `N`) instead of the location of the original load operation (the `load` node, `LN0`). The location of the conversion operation often points to compiler-internal instructions and provides little value for source-level debugging. In contrast, the original load's location accurately represents the source of the data access in the user's code. This change ensures the new load inherits the debug location from `LN0` by using `SDLoc(LN0)`, which improves the debugging experience and fixes a test case failure observed in the Triton compiler.
1 parent 862a1be commit 8711cea

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16703,7 +16703,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
1670316703
}
1670416704
}
1670516705
SDValue Load =
16706-
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
16706+
DAG.getLoad(VT, SDLoc(LN0), LN0->getChain(), LN0->getBasePtr(),
1670716707
LN0->getMemOperand());
1670816708
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
1670916709
return Load;
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s
2+
3+
; CHECK-LABEL: test:
4+
; CHECK: .loc 1 8 16 ; test.py:8:16
5+
; CHECK-NEXT: s_load_dword
6+
7+
; Kernel under test. When the workitem id x, masked with 255, equals zero, it
; loads a <1 x float> from %0 and stores it to %1; otherwise it returns directly.
; The load is the only instruction tagged !dbg !8 (line 8, column 16). The
; instructions before it and the store/branch after it use !dbg !7 (line 9,
; column 20), and the return uses !dbg !9. The expected `.loc 1 8 16` directly
; ahead of s_load_dword therefore has to originate from the load's own debug
; location (presumably this is the load that the conversion fold rewrites).
; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
8+
define amdgpu_kernel void @test(ptr addrspace(1) inreg readonly captures(none) %0, ptr addrspace(1) inreg writeonly captures(none) %1, ptr addrspace(1) inreg readnone captures(none) %2, ptr addrspace(1) inreg readnone captures(none) %3) local_unnamed_addr #0 !dbg !4 {
9+
%5 = tail call i32 @llvm.amdgcn.workitem.id.x(), !dbg !7
10+
%6 = and i32 %5, 255, !dbg !7
11+
%7 = icmp eq i32 %6, 0, !dbg !7
12+
br i1 %7, label %8, label %10, !dbg !7
13+
14+
8: ; preds = %4
15+
%9 = load <1 x float>, ptr addrspace(1) %0, align 4, !dbg !8, !amdgpu.noclobber !6
16+
store <1 x float> %9, ptr addrspace(1) %1, align 4, !dbg !7
17+
br label %10, !dbg !7
18+
19+
10: ; preds = %8, %4
20+
ret void, !dbg !9
21+
}
22+
23+
; Function Attrs: alwaysinline nocallback nofree nosync nounwind speculatable willreturn memory(none)
24+
declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() #1
25+
26+
attributes #0 = { alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "denormal-fp-math-f32"="ieee" "uniform-work-group-size"="false" }
27+
attributes #1 = { alwaysinline nocallback nofree nosync nounwind speculatable willreturn memory(none) }
28+
29+
!llvm.dbg.cu = !{!0}
30+
!llvm.module.flags = !{!2, !3}
31+
32+
!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly)
33+
!1 = !DIFile(filename: "test.py", directory: "/path")
34+
!2 = !{i32 2, !"Debug Info Version", i32 3}
35+
!3 = !{i32 1, !"amdhsa_code_object_version", i32 500}
36+
!4 = distinct !DISubprogram(name: "test", linkageName: "test", scope: !1, file: !1, line: 7, type: !5, scopeLine: 7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
37+
!5 = !DISubroutineType(cc: DW_CC_normal, types: !6)
38+
!6 = !{}
39+
!7 = !DILocation(line: 9, column: 20, scope: !4)
40+
!8 = !DILocation(line: 8, column: 16, scope: !4)
41+
!9 = !DILocation(line: 9, column: 4, scope: !4)

0 commit comments

Comments
 (0)