Skip to content

Commit a4277f4

Browse files
committed
Addressed feedback
1 parent c6bacae commit a4277f4

File tree

6 files changed

+222
-13
lines changed

6 files changed

+222
-13
lines changed

llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ class LVBinaryReader : public LVReader {
160160

161161
// Loads all info for the architecture of the provided object file.
162162
Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures,
163-
StringRef CPU);
163+
StringRef TheCPU);
164164

165165
virtual void mapRangeAddress(const object::ObjectFile &Obj) {}
166166
virtual void mapRangeAddress(const object::ObjectFile &Obj,

llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) {
276276

277277
Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
278278
StringRef TheFeatures,
279-
StringRef CPU) {
279+
StringRef TheCPU) {
280280
std::string TargetLookupError;
281281
const Target *TheTarget =
282282
TargetRegistry::lookupTarget(TheTriple, TargetLookupError);
@@ -300,7 +300,7 @@ Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
300300

301301
// Target subtargets.
302302
MCSubtargetInfo *SubtargetInfo(
303-
TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures));
303+
TheTarget->createMCSubtargetInfo(TheTriple, TheCPU, TheFeatures));
304304
if (!SubtargetInfo)
305305
return createStringError(errc::invalid_argument,
306306
"no subtarget info for target " + TheTriple);

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
226226
public:
227227
ELFAMDGPUAsmBackend(const Target &T, const Triple &TT)
228228
: AMDGPUAsmBackend(T), Is64Bit(TT.isAMDGCN()),
229-
HasRelocationAddend(TT.getOS() == Triple::AMDHSA ||
230-
TT.getOS() == Triple::AMDPAL) {
229+
HasRelocationAddend(TT.getOS() == Triple::AMDHSA) {
231230
switch (TT.getOS()) {
232231
case Triple::AMDHSA:
233232
OSABI = ELF::ELFOSABI_AMDGPU_HSA;

llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,10 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
130130
if (VirtReg.isPhysical())
131131
continue;
132132

133-
if (MI.isDebugInstr() && VirtReg == AMDGPU::NoRegister)
133+
if (!VirtReg.isValid()) {
134+
assert(MI.isDebugInstr() && "non-debug use of noreg");
134135
continue;
136+
}
135137

136138
if (!VRM->hasPhys(VirtReg))
137139
continue;

llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22
; RUN: llvm-debuginfo-analyzer %t.o --print=all --attribute=all | FileCheck %s
33

44
; This test compiles this module with AMDGPU backend under -O0,
5-
; and makes sure llvm-debuginfo-analzyer works for it.
5+
; and makes sure llvm-debuginfo-analyzer works for it.
66

77
; Simple checks to make sure llvm-debuginfo-analyzer didn't fail early.
88
; CHECK: Logical View:
99
; CHECK: {CompileUnit}
10+
; CHECK-DAG: {Parameter} 'dtid' -> [0x{{[a-f0-9]+}}]'uint3'
11+
; CHECK-DAG: {Variable} 'my_var2' -> [0x{{[a-f0-9]+}}]'float'
12+
; CHECK-DAG: {Line} {{.+}}basic_var.hlsl
1013
; CHECK: {Code} 's_endpgm'
1114

1215
source_filename = "module"
@@ -15,7 +18,6 @@ target triple = "amdgcn-amd-amdpal"
1518

1619
%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
1720

18-
; Function Attrs: memory(readwrite)
1921
define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 {
2022
%LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28
2123
%WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28
@@ -42,16 +44,12 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable,
4244
ret void, !dbg !37
4345
}
4446

45-
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
4647
declare noundef i64 @llvm.amdgcn.s.getpc() #1
4748

48-
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
4949
declare void @llvm.assume(i1 noundef) #2
5050

51-
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
5251
declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3
5352

54-
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
5553
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4
5654

5755
attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" }
@@ -100,4 +98,4 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) }
10098
!34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9)
10199
!35 = !DILocation(line: 14, column: 9, scope: !14)
102100
!36 = !DILocation(line: 17, column: 14, scope: !14)
103-
!37 = !DILocation(line: 19, column: 1, scope: !14)
101+
!37 = !DILocation(line: 19, column: 1, scope: !14)
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
# RUN: llc %s -o - -mcpu=gfx1030 -O0 -run-pass=si-pre-allocate-wwm-regs | FileCheck %s
2+
3+
# Simple regression test to make sure DBG_VALUE $noreg does not assert in the pass
4+
5+
# CHECK: S_ENDPGM
6+
7+
--- |
8+
source_filename = "module"
9+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
10+
target triple = "amdgcn-amd-amdpal"
11+
12+
%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
13+
14+
define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 {
15+
%LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28
16+
%WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28
17+
%1 = call i64 @llvm.amdgcn.s.getpc(), !dbg !28
18+
%2 = shl i32 %WorkgroupId.i0, 6, !dbg !28
19+
%3 = add i32 %LocalInvocationId.i0, %2, !dbg !28
20+
#dbg_value(i32 %3, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28)
21+
%4 = and i64 %1, -4294967296, !dbg !30
22+
%5 = zext i32 %userdata4 to i64, !dbg !30
23+
%6 = or disjoint i64 %4, %5, !dbg !30
24+
%7 = inttoptr i64 %6 to ptr addrspace(4), !dbg !30, !amdgpu.uniform !2
25+
%8 = load <4 x i32>, ptr addrspace(4) %7, align 4, !dbg !30, !invariant.load !2
26+
%9 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %8, i32 %3, i32 0, i32 0, i32 0), !dbg !30
27+
#dbg_value(%dx.types.ResRet.f32 poison, !31, !DIExpression(), !32)
28+
%10 = fmul reassoc arcp contract afn float %9, 2.000000e+00, !dbg !33
29+
#dbg_value(float %10, !34, !DIExpression(), !35)
30+
%11 = getelementptr i8, ptr addrspace(4) %7, i64 32, !dbg !36, !amdgpu.uniform !2
31+
%.upto01 = insertelement <4 x float> poison, float %10, i64 0, !dbg !36
32+
%12 = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36
33+
%13 = load <4 x i32>, ptr addrspace(4) %11, align 4, !dbg !36, !invariant.load !2
34+
call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %12, <4 x i32> %13, i32 %3, i32 0, i32 0, i32 0), !dbg !36
35+
ret void, !dbg !37
36+
}
37+
38+
declare noundef i64 @llvm.amdgcn.s.getpc() #1
39+
declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3
40+
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4
41+
42+
attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" }
43+
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1030" }
44+
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx1030" }
45+
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) "target-cpu"="gfx1030" }
46+
attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx1030" }
47+
48+
!llvm.dbg.cu = !{!0}
49+
!llvm.module.flags = !{!12, !13}
50+
51+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3)
52+
!1 = !DIFile(filename: "tests\\basic_var.hlsl", directory: "")
53+
!2 = !{}
54+
!3 = !{!4, !10}
55+
!4 = distinct !DIGlobalVariableExpression(var: !5, expr: !DIExpression())
56+
!5 = !DIGlobalVariable(name: "u0", linkageName: "\01?u0@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 2, type: !6, isLocal: false, isDefinition: true)
57+
!6 = !DICompositeType(tag: DW_TAG_class_type, name: "RWBuffer<float>", file: !1, line: 2, size: 32, align: 32, elements: !2, templateParams: !7)
58+
!7 = !{!8}
59+
!8 = !DITemplateTypeParameter(name: "element", type: !9)
60+
!9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
61+
!10 = distinct !DIGlobalVariableExpression(var: !11, expr: !DIExpression())
62+
!11 = !DIGlobalVariable(name: "u1", linkageName: "\01?u1@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true)
63+
!12 = !{i32 2, !"Dwarf Version", i32 5}
64+
!13 = !{i32 2, !"Debug Info Version", i32 3}
65+
!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !15, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
66+
!15 = !DISubroutineType(types: !16)
67+
!16 = !{null, !17}
68+
!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !18)
69+
!18 = !DICompositeType(tag: DW_TAG_class_type, name: "vector<unsigned int, 3>", file: !1, size: 96, align: 32, elements: !19, templateParams: !24)
70+
!19 = !{!20, !22, !23}
71+
!20 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !18, file: !1, baseType: !21, size: 32, align: 32, flags: DIFlagPublic)
72+
!21 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
73+
!22 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
74+
!23 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 64, flags: DIFlagPublic)
75+
!24 = !{!25, !26}
76+
!25 = !DITemplateTypeParameter(name: "element", type: !21)
77+
!26 = !DITemplateValueParameter(name: "element_count", type: !27, value: i32 3)
78+
!27 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
79+
!28 = !DILocation(line: 7, column: 17, scope: !14)
80+
!29 = !DILocalVariable(name: "dtid", arg: 1, scope: !14, file: !1, line: 7, type: !17)
81+
!30 = !DILocation(line: 11, column: 18, scope: !14)
82+
!31 = !DILocalVariable(name: "my_var", scope: !14, file: !1, line: 11, type: !9)
83+
!32 = !DILocation(line: 11, column: 9, scope: !14)
84+
!33 = !DILocation(line: 14, column: 26, scope: !14)
85+
!34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9)
86+
!35 = !DILocation(line: 14, column: 9, scope: !14)
87+
!36 = !DILocation(line: 17, column: 14, scope: !14)
88+
!37 = !DILocation(line: 19, column: 1, scope: !14)
89+
...
90+
---
91+
name: _amdgpu_cs_main
92+
alignment: 1
93+
exposesReturnsTwice: false
94+
legalized: false
95+
regBankSelected: false
96+
selected: false
97+
failedISel: false
98+
tracksRegLiveness: true
99+
hasWinCFI: false
100+
noPhis: true
101+
isSSA: false
102+
noVRegs: false
103+
hasFakeUses: false
104+
callsEHReturn: false
105+
callsUnwindInit: false
106+
hasEHContTarget: false
107+
hasEHScopes: false
108+
hasEHFunclets: false
109+
isOutlined: false
110+
debugInstrRef: false
111+
failsVerification: false
112+
tracksDebugUserValues: false
113+
fixedStack: []
114+
stack: []
115+
entry_values: []
116+
callSites: []
117+
debugValueSubstitutions: []
118+
constants: []
119+
machineFunctionInfo:
120+
explicitKernArgSize: 0
121+
maxKernArgAlign: 4
122+
ldsSize: 0
123+
gdsSize: 0
124+
dynLDSAlign: 1
125+
isEntryFunction: true
126+
isChainFunction: false
127+
noSignedZerosFPMath: false
128+
memoryBound: false
129+
waveLimiter: false
130+
hasSpilledSGPRs: true
131+
hasSpilledVGPRs: false
132+
scratchRSrcReg: '$private_rsrc_reg'
133+
frameOffsetReg: '$fp_reg'
134+
stackPtrOffsetReg: '$sgpr32'
135+
bytesInStackArgArea: 0
136+
returnsVoid: true
137+
argumentInfo:
138+
privateSegmentWaveByteOffset: { reg: '$sgpr6' }
139+
psInputAddr: 0
140+
psInputEnable: 0
141+
maxMemoryClusterDWords: 8
142+
mode:
143+
ieee: false
144+
dx10-clamp: true
145+
fp32-input-denormals: false
146+
fp32-output-denormals: false
147+
fp64-fp16-input-denormals: true
148+
fp64-fp16-output-denormals: true
149+
highBitsOf32BitAddress: 0
150+
occupancy: 16
151+
vgprForAGPRCopy: ''
152+
sgprForEXECCopy: '$sgpr12_sgpr13'
153+
longBranchReservedReg: ''
154+
hasInitWholeWave: false
155+
dynamicVGPRBlockSize: 0
156+
scratchReservedForDynamicVGPRs: 0
157+
body: |
158+
bb.0 (%ir-block.0):
159+
liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
160+
161+
%8:vgpr_32 = COPY killed $vgpr2
162+
%7:vgpr_32 = COPY killed $vgpr1
163+
%6:vgpr_32 = COPY killed $vgpr0
164+
renamable $sgpr0 = COPY killed $sgpr4
165+
%39:vgpr_32 = IMPLICIT_DEF
166+
%39:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 0, %39
167+
renamable $sgpr3 = COPY killed $sgpr2
168+
renamable $sgpr2 = COPY $sgpr1
169+
$sgpr1 = SI_RESTORE_S32_FROM_VGPR %39, 0
170+
dead renamable $sgpr4 = IMPLICIT_DEF
171+
dead renamable $sgpr4 = IMPLICIT_DEF
172+
dead renamable $sgpr4 = IMPLICIT_DEF
173+
undef %38.sub0:vreg_96 = COPY %6
174+
%38.sub1:vreg_96 = COPY %7
175+
dead %38.sub2:vreg_96 = COPY %8
176+
undef renamable $sgpr4 = COPY renamable $sgpr3, implicit-def $sgpr4_sgpr5_sgpr6
177+
renamable $sgpr5 = COPY killed renamable $sgpr1
178+
renamable $sgpr6 = COPY killed renamable $sgpr0
179+
dead renamable $sgpr8_sgpr9_sgpr10 = IMPLICIT_DEF
180+
renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo debug-location !28
181+
renamable $sgpr4 = S_MOV_B32 6
182+
%16:vgpr_32 = V_LSHL_ADD_U32_e64 killed $sgpr3, killed $sgpr4, %6, implicit $exec, debug-location !28
183+
DBG_VALUE %16, $noreg, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !28
184+
renamable $sgpr3 = S_MOV_B32 -1
185+
renamable $sgpr4 = S_MOV_B32 0
186+
undef renamable $sgpr6 = COPY renamable $sgpr4, implicit-def $sgpr6_sgpr7
187+
renamable $sgpr7 = COPY killed renamable $sgpr3
188+
renamable $sgpr0_sgpr1 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $sgpr6_sgpr7, implicit-def dead $scc, debug-location !30
189+
renamable $sgpr5 = S_MOV_B32 0, debug-location !30
190+
undef renamable $sgpr2 = COPY killed renamable $sgpr2, implicit-def $sgpr2_sgpr3, debug-location !30
191+
renamable $sgpr3 = COPY killed renamable $sgpr5, debug-location !30
192+
renamable $sgpr0_sgpr1 = disjoint S_OR_B64 killed renamable $sgpr0_sgpr1, killed renamable $sgpr2_sgpr3, implicit-def dead $scc, debug-location !30
193+
renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, debug-location !30 :: (invariant load (s128) from %ir.7, align 4, addrspace 4)
194+
renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 32, 0, debug-location !36 :: (invariant load (s128) from %ir.11, align 4, addrspace 4)
195+
%26:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %16, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, renamable $sgpr4, 0, 0, 0, implicit $exec, debug-location !30 :: (dereferenceable load (s32), align 1, addrspace 8)
196+
DBG_VALUE $noreg, $noreg, !31, !DIExpression(), debug-location !32
197+
%27:vgpr_32 = arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %26, 0, %26, 0, 0, implicit $mode, implicit $exec, debug-location !33
198+
DBG_VALUE %27, $noreg, !34, !DIExpression(), debug-location !35
199+
dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36
200+
dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36
201+
dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36
202+
dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36
203+
undef %37.sub0:vreg_128 = COPY %27, debug-location !36
204+
%37.sub1:vreg_128 = COPY %27, debug-location !36
205+
%37.sub2:vreg_128 = COPY %27, debug-location !36
206+
%37.sub3:vreg_128 = COPY %27, debug-location !36
207+
%29:vreg_128 = COPY %37, debug-location !36
208+
BUFFER_STORE_FORMAT_XYZW_IDXEN_exact %29, %16, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec, debug-location !36 :: (dereferenceable store (s128), align 1, addrspace 8)
209+
S_ENDPGM 0, debug-location !37
210+
...

0 commit comments

Comments
 (0)