Skip to content

[AMDGPU] Fixed llvm-debuginfo-analyzer for AMDGPU. #145125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 12, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ class LVBinaryReader : public LVReader {
LVAddress WasmCodeSectionOffset = 0;

// Loads all info for the architecture of the provided object file.
Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures);
Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures,
StringRef TheCPU);

virtual void mapRangeAddress(const object::ObjectFile &Obj) {}
virtual void mapRangeAddress(const object::ObjectFile &Obj,
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) {
}

Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
StringRef TheFeatures) {
StringRef TheFeatures,
StringRef TheCPU) {
std::string TargetLookupError;
const Target *TheTarget =
TargetRegistry::lookupTarget(TheTriple, TargetLookupError);
Expand All @@ -298,9 +299,8 @@ Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
MAI.reset(AsmInfo);

// Target subtargets.
StringRef CPU;
MCSubtargetInfo *SubtargetInfo(
TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures));
TheTarget->createMCSubtargetInfo(TheTriple, TheCPU, TheFeatures));
if (!SubtargetInfo)
return createStringError(errc::invalid_argument,
"no subtarget info for target " + TheTriple);
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1190,7 +1190,12 @@ Error LVCodeViewReader::loadTargetInfo(const ObjectFile &Obj) {
FeaturesValue = SubtargetFeatures();
}
FeaturesValue = *Features;
return loadGenericTargetInfo(TT.str(), FeaturesValue.getString());

StringRef CPU;
if (auto OptCPU = Obj.tryGetCPUName())
CPU = *OptCPU;

return loadGenericTargetInfo(TT.str(), FeaturesValue.getString(), CPU);
}

Error LVCodeViewReader::loadTargetInfo(const PDBFile &Pdb) {
Expand All @@ -1200,8 +1205,9 @@ Error LVCodeViewReader::loadTargetInfo(const PDBFile &Pdb) {
TT.setOS(Triple::Win32);

StringRef TheFeature = "";
StringRef TheCPU = "";

return loadGenericTargetInfo(TT.str(), TheFeature);
return loadGenericTargetInfo(TT.str(), TheFeature, TheCPU);
}

std::string LVCodeViewReader::getRegisterName(LVSmall Opcode,
Expand Down
12 changes: 7 additions & 5 deletions llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,10 +956,7 @@ LVElement *LVDWARFReader::getElementForOffset(LVOffset Offset,
Error LVDWARFReader::loadTargetInfo(const ObjectFile &Obj) {
// Detect the architecture from the object file. We usually don't need OS
// info to lookup a target and create register info.
Triple TT;
TT.setArch(Triple::ArchType(Obj.getArch()));
TT.setVendor(Triple::UnknownVendor);
TT.setOS(Triple::UnknownOS);
Triple TT = Obj.makeTriple();

// Features to be passed to target/subtarget
Expected<SubtargetFeatures> Features = Obj.getFeatures();
Expand All @@ -969,7 +966,12 @@ Error LVDWARFReader::loadTargetInfo(const ObjectFile &Obj) {
FeaturesValue = SubtargetFeatures();
}
FeaturesValue = *Features;
return loadGenericTargetInfo(TT.str(), FeaturesValue.getString());

StringRef CPU;
if (auto OptCPU = Obj.tryGetCPUName())
CPU = *OptCPU;

return loadGenericTargetInfo(TT.str(), FeaturesValue.getString(), CPU);
}

void LVDWARFReader::mapRangeAddress(const ObjectFile &Obj) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
if (VirtReg.isPhysical())
continue;

if (!VirtReg.isValid())
continue;

if (!VRM->hasPhys(VirtReg))
continue;

Expand Down
100 changes: 100 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
; RUN: llc %s -o %t.o -mcpu=gfx1030 -filetype=obj -O0
; RUN: llvm-debuginfo-analyzer %t.o --print=all --attribute=all | FileCheck %s

; This test compiles this module with AMDGPU backend under -O0,
; and makes sure llvm-debuginfo-analyzer works for it.

; Simple checks to make sure llvm-debuginfo-analyzer didn't fail early.
; CHECK: Logical View:
; CHECK: {CompileUnit}
; CHECK-DAG: {Parameter} 'dtid' -> [0x{{[a-f0-9]+}}]'uint3'
; CHECK-DAG: {Variable} 'my_var2' -> [0x{{[a-f0-9]+}}]'float'
; CHECK-DAG: {Line} {{.+}}basic_var.hlsl
; CHECK: {Code} 's_endpgm'

source_filename = "module"
target triple = "amdgcn-amd-amdpal"

%dx.types.ResRet.f32 = type { float, float, float, float, i32 }

; Test entry point using the amdgpu_cs calling convention; its debug info
; (!14) describes it as the HLSL function 'main'. It reads one float through a
; buffer load, doubles it, and writes the result back through a buffer store,
; with debug records attached for 'dtid', 'my_var' and 'my_var2'.
define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 {
; Take the first (x) component of the local and workgroup IDs.
%LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28
%WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28
%pc = call i64 @llvm.amdgcn.s.getpc(), !dbg !28
; dtid = LocalInvocationId.x + (WorkgroupId.x << 6), i.e. WorkgroupId.x * 64.
%offset = shl i32 %WorkgroupId.i0, 6, !dbg !28
%dtid = add i32 %LocalInvocationId.i0, %offset, !dbg !28
; Bind the first 32 bits of the 'dtid' parameter (!29) to %dtid.
#dbg_value(i32 %dtid, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28)
; Build a 64-bit constant-address-space pointer: upper 32 bits come from the
; current PC (mask 0xFFFFFFFF00000000), lower 32 bits from %userdata4.
%pc_hi = and i64 %pc, -4294967296, !dbg !30
%zext = zext i32 %userdata4 to i64, !dbg !30
%ptr_val = or disjoint i64 %pc_hi, %zext, !dbg !30
%ptr = inttoptr i64 %ptr_val to ptr addrspace(4), !dbg !30
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) %ptr, i32 4), "dereferenceable"(ptr addrspace(4) %ptr, i32 -1) ], !dbg !30
; Load the first <4 x i32> at %ptr (presumably a buffer descriptor, going by
; the 'uav' naming) and use it to fetch one float at index %dtid.
%uav_0 = load <4 x i32>, ptr addrspace(4) %ptr, align 4, !dbg !30, !invariant.load !2
%uav_load_1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %uav_0, i32 %dtid, i32 0, i32 0, i32 0), !dbg !30
; 'my_var' (!31) is bound to poison, so no value is recorded for it here.
#dbg_value(%dx.types.ResRet.f32 poison, !31, !DIExpression(), !32)
%mul = fmul reassoc arcp contract afn float %uav_load_1, 2.000000e+00, !dbg !33
; 'my_var2' (!34) tracks the doubled value.
#dbg_value(float %mul, !34, !DIExpression(), !35)
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) %ptr, i32 4), "dereferenceable"(ptr addrspace(4) %ptr, i32 -1) ], !dbg !36
; The second <4 x i32> lives 32 bytes past %ptr.
%uav_1_ptr = getelementptr i8, ptr addrspace(4) %ptr, i64 32, !dbg !36
; Broadcast %mul into all four lanes (an all-zero shuffle mask selects lane 0).
%.upto01 = insertelement <4 x float> poison, float %mul, i64 0, !dbg !36
%filled_vector = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36
%uav_1 = load <4 x i32>, ptr addrspace(4) %uav_1_ptr, align 4, !dbg !36, !invariant.load !2
; Store the splatted vector at index %dtid through the second loaded value.
call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %filled_vector, <4 x i32> %uav_1, i32 %dtid, i32 0, i32 0, i32 0), !dbg !36
ret void, !dbg !37
}

declare noundef i64 @llvm.amdgcn.s.getpc() #1

declare void @llvm.assume(i1 noundef) #2

declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3

declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4

attributes #0 = { memory(readwrite) }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) }
attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!12, !13}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3)
!1 = !DIFile(filename: "tests\\basic_var.hlsl", directory: "")
!2 = !{}
!3 = !{!4, !10}
!4 = distinct !DIGlobalVariableExpression(var: !5, expr: !DIExpression())
!5 = !DIGlobalVariable(name: "u0", linkageName: "\01?u0@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 2, type: !6, isLocal: false, isDefinition: true)
!6 = !DICompositeType(tag: DW_TAG_class_type, name: "RWBuffer<float>", file: !1, line: 2, size: 32, align: 32, elements: !2, templateParams: !7)
!7 = !{!8}
!8 = !DITemplateTypeParameter(name: "element", type: !9)
!9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!10 = distinct !DIGlobalVariableExpression(var: !11, expr: !DIExpression())
!11 = !DIGlobalVariable(name: "u1", linkageName: "\01?u1@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true)
!12 = !{i32 2, !"Dwarf Version", i32 5}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !15, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!15 = !DISubroutineType(types: !16)
!16 = !{null, !17}
!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !18)
!18 = !DICompositeType(tag: DW_TAG_class_type, name: "vector<unsigned int, 3>", file: !1, size: 96, align: 32, elements: !19, templateParams: !24)
!19 = !{!20, !22, !23}
!20 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !18, file: !1, baseType: !21, size: 32, align: 32, flags: DIFlagPublic)
!21 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
!22 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
!23 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 64, flags: DIFlagPublic)
!24 = !{!25, !26}
!25 = !DITemplateTypeParameter(name: "element", type: !21)
!26 = !DITemplateValueParameter(name: "element_count", type: !27, value: i32 3)
!27 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!28 = !DILocation(line: 7, column: 17, scope: !14)
!29 = !DILocalVariable(name: "dtid", arg: 1, scope: !14, file: !1, line: 7, type: !17)
!30 = !DILocation(line: 11, column: 18, scope: !14)
!31 = !DILocalVariable(name: "my_var", scope: !14, file: !1, line: 11, type: !9)
!32 = !DILocation(line: 11, column: 9, scope: !14)
!33 = !DILocation(line: 14, column: 26, scope: !14)
!34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9)
!35 = !DILocation(line: 14, column: 9, scope: !14)
!36 = !DILocation(line: 17, column: 14, scope: !14)
!37 = !DILocation(line: 19, column: 1, scope: !14)
131 changes: 131 additions & 0 deletions llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# RUN: llc %s -o - -mcpu=gfx1030 -O0 -run-pass=si-pre-allocate-wwm-regs | FileCheck %s

# Simple regression test to make sure DBG_VALUE $noreg does not assert in the pass

# CHECK: $vgpr0 = IMPLICIT_DEF
# CHECK: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 0, $vgpr0

--- |
target triple = "amdgcn-amd-amdpal"
%dx.types.ResRet.f32 = type { float, float, float, float, i32 }

; Function Attrs: memory(readwrite)
; IR stub for the machine function in this test: it contains only two debug
; records, both bound to poison, for 'dtid' (!19) and 'my_var' (!21), followed
; by a return.
define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !5 {
; First 32-bit fragment of the 'dtid' parameter; no value available.
#dbg_value(i32 poison, !19, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !20)
; 'my_var'; no value available.
#dbg_value(%dx.types.ResRet.f32 poison, !21, !DIExpression(), !23)
ret void, !dbg !24
}

attributes #0 = { memory(readwrite) "amdgpu-prealloc-sgpr-spill-vgprs" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2)
!1 = !DIFile(filename: "tests\\basic_var.hlsl", directory: "")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 5}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !6, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !9)
!9 = !DICompositeType(tag: DW_TAG_class_type, name: "vector<unsigned int, 3>", file: !1, size: 96, align: 32, elements: !10, templateParams: !15)
!10 = !{!11, !13, !14}
!11 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !9, file: !1, baseType: !12, size: 32, align: 32, flags: DIFlagPublic)
!12 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
!13 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !9, file: !1, baseType: !12, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
!14 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !9, file: !1, baseType: !12, size: 32, align: 32, offset: 64, flags: DIFlagPublic)
!15 = !{!16, !17}
!16 = !DITemplateTypeParameter(name: "element", type: !12)
!17 = !DITemplateValueParameter(name: "element_count", type: !18, value: i32 3)
!18 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!19 = !DILocalVariable(name: "dtid", arg: 1, scope: !5, file: !1, line: 7, type: !8)
!20 = !DILocation(line: 7, column: 17, scope: !5)
!21 = !DILocalVariable(name: "my_var", scope: !5, file: !1, line: 11, type: !22)
!22 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!23 = !DILocation(line: 11, column: 9, scope: !5)
!24 = !DILocation(line: 19, column: 1, scope: !5)
...
---
name: _amdgpu_cs_main
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
noPhis: true
isSSA: false
noVRegs: false
hasFakeUses: false
callsEHReturn: false
callsUnwindInit: false
hasEHContTarget: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: false
failsVerification: false
tracksDebugUserValues: false
liveins: []
fixedStack: []
stack: []
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo:
explicitKernArgSize: 0
maxKernArgAlign: 4
ldsSize: 0
gdsSize: 0
dynLDSAlign: 1
isEntryFunction: true
isChainFunction: false
noSignedZerosFPMath: false
memoryBound: false
waveLimiter: false
hasSpilledSGPRs: true
hasSpilledVGPRs: false
scratchRSrcReg: '$private_rsrc_reg'
frameOffsetReg: '$fp_reg'
stackPtrOffsetReg: '$sgpr32'
bytesInStackArgArea: 0
returnsVoid: true
argumentInfo:
privateSegmentWaveByteOffset: { reg: '$sgpr6' }
psInputAddr: 0
psInputEnable: 0
maxMemoryClusterDWords: 8
mode:
ieee: false
dx10-clamp: true
fp32-input-denormals: false
fp32-output-denormals: false
fp64-fp16-input-denormals: true
fp64-fp16-output-denormals: true
highBitsOf32BitAddress: 0
occupancy: 16
vgprForAGPRCopy: ''
sgprForEXECCopy: '$sgpr12_sgpr13'
longBranchReservedReg: ''
hasInitWholeWave: false
dynamicVGPRBlockSize: 0
scratchReservedForDynamicVGPRs: 0
body: |
bb.0 (%ir-block.0):
liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2

%0:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 0, %2
renamable $sgpr3 = COPY killed $sgpr2
renamable $sgpr4 = S_MOV_B32 6
%3:vgpr_32 = V_LSHL_ADD_U32_e64 killed $sgpr3, killed $sgpr4, %0, implicit $exec
DBG_VALUE %3, $noreg, !19, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !20
DBG_VALUE $noreg, $noreg, !21, !DIExpression(), debug-location !23
S_ENDPGM 0
...