|
| 1 | +# RUN: llc %s -o - -mcpu=gfx1030 -O0 -run-pass=si-pre-allocate-wwm-regs | FileCheck %s |
| 2 | + |
| 3 | +# Simple regression test: a DBG_VALUE with a $noreg operand must not trigger an assertion in the si-pre-allocate-wwm-regs pass. |
| 4 | + |
| 5 | +# CHECK: S_ENDPGM |
| 6 | + |
| 7 | +--- | |
| 8 | + source_filename = "module" |
| 9 | + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" |
| 10 | + target triple = "amdgcn-amd-amdpal" |
| 11 | + |
| 12 | + %dx.types.ResRet.f32 = type { float, float, float, float, i32 } |
| 13 | + |
| | + ; amdgpu_cs entry point: loads one float with a struct buffer load, doubles it, |
| | + ; and stores the value splatted to <4 x float> with a struct buffer store. |
| 14 | + define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 { |
| 15 | + %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28 |
| 16 | + %WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28 |
| 17 | + %1 = call i64 @llvm.amdgcn.s.getpc(), !dbg !28 |
| | + ; Buffer index %3 = element 0 of %LocalInvocationId + (element 0 of %WorkgroupId << 6). |
| 18 | + %2 = shl i32 %WorkgroupId.i0, 6, !dbg !28 |
| 19 | + %3 = add i32 %LocalInvocationId.i0, %2, !dbg !28 |
| 20 | + #dbg_value(i32 %3, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28) |
| | + ; Form a 64-bit address: the upper 32 bits of the PC value %1 OR'ed with the zero-extended %userdata4, |
| | + ; then cast to an addrspace(4) pointer. |
| 21 | + %4 = and i64 %1, -4294967296, !dbg !30 |
| 22 | + %5 = zext i32 %userdata4 to i64, !dbg !30 |
| 23 | + %6 = or disjoint i64 %4, %5, !dbg !30 |
| 24 | + %7 = inttoptr i64 %6 to ptr addrspace(4), !dbg !30, !amdgpu.uniform !2 |
| | + ; The <4 x i32> loaded from %7 is passed as the first argument of the buffer load. |
| 25 | + %8 = load <4 x i32>, ptr addrspace(4) %7, align 4, !dbg !30, !invariant.load !2 |
| 26 | + %9 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %8, i32 %3, i32 0, i32 0, i32 0), !dbg !30 |
| | + ; Debug record for variable !31 with a poison location; the MIR body below carries a DBG_VALUE $noreg for the same variable. |
| 27 | + #dbg_value(%dx.types.ResRet.f32 poison, !31, !DIExpression(), !32) |
| 28 | + %10 = fmul reassoc arcp contract afn float %9, 2.000000e+00, !dbg !33 |
| 29 | + #dbg_value(float %10, !34, !DIExpression(), !35) |
| | + ; A second <4 x i32> is loaded 32 bytes past %7 and passed to the buffer store; %12 is %10 splatted to all four lanes. |
| 30 | + %11 = getelementptr i8, ptr addrspace(4) %7, i64 32, !dbg !36, !amdgpu.uniform !2 |
| 31 | + %.upto01 = insertelement <4 x float> poison, float %10, i64 0, !dbg !36 |
| 32 | + %12 = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36 |
| 33 | + %13 = load <4 x i32>, ptr addrspace(4) %11, align 4, !dbg !36, !invariant.load !2 |
| 34 | + call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %12, <4 x i32> %13, i32 %3, i32 0, i32 0, i32 0), !dbg !36 |
| 35 | + ret void, !dbg !37 |
| 36 | + } |
| 37 | + |
| 38 | + declare noundef i64 @llvm.amdgcn.s.getpc() #1 |
| 39 | + declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3 |
| 40 | + declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4 |
| 41 | + |
| 42 | + attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } |
| 43 | + attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1030" } |
| 44 | + attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx1030" } |
| 45 | + attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) "target-cpu"="gfx1030" } |
| 46 | + attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx1030" } |
| 47 | + |
| 48 | + !llvm.dbg.cu = !{!0} |
| 49 | + !llvm.module.flags = !{!12, !13} |
| 50 | + |
| 51 | + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3) |
| 52 | + !1 = !DIFile(filename: "tests\\basic_var.hlsl", directory: "") |
| 53 | + !2 = !{} |
| 54 | + !3 = !{!4, !10} |
| 55 | + !4 = distinct !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) |
| 56 | + !5 = !DIGlobalVariable(name: "u0", linkageName: "\01?u0@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 2, type: !6, isLocal: false, isDefinition: true) |
| 57 | + !6 = !DICompositeType(tag: DW_TAG_class_type, name: "RWBuffer<float>", file: !1, line: 2, size: 32, align: 32, elements: !2, templateParams: !7) |
| 58 | + !7 = !{!8} |
| 59 | + !8 = !DITemplateTypeParameter(name: "element", type: !9) |
| 60 | + !9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) |
| 61 | + !10 = distinct !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) |
| 62 | + !11 = !DIGlobalVariable(name: "u1", linkageName: "\01?u1@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true) |
| 63 | + !12 = !{i32 2, !"Dwarf Version", i32 5} |
| 64 | + !13 = !{i32 2, !"Debug Info Version", i32 3} |
| 65 | + !14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !15, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) |
| 66 | + !15 = !DISubroutineType(types: !16) |
| 67 | + !16 = !{null, !17} |
| 68 | + !17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !18) |
| 69 | + !18 = !DICompositeType(tag: DW_TAG_class_type, name: "vector<unsigned int, 3>", file: !1, size: 96, align: 32, elements: !19, templateParams: !24) |
| 70 | + !19 = !{!20, !22, !23} |
| 71 | + !20 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !18, file: !1, baseType: !21, size: 32, align: 32, flags: DIFlagPublic) |
| 72 | + !21 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned) |
| 73 | + !22 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 32, flags: DIFlagPublic) |
| 74 | + !23 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 64, flags: DIFlagPublic) |
| 75 | + !24 = !{!25, !26} |
| 76 | + !25 = !DITemplateTypeParameter(name: "element", type: !21) |
| 77 | + !26 = !DITemplateValueParameter(name: "element_count", type: !27, value: i32 3) |
| 78 | + !27 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) |
| 79 | + !28 = !DILocation(line: 7, column: 17, scope: !14) |
| 80 | + !29 = !DILocalVariable(name: "dtid", arg: 1, scope: !14, file: !1, line: 7, type: !17) |
| 81 | + !30 = !DILocation(line: 11, column: 18, scope: !14) |
| 82 | + !31 = !DILocalVariable(name: "my_var", scope: !14, file: !1, line: 11, type: !9) |
| 83 | + !32 = !DILocation(line: 11, column: 9, scope: !14) |
| 84 | + !33 = !DILocation(line: 14, column: 26, scope: !14) |
| 85 | + !34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9) |
| 86 | + !35 = !DILocation(line: 14, column: 9, scope: !14) |
| 87 | + !36 = !DILocation(line: 17, column: 14, scope: !14) |
| 88 | + !37 = !DILocation(line: 19, column: 1, scope: !14) |
| 89 | +... |
| 90 | +--- |
| | +# Machine function for @_amdgpu_cs_main; the RUN line feeds it to -run-pass=si-pre-allocate-wwm-regs. |
| 91 | +name: _amdgpu_cs_main |
| 92 | +alignment: 1 |
| 93 | +exposesReturnsTwice: false |
| 94 | +legalized: false |
| 95 | +regBankSelected: false |
| 96 | +selected: false |
| 97 | +failedISel: false |
| 98 | +tracksRegLiveness: true |
| 99 | +hasWinCFI: false |
| 100 | +noPhis: true |
| 101 | +isSSA: false |
| 102 | +noVRegs: false |
| 103 | +hasFakeUses: false |
| 104 | +callsEHReturn: false |
| 105 | +callsUnwindInit: false |
| 106 | +hasEHContTarget: false |
| 107 | +hasEHScopes: false |
| 108 | +hasEHFunclets: false |
| 109 | +isOutlined: false |
| 110 | +debugInstrRef: false |
| 111 | +failsVerification: false |
| 112 | +tracksDebugUserValues: false |
| 113 | +fixedStack: [] |
| 114 | +stack: [] |
| 115 | +entry_values: [] |
| 116 | +callSites: [] |
| 117 | +debugValueSubstitutions: [] |
| 118 | +constants: [] |
| 119 | +machineFunctionInfo: |
| 120 | + explicitKernArgSize: 0 |
| 121 | + maxKernArgAlign: 4 |
| 122 | + ldsSize: 0 |
| 123 | + gdsSize: 0 |
| 124 | + dynLDSAlign: 1 |
| 125 | + isEntryFunction: true |
| 126 | + isChainFunction: false |
| 127 | + noSignedZerosFPMath: false |
| 128 | + memoryBound: false |
| 129 | + waveLimiter: false |
| | + # Set to true here; the body contains SI_SPILL_S32_TO_VGPR / SI_RESTORE_S32_FROM_VGPR. |
| 130 | + hasSpilledSGPRs: true |
| 131 | + hasSpilledVGPRs: false |
| 132 | + scratchRSrcReg: '$private_rsrc_reg' |
| 133 | + frameOffsetReg: '$fp_reg' |
| 134 | + stackPtrOffsetReg: '$sgpr32' |
| 135 | + bytesInStackArgArea: 0 |
| 136 | + returnsVoid: true |
| 137 | + argumentInfo: |
| 138 | + privateSegmentWaveByteOffset: { reg: '$sgpr6' } |
| 139 | + psInputAddr: 0 |
| 140 | + psInputEnable: 0 |
| 141 | + maxMemoryClusterDWords: 8 |
| 142 | + mode: |
| 143 | + ieee: false |
| 144 | + dx10-clamp: true |
| 145 | + fp32-input-denormals: false |
| 146 | + fp32-output-denormals: false |
| 147 | + fp64-fp16-input-denormals: true |
| 148 | + fp64-fp16-output-denormals: true |
| 149 | + highBitsOf32BitAddress: 0 |
| 150 | + occupancy: 16 |
| 151 | + vgprForAGPRCopy: '' |
| 152 | + sgprForEXECCopy: '$sgpr12_sgpr13' |
| 153 | + longBranchReservedReg: '' |
| 154 | + hasInitWholeWave: false |
| 155 | + dynamicVGPRBlockSize: 0 |
| 156 | + scratchReservedForDynamicVGPRs: 0 |
| 157 | +body: | |
| 158 | + bb.0 (%ir-block.0): |
| 159 | + liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 |
| 160 | +
|
| 161 | + %8:vgpr_32 = COPY killed $vgpr2 |
| 162 | + %7:vgpr_32 = COPY killed $vgpr1 |
| 163 | + %6:vgpr_32 = COPY killed $vgpr0 |
| 164 | + renamable $sgpr0 = COPY killed $sgpr4 |
| | + ; %39 receives $sgpr3 via SI_SPILL_S32_TO_VGPR and is read back into $sgpr1 by SI_RESTORE_S32_FROM_VGPR below |
| | + ; (both use immediate 0, presumably the lane index - confirm against the pseudo's definition). |
| 165 | + %39:vgpr_32 = IMPLICIT_DEF |
| 166 | + %39:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 0, %39 |
| 167 | + renamable $sgpr3 = COPY killed $sgpr2 |
| 168 | + renamable $sgpr2 = COPY $sgpr1 |
| 169 | + $sgpr1 = SI_RESTORE_S32_FROM_VGPR %39, 0 |
| 170 | + dead renamable $sgpr4 = IMPLICIT_DEF |
| 171 | + dead renamable $sgpr4 = IMPLICIT_DEF |
| 172 | + dead renamable $sgpr4 = IMPLICIT_DEF |
| 173 | + undef %38.sub0:vreg_96 = COPY %6 |
| 174 | + %38.sub1:vreg_96 = COPY %7 |
| 175 | + dead %38.sub2:vreg_96 = COPY %8 |
| 176 | + undef renamable $sgpr4 = COPY renamable $sgpr3, implicit-def $sgpr4_sgpr5_sgpr6 |
| 177 | + renamable $sgpr5 = COPY killed renamable $sgpr1 |
| 178 | + renamable $sgpr6 = COPY killed renamable $sgpr0 |
| 179 | + dead renamable $sgpr8_sgpr9_sgpr10 = IMPLICIT_DEF |
| 180 | + renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo debug-location !28 |
| | + ; $sgpr4 = 6 feeds V_LSHL_ADD_U32; this corresponds to the IR shl-by-6 / add pair (same debug-location !28). |
| 181 | + renamable $sgpr4 = S_MOV_B32 6 |
| 182 | + %16:vgpr_32 = V_LSHL_ADD_U32_e64 killed $sgpr3, killed $sgpr4, %6, implicit $exec, debug-location !28 |
| 183 | + DBG_VALUE %16, $noreg, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !28 |
| 184 | + renamable $sgpr3 = S_MOV_B32 -1 |
| 185 | + renamable $sgpr4 = S_MOV_B32 0 |
| 186 | + undef renamable $sgpr6 = COPY renamable $sgpr4, implicit-def $sgpr6_sgpr7 |
| 187 | + renamable $sgpr7 = COPY killed renamable $sgpr3 |
| 188 | + renamable $sgpr0_sgpr1 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $sgpr6_sgpr7, implicit-def dead $scc, debug-location !30 |
| 189 | + renamable $sgpr5 = S_MOV_B32 0, debug-location !30 |
| 190 | + undef renamable $sgpr2 = COPY killed renamable $sgpr2, implicit-def $sgpr2_sgpr3, debug-location !30 |
| 191 | + renamable $sgpr3 = COPY killed renamable $sgpr5, debug-location !30 |
| 192 | + renamable $sgpr0_sgpr1 = disjoint S_OR_B64 killed renamable $sgpr0_sgpr1, killed renamable $sgpr2_sgpr3, implicit-def dead $scc, debug-location !30 |
| 193 | + renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, debug-location !30 :: (invariant load (s128) from %ir.7, align 4, addrspace 4) |
| 194 | + renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 32, 0, debug-location !36 :: (invariant load (s128) from %ir.11, align 4, addrspace 4) |
| 195 | + %26:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %16, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, renamable $sgpr4, 0, 0, 0, implicit $exec, debug-location !30 :: (dereferenceable load (s32), align 1, addrspace 8) |
| | + ; DBG_VALUE with $noreg as its first operand: the case the comment at the top of the file says must not assert in the pass. |
| 196 | + DBG_VALUE $noreg, $noreg, !31, !DIExpression(), debug-location !32 |
| | + ; %26 + %26, matching the IR fmul by 2.0 (same debug-location !33). |
| 197 | + %27:vgpr_32 = arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %26, 0, %26, 0, 0, implicit $mode, implicit $exec, debug-location !33 |
| 198 | + DBG_VALUE %27, $noreg, !34, !DIExpression(), debug-location !35 |
| 199 | + dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 |
| 200 | + dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 |
| 201 | + dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 |
| 202 | + dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 |
| | + ; %27 is copied into all four subregisters of %37 before the buffer store. |
| 203 | + undef %37.sub0:vreg_128 = COPY %27, debug-location !36 |
| 204 | + %37.sub1:vreg_128 = COPY %27, debug-location !36 |
| 205 | + %37.sub2:vreg_128 = COPY %27, debug-location !36 |
| 206 | + %37.sub3:vreg_128 = COPY %27, debug-location !36 |
| 207 | + %29:vreg_128 = COPY %37, debug-location !36 |
| 208 | + BUFFER_STORE_FORMAT_XYZW_IDXEN_exact %29, %16, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec, debug-location !36 :: (dereferenceable store (s128), align 1, addrspace 8) |
| | + ; Matched by the CHECK line at the top of the file. |
| 209 | + S_ENDPGM 0, debug-location !37 |
| 210 | +... |
0 commit comments