Skip to content

Commit 449ac61

Browse files
rovka authored and aokblast committed
[AMDGPU] Remove subtarget features for dynamic VGPRs (#160822)
Users of the backend are expected to enable dynamic VGPRs via the `amdgpu-dynamic-vgpr-block-size` attribute instead of the subtarget features (see #133444).
1 parent 155e824 commit 449ac61

File tree

6 files changed

+213
-43
lines changed

6 files changed

+213
-43
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,20 +1411,6 @@ def FeatureGloballyAddressableScratch : SubtargetFeature<
14111411
"FLAT instructions can access scratch memory for any thread in any wave"
14121412
>;
14131413

1414-
// FIXME: Remove after all users are migrated to attribute.
1415-
def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
1416-
"DynamicVGPR",
1417-
"true",
1418-
"Enable dynamic VGPR mode"
1419-
>;
1420-
1421-
// FIXME: Remove after all users are migrated to attribute.
1422-
def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32",
1423-
"DynamicVGPRBlockSize32",
1424-
"true",
1425-
"Use a block size of 32 for dynamic VGPR allocation (default is 16)"
1426-
>;
1427-
14281414
// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
14291415
// restoring the callee-saved registers.
14301416
def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr",

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,11 +1353,6 @@ unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
13531353
if (DynamicVGPRBlockSize != 0)
13541354
return DynamicVGPRBlockSize;
13551355

1356-
// Temporarily check the subtarget feature, until we fully switch to using
1357-
// attributes.
1358-
if (STI->getFeatureBits().test(FeatureDynamicVGPR))
1359-
return STI->getFeatureBits().test(FeatureDynamicVGPRBlockSize32) ? 32 : 16;
1360-
13611356
bool IsWave32 = EnableWavefrontSize32
13621357
? *EnableWavefrontSize32
13631358
: STI->getFeatureBits().test(FeatureWavefrontSize32);
@@ -1412,10 +1407,7 @@ unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
14121407
if (Features.test(FeatureGFX90AInsts))
14131408
return 512;
14141409

1415-
// Temporarily check the subtarget feature, until we fully switch to using
1416-
// attributes.
1417-
if (DynamicVGPRBlockSize != 0 ||
1418-
STI->getFeatureBits().test(FeatureDynamicVGPR))
1410+
if (DynamicVGPRBlockSize != 0)
14191411
// On GFX12 we can allocate at most 8 blocks of VGPRs.
14201412
return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
14211413
return getAddressableNumArchVGPRs(STI);

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 < %s | FileCheck --check-prefixes=CHECK,GFX11 %s
22
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefixes=CHECK,GFX12 %s
3-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr < %s | FileCheck --check-prefixes=CHECK,GFX12,DVGPR %s
43

54
; CHECK: .amdgpu_pal_metadata
65
; CHECK-NEXT: ---
76
; CHECK-NEXT: amdpal.pipelines:
87
; CHECK-NEXT: - .api: Vulkan
98
; CHECK-NEXT: .compute_registers:
10-
; DVGPR-NEXT: .dynamic_vgpr_en: true
119
; CHECK-NEXT: .tg_size_en: true
1210
; CHECK-NEXT: .tgid_x_en: false
1311
; CHECK-NEXT: .tgid_y_en: false

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
1-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11,NODVGPR
2-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK,NODVGPR
3-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
43

54
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
6-
; NODVGPR: ; TotalNumSgprs: 4
7-
; DVGPR: ; TotalNumSgprs: 34
5+
; CHECK: ; TotalNumSgprs: 4
86
; CHECK: ; NumVgprs: 2
97
; CHECK: .amdgpu_pal_metadata
108
; CHECK-NEXT: ---
119
; CHECK-NEXT: amdpal.pipelines:
1210
; CHECK-NEXT: - .api: Vulkan
1311
; CHECK-NEXT: .compute_registers:
14-
; DVGPR-NEXT: .dynamic_vgpr_en: true
1512
; CHECK-NEXT: .tg_size_en: true
1613
; CHECK-NEXT: .tgid_x_en: false
1714
; CHECK-NEXT: .tgid_y_en: false
@@ -57,7 +54,6 @@
5754
; CHECK-NEXT: .cs:
5855
; CHECK-NEXT: .checksum_value: 0x9444d7d0
5956
; CHECK-NEXT: .debug_mode: false
60-
; DVGPR-NEXT: .dynamic_vgpr_saved_count: 0x70
6157
; CHECK-NEXT: .entry_point: _amdgpu_cs_main
6258
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
6359
; CHECK-NEXT: .excp_en: 0
@@ -69,8 +65,7 @@
6965
; CHECK-NEXT: .mem_ordered: true
7066
; CHECK-NEXT: .scratch_en: false
7167
; CHECK-NEXT: .scratch_memory_size: 0
72-
; NODVGPR-NEXT: .sgpr_count: 0x4
73-
; DVGPR-NEXT: .sgpr_count: 0x22
68+
; CHECK-NEXT: .sgpr_count: 0x4
7469
; CHECK-NEXT: .sgpr_limit: 0x6a
7570
; CHECK-NEXT: .threadgroup_dimensions:
7671
; CHECK-NEXT: - 0x1
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
2+
3+
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
4+
; CHECK: ; TotalNumSgprs: 34
5+
; CHECK: ; NumVgprs: 2
6+
; CHECK: .amdgpu_pal_metadata
7+
; CHECK-NEXT: ---
8+
; CHECK-NEXT: amdpal.pipelines:
9+
; CHECK-NEXT: - .api: Vulkan
10+
; CHECK-NEXT: .compute_registers:
11+
; CHECK-NEXT: .dynamic_vgpr_en: true
12+
; CHECK-NEXT: .tg_size_en: true
13+
; CHECK-NEXT: .tgid_x_en: false
14+
; CHECK-NEXT: .tgid_y_en: false
15+
; CHECK-NEXT: .tgid_z_en: false
16+
; CHECK-NEXT: .tidig_comp_cnt: 0x1
17+
; CHECK-NEXT: .graphics_registers:
18+
; CHECK-NEXT: .ps_extra_lds_size: 0
19+
; CHECK-NEXT: .spi_ps_input_addr:
20+
; CHECK-NEXT: .ancillary_ena: false
21+
; CHECK-NEXT: .front_face_ena: true
22+
; CHECK-NEXT: .line_stipple_tex_ena: false
23+
; CHECK-NEXT: .linear_center_ena: true
24+
; CHECK-NEXT: .linear_centroid_ena: true
25+
; CHECK-NEXT: .linear_sample_ena: true
26+
; CHECK-NEXT: .persp_center_ena: true
27+
; CHECK-NEXT: .persp_centroid_ena: true
28+
; CHECK-NEXT: .persp_pull_model_ena: false
29+
; CHECK-NEXT: .persp_sample_ena: true
30+
; CHECK-NEXT: .pos_fixed_pt_ena: true
31+
; CHECK-NEXT: .pos_w_float_ena: false
32+
; CHECK-NEXT: .pos_x_float_ena: false
33+
; CHECK-NEXT: .pos_y_float_ena: false
34+
; CHECK-NEXT: .pos_z_float_ena: false
35+
; CHECK-NEXT: .sample_coverage_ena: false
36+
; CHECK-NEXT: .spi_ps_input_ena:
37+
; CHECK-NEXT: .ancillary_ena: false
38+
; CHECK-NEXT: .front_face_ena: false
39+
; CHECK-NEXT: .line_stipple_tex_ena: false
40+
; CHECK-NEXT: .linear_center_ena: false
41+
; CHECK-NEXT: .linear_centroid_ena: false
42+
; CHECK-NEXT: .linear_sample_ena: false
43+
; CHECK-NEXT: .persp_center_ena: false
44+
; CHECK-NEXT: .persp_centroid_ena: false
45+
; CHECK-NEXT: .persp_pull_model_ena: false
46+
; CHECK-NEXT: .persp_sample_ena: true
47+
; CHECK-NEXT: .pos_fixed_pt_ena: false
48+
; CHECK-NEXT: .pos_w_float_ena: false
49+
; CHECK-NEXT: .pos_x_float_ena: false
50+
; CHECK-NEXT: .pos_y_float_ena: false
51+
; CHECK-NEXT: .pos_z_float_ena: false
52+
; CHECK-NEXT: .sample_coverage_ena: false
53+
; CHECK-NEXT: .hardware_stages:
54+
; CHECK-NEXT: .cs:
55+
; CHECK-NEXT: .checksum_value: 0x9444d7d0
56+
; CHECK-NEXT: .debug_mode: false
57+
; CHECK-NEXT: .dynamic_vgpr_saved_count: 0x70
58+
; CHECK-NOT: .entry_point: _amdgpu_cs_main
59+
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
60+
; CHECK-NEXT: .excp_en: 0
61+
; CHECK-NEXT: .float_mode: 0xc0
62+
; CHECK-NEXT: .forward_progress: true
63+
; GFX11-NEXT: .ieee_mode: false
64+
; CHECK-NEXT: .image_op: false
65+
; CHECK-NEXT: .lds_size: 0
66+
; CHECK-NEXT: .mem_ordered: true
67+
; CHECK-NEXT: .scratch_en: false
68+
; CHECK-NEXT: .scratch_memory_size: 0
69+
; CHECK-NEXT: .sgpr_count: 0x22
70+
; CHECK-NEXT: .sgpr_limit: 0x6a
71+
; CHECK-NEXT: .threadgroup_dimensions:
72+
; CHECK-NEXT: - 0x1
73+
; CHECK-NEXT: - 0x400
74+
; CHECK-NEXT: - 0x1
75+
; CHECK-NEXT: .trap_present: false
76+
; CHECK-NEXT: .user_data_reg_map:
77+
; CHECK-NEXT: - 0x10000000
78+
; CHECK-NEXT: - 0xffffffff
79+
; CHECK-NEXT: - 0
80+
; CHECK-NEXT: - 0xffffffff
81+
; CHECK-NEXT: - 0xffffffff
82+
; CHECK-NEXT: - 0xffffffff
83+
; CHECK-NEXT: - 0xffffffff
84+
; CHECK-NEXT: - 0xffffffff
85+
; CHECK-NEXT: - 0xffffffff
86+
; CHECK-NEXT: - 0xffffffff
87+
; CHECK-NEXT: - 0xffffffff
88+
; CHECK-NEXT: - 0xffffffff
89+
; CHECK-NEXT: - 0xffffffff
90+
; CHECK-NEXT: - 0xffffffff
91+
; CHECK-NEXT: - 0xffffffff
92+
; CHECK-NEXT: - 0xffffffff
93+
; CHECK-NEXT: - 0xffffffff
94+
; CHECK-NEXT: - 0xffffffff
95+
; CHECK-NEXT: - 0xffffffff
96+
; CHECK-NEXT: - 0xffffffff
97+
; CHECK-NEXT: - 0xffffffff
98+
; CHECK-NEXT: - 0xffffffff
99+
; CHECK-NEXT: - 0xffffffff
100+
; CHECK-NEXT: - 0xffffffff
101+
; CHECK-NEXT: - 0xffffffff
102+
; CHECK-NEXT: - 0xffffffff
103+
; CHECK-NEXT: - 0xffffffff
104+
; CHECK-NEXT: - 0xffffffff
105+
; CHECK-NEXT: - 0xffffffff
106+
; CHECK-NEXT: - 0xffffffff
107+
; CHECK-NEXT: - 0xffffffff
108+
; CHECK-NEXT: - 0xffffffff
109+
; CHECK-NEXT: .user_sgprs: 0x3
110+
; CHECK-NEXT: .vgpr_count: 0x2
111+
; CHECK-NEXT: .vgpr_limit: 0x100
112+
; CHECK-NEXT: .wavefront_size: 0x40
113+
; CHECK-NEXT: .wgp_mode: false
114+
; CHECK-NEXT: .gs:
115+
; CHECK-NEXT: .debug_mode: false
116+
; CHECK-NOT: .entry_point: _amdgpu_gs_main
117+
; CHECK-NEXT: .entry_point_symbol: gs_shader
118+
; CHECK-NEXT: .forward_progress: true
119+
; GFX11-NEXT: .ieee_mode: false
120+
; CHECK-NEXT: .lds_size: 0x200
121+
; CHECK-NEXT: .mem_ordered: true
122+
; CHECK-NEXT: .scratch_en: false
123+
; CHECK-NEXT: .scratch_memory_size: 0
124+
; CHECK-NEXT: .sgpr_count: 0x1
125+
; CHECK-NEXT: .vgpr_count: 0x1
126+
; CHECK-NEXT: .wgp_mode: true
127+
; CHECK-NEXT: .hs:
128+
; CHECK-NEXT: .debug_mode: false
129+
; CHECK-NOT: .entry_point: _amdgpu_hs_main
130+
; CHECK-NEXT: .entry_point_symbol: hs_shader
131+
; CHECK-NEXT: .forward_progress: true
132+
; GFX11-NEXT: .ieee_mode: false
133+
; CHECK-NEXT: .lds_size: 0x1000
134+
; CHECK-NEXT: .mem_ordered: true
135+
; CHECK-NEXT: .scratch_en: false
136+
; CHECK-NEXT: .scratch_memory_size: 0
137+
; CHECK-NEXT: .sgpr_count: 0x1
138+
; CHECK-NEXT: .vgpr_count: 0x1
139+
; CHECK-NEXT: .wgp_mode: true
140+
; CHECK-NEXT: .ps:
141+
; CHECK-NEXT: .debug_mode: false
142+
; CHECK-NOT: .entry_point: _amdgpu_ps_main
143+
; CHECK-NEXT: .entry_point_symbol: ps_shader
144+
; CHECK-NEXT: .forward_progress: true
145+
; GFX11-NEXT: .ieee_mode: false
146+
; CHECK-NEXT: .lds_size: 0
147+
; CHECK-NEXT: .mem_ordered: true
148+
; CHECK-NEXT: .scratch_en: false
149+
; CHECK-NEXT: .scratch_memory_size: 0
150+
; CHECK-NEXT: .sgpr_count: 0x1
151+
; CHECK-NEXT: .vgpr_count: 0x1
152+
; CHECK-NEXT: .wgp_mode: true
153+
; CHECK: .registers: {}
154+
; CHECK:amdpal.version:
155+
; CHECK-NEXT: - 0x3
156+
; CHECK-NEXT: - 0x6
157+
; CHECK-NEXT:...
158+
; CHECK-NEXT: .end_amdgpu_pal_metadata
159+
160+
define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) #0 !lgc.shaderstage !1 {
161+
.entry:
162+
%i = call i64 @llvm.amdgcn.s.getpc()
163+
%i1 = and i64 %i, -4294967296
164+
%i2 = zext i32 %arg1 to i64
165+
%i3 = or i64 %i1, %i2
166+
%i4 = inttoptr i64 %i3 to ptr addrspace(4)
167+
%i5 = and i32 %arg2, 1023
168+
%i6 = lshr i32 %arg2, 10
169+
%i7 = and i32 %i6, 1023
170+
%i8 = add nuw nsw i32 %i7, %i5
171+
%i9 = load <4 x i32>, ptr addrspace(4) %i4, align 16
172+
%.idx = shl nuw nsw i32 %i8, 2
173+
call void @llvm.amdgcn.raw.buffer.store.i32(i32 1, <4 x i32> %i9, i32 %.idx, i32 0, i32 0)
174+
ret void
175+
}
176+
177+
define dllexport amdgpu_ps void @ps_shader() #1 {
178+
ret void
179+
}
180+
181+
@LDS.GS = external addrspace(3) global [1 x i32], align 4
182+
183+
define dllexport amdgpu_gs void @gs_shader() {
184+
%ptr = getelementptr i32, ptr addrspace(3) @LDS.GS, i32 0
185+
store i32 0, ptr addrspace(3) %ptr, align 4
186+
ret void
187+
}
188+
189+
@LDS.HS = external addrspace(3) global [1024 x i32], align 4
190+
191+
define dllexport amdgpu_hs void @hs_shader() {
192+
%ptr = getelementptr i32, ptr addrspace(3) @LDS.HS, i32 0
193+
store i32 0, ptr addrspace(3) %ptr, align 4
194+
ret void
195+
}
196+
197+
!amdgpu.pal.metadata.msgpack = !{!0}
198+
199+
attributes #0 = { nounwind memory(readwrite) "target-features"=",+wavefrontsize64,+cumode" "amdgpu-dynamic-vgpr-block-size"="16" }
200+
201+
attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" "amdgpu-dynamic-vgpr-block-size"="16" }
202+
203+
!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\06"}
204+
!1 = !{i32 7}

llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
1-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11,NODVGPR
2-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK,NODVGPR
3-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
43

54
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
6-
; NODVGPR: ; TotalNumSgprs: 4
7-
; DVGPR: ; TotalNumSgprs: 34
5+
; CHECK: ; TotalNumSgprs: 4
86
; CHECK: ; NumVgprs: 2
97
; CHECK: .amdgpu_pal_metadata
108
; CHECK-NEXT: ---
119
; CHECK-NEXT: amdpal.pipelines:
1210
; CHECK-NEXT: - .api: Vulkan
1311
; CHECK-NEXT: .compute_registers:
14-
; DVGPR-NEXT: .dynamic_vgpr_en: true
1512
; CHECK-NEXT: .tg_size_en: true
1613
; CHECK-NEXT: .tgid_x_en: false
1714
; CHECK-NEXT: .tgid_y_en: false
@@ -57,7 +54,6 @@
5754
; CHECK-NEXT: .cs:
5855
; CHECK-NEXT: .checksum_value: 0x9444d7d0
5956
; CHECK-NEXT: .debug_mode: false
60-
; DVGPR-NEXT: .dynamic_vgpr_saved_count: 0x70
6157
; CHECK-NOT: .entry_point: _amdgpu_cs_main
6258
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
6359
; CHECK-NEXT: .excp_en: 0
@@ -69,8 +65,7 @@
6965
; CHECK-NEXT: .mem_ordered: true
7066
; CHECK-NEXT: .scratch_en: false
7167
; CHECK-NEXT: .scratch_memory_size: 0
72-
; NODVGPR-NEXT: .sgpr_count: 0x4
73-
; DVGPR-NEXT: .sgpr_count: 0x22
68+
; CHECK-NEXT: .sgpr_count: 0x4
7469
; CHECK-NEXT: .sgpr_limit: 0x6a
7570
; CHECK-NEXT: .threadgroup_dimensions:
7671
; CHECK-NEXT: - 0x1

0 commit comments

Comments
 (0)