@@ -43,14 +43,12 @@ define amdgpu_ps float @atomic_pk_add_f16_1d_v2_noret(<8 x i32> inreg %rsrc, <2
4343; GFX12-SDAG: ; %bb.0: ; %main_body
4444; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
4545; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
46- ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
4746; GFX12-SDAG-NEXT: ; return to shader part epilog
4847;
4948; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v2_noret:
5049; GFX12-GISEL: ; %bb.0: ; %main_body
5150; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
5251; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
53- ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
5452; GFX12-GISEL-NEXT: ; return to shader part epilog
5553main_body:
5654 %unused = call <2 x half > @llvm.amdgcn.image.atomic.pk.add.f16.1d.v2f16.v2f16 (<2 x half > %data , i32 %s , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -81,14 +79,12 @@ define amdgpu_ps float @atomic_pk_add_f16_1d_v4_noret(<8 x i32> inreg %rsrc, <4
8179; GFX12-SDAG: ; %bb.0: ; %main_body
8280; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
8381; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
84- ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
8582; GFX12-SDAG-NEXT: ; return to shader part epilog
8683;
8784; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v4_noret:
8885; GFX12-GISEL: ; %bb.0: ; %main_body
8986; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
9087; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
91- ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
9288; GFX12-GISEL-NEXT: ; return to shader part epilog
9389main_body:
9490 %unused = call <4 x half > @llvm.amdgcn.image.atomic.pk.add.f16.1d.v4f16.v4f16 (<4 x half > %data , i32 %s , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -128,14 +124,12 @@ define amdgpu_ps float @atomic_pk_add_bf16_1d_v2_noret(<8 x i32> inreg %rsrc, <2
128124; GFX12-SDAG: ; %bb.0: ; %main_body
129125; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
130126; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
131- ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
132127; GFX12-SDAG-NEXT: ; return to shader part epilog
133128;
134129; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v2_noret:
135130; GFX12-GISEL: ; %bb.0: ; %main_body
136131; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
137132; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
138- ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
139133; GFX12-GISEL-NEXT: ; return to shader part epilog
140134main_body:
141135 %unused = call <2 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v2bf16.v2bf16 (<2 x bfloat> %data , i32 %s , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -175,14 +169,12 @@ define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_noret(<8 x i32> inreg %rsrc, <4
175169; GFX12-SDAG: ; %bb.0: ; %main_body
176170; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
177171; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
178- ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
179172; GFX12-SDAG-NEXT: ; return to shader part epilog
180173;
181174; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_noret:
182175; GFX12-GISEL: ; %bb.0: ; %main_body
183176; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
184177; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
185- ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
186178; GFX12-GISEL-NEXT: ; return to shader part epilog
187179main_body:
188180 %unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16 (<4 x bfloat> %data , i32 %s , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -192,16 +184,14 @@ main_body:
192184define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_nt (<8 x i32 > inreg %rsrc , <4 x bfloat> %data , i32 %s ) {
193185; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_nt:
194186; GFX12-SDAG: ; %bb.0: ; %main_body
195- ; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
187+ ; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
196188; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
197- ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
198189; GFX12-SDAG-NEXT: ; return to shader part epilog
199190;
200191; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_nt:
201192; GFX12-GISEL: ; %bb.0: ; %main_body
202- ; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
193+ ; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
203194; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
204- ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
205195; GFX12-GISEL-NEXT: ; return to shader part epilog
206196main_body:
207197 %unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16 (<4 x bfloat> %data , i32 %s , <8 x i32 > %rsrc , i32 0 , i32 2 )
0 commit comments