11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 | FileCheck %s -check-prefix=GFX942
33; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefix=GFX12
4+ ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefix=GFX1250
45
56declare <2 x half > @llvm.amdgcn.ds.fadd.v2f16 (ptr addrspace (3 ) %ptr , <2 x half > %data , i32 , i32 , i1 )
67declare <2 x i16 > @llvm.amdgcn.ds.fadd.v2bf16 (ptr addrspace (3 ) %ptr , <2 x i16 > %data )
@@ -30,6 +31,18 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
3031; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
3132; GFX12-NEXT: global_inv scope:SCOPE_SYS
3233; GFX12-NEXT: s_endpgm
34+ ;
35+ ; GFX1250-LABEL: flat_atomic_fadd_f32_noret_pat:
36+ ; GFX1250: ; %bb.0:
37+ ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
38+ ; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
39+ ; GFX1250-NEXT: global_wb scope:SCOPE_SYS
40+ ; GFX1250-NEXT: s_wait_storecnt 0x0
41+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
42+ ; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
43+ ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
44+ ; GFX1250-NEXT: global_inv scope:SCOPE_SYS
45+ ; GFX1250-NEXT: s_endpgm
3346 %ret = atomicrmw fadd ptr %ptr , float 4 .0 seq_cst , !amdgpu.no.remote.memory !0
3447 ret void
3548}
@@ -59,6 +72,18 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
5972; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
6073; GFX12-NEXT: global_inv scope:SCOPE_SYS
6174; GFX12-NEXT: s_endpgm
75+ ;
76+ ; GFX1250-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
77+ ; GFX1250: ; %bb.0:
78+ ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
79+ ; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
80+ ; GFX1250-NEXT: global_wb scope:SCOPE_SYS
81+ ; GFX1250-NEXT: s_wait_storecnt 0x0
82+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
83+ ; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
84+ ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
85+ ; GFX1250-NEXT: global_inv scope:SCOPE_SYS
86+ ; GFX1250-NEXT: s_endpgm
6287 %ret = atomicrmw fadd ptr %ptr , float 4 .0 seq_cst , !amdgpu.no.remote.memory !0
6388 ret void
6489}
@@ -88,6 +113,19 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
88113; GFX12-NEXT: global_inv scope:SCOPE_SYS
89114; GFX12-NEXT: s_wait_loadcnt 0x0
90115; GFX12-NEXT: s_setpc_b64 s[30:31]
116+ ;
117+ ; GFX1250-LABEL: flat_atomic_fadd_f32_rtn_pat:
118+ ; GFX1250: ; %bb.0:
119+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
120+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
121+ ; GFX1250-NEXT: v_mov_b32_e32 v2, 4.0
122+ ; GFX1250-NEXT: global_wb scope:SCOPE_SYS
123+ ; GFX1250-NEXT: s_wait_storecnt 0x0
124+ ; GFX1250-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
125+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
126+ ; GFX1250-NEXT: global_inv scope:SCOPE_SYS
127+ ; GFX1250-NEXT: s_wait_loadcnt 0x0
128+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
91129 %ret = atomicrmw fadd ptr %ptr , float 4 .0 seq_cst , !amdgpu.no.remote.memory !0
92130 ret float %ret
93131}
@@ -112,6 +150,15 @@ define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr,
112150; GFX12-NEXT: s_wait_dscnt 0x0
113151; GFX12-NEXT: global_inv scope:SCOPE_SE
114152; GFX12-NEXT: s_endpgm
153+ ;
154+ ; GFX1250-LABEL: local_atomic_fadd_v2f16_noret:
155+ ; GFX1250: ; %bb.0:
156+ ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
157+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
158+ ; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
159+ ; GFX1250-NEXT: ds_pk_add_f16 v0, v1
160+ ; GFX1250-NEXT: s_wait_dscnt 0x0
161+ ; GFX1250-NEXT: s_endpgm
115162 %ret = call <2 x half > @llvm.amdgcn.ds.fadd.v2f16 (ptr addrspace (3 ) %ptr , <2 x half > %data , i32 0 , i32 0 , i1 0 )
116163 ret void
117164}
@@ -137,6 +184,14 @@ define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half>
137184; GFX12-NEXT: global_inv scope:SCOPE_SE
138185; GFX12-NEXT: s_wait_loadcnt 0x0
139186; GFX12-NEXT: s_setpc_b64 s[30:31]
187+ ;
188+ ; GFX1250-LABEL: local_atomic_fadd_v2f16_rtn:
189+ ; GFX1250: ; %bb.0:
190+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
191+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
192+ ; GFX1250-NEXT: ds_pk_add_rtn_f16 v0, v0, v1
193+ ; GFX1250-NEXT: s_wait_dscnt 0x0
194+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
140195 %ret = call <2 x half > @llvm.amdgcn.ds.fadd.v2f16 (ptr addrspace (3 ) %ptr , <2 x half > %data , i32 0 , i32 0 , i1 0 )
141196 ret <2 x half > %ret
142197}
@@ -161,6 +216,15 @@ define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr,
161216; GFX12-NEXT: s_wait_dscnt 0x0
162217; GFX12-NEXT: global_inv scope:SCOPE_SE
163218; GFX12-NEXT: s_endpgm
219+ ;
220+ ; GFX1250-LABEL: local_atomic_fadd_v2bf16_noret:
221+ ; GFX1250: ; %bb.0:
222+ ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
223+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
224+ ; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
225+ ; GFX1250-NEXT: ds_pk_add_bf16 v0, v1
226+ ; GFX1250-NEXT: s_wait_dscnt 0x0
227+ ; GFX1250-NEXT: s_endpgm
164228 %ret = call <2 x i16 > @llvm.amdgcn.ds.fadd.v2bf16 (ptr addrspace (3 ) %ptr , <2 x i16 > %data )
165229 ret void
166230}
@@ -186,6 +250,14 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
186250; GFX12-NEXT: global_inv scope:SCOPE_SE
187251; GFX12-NEXT: s_wait_loadcnt 0x0
188252; GFX12-NEXT: s_setpc_b64 s[30:31]
253+ ;
254+ ; GFX1250-LABEL: local_atomic_fadd_v2bf16_rtn:
255+ ; GFX1250: ; %bb.0:
256+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
257+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
258+ ; GFX1250-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1
259+ ; GFX1250-NEXT: s_wait_dscnt 0x0
260+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
189261 %ret = call <2 x i16 > @llvm.amdgcn.ds.fadd.v2bf16 (ptr addrspace (3 ) %ptr , <2 x i16 > %data )
190262 ret <2 x i16 > %ret
191263}
0 commit comments