Skip to content

Commit 68107bb

Browse files
authored
[AMDGPU] gfx1250 run line in the fp atomics test. NFC (#155306)
1 parent 33de419 commit 68107bb

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 | FileCheck %s -check-prefix=GFX942
33
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefix=GFX12
4+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefix=GFX1250
45

56
; Declaration of the DS fadd intrinsic on <2 x half>: takes an addrspace(3)
; pointer, the <2 x half> data operand, and three trailing i32, i32, i1
; operands; returns <2 x half>. The calls visible in this test pass 0 for all
; three trailing operands.
declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1)
67
; Declaration of the v2bf16 DS fadd intrinsic: the data operand and the result
; are typed <2 x i16>, and it takes only the addrspace(3) pointer and the data
; (no trailing i32/i32/i1 operands).
declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
@@ -30,6 +31,18 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
3031
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
3132
; GFX12-NEXT: global_inv scope:SCOPE_SYS
3233
; GFX12-NEXT: s_endpgm
34+
;
35+
; GFX1250-LABEL: flat_atomic_fadd_f32_noret_pat:
36+
; GFX1250: ; %bb.0:
37+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
38+
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
39+
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
40+
; GFX1250-NEXT: s_wait_storecnt 0x0
41+
; GFX1250-NEXT: s_wait_kmcnt 0x0
42+
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
43+
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
44+
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
45+
; GFX1250-NEXT: s_endpgm
3346
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
3447
ret void
3548
}
@@ -59,6 +72,18 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
5972
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
6073
; GFX12-NEXT: global_inv scope:SCOPE_SYS
6174
; GFX12-NEXT: s_endpgm
75+
;
76+
; GFX1250-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
77+
; GFX1250: ; %bb.0:
78+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
79+
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
80+
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
81+
; GFX1250-NEXT: s_wait_storecnt 0x0
82+
; GFX1250-NEXT: s_wait_kmcnt 0x0
83+
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
84+
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
85+
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
86+
; GFX1250-NEXT: s_endpgm
6287
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
6388
ret void
6489
}
@@ -88,6 +113,19 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
88113
; GFX12-NEXT: global_inv scope:SCOPE_SYS
89114
; GFX12-NEXT: s_wait_loadcnt 0x0
90115
; GFX12-NEXT: s_setpc_b64 s[30:31]
116+
;
117+
; GFX1250-LABEL: flat_atomic_fadd_f32_rtn_pat:
118+
; GFX1250: ; %bb.0:
119+
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
120+
; GFX1250-NEXT: s_wait_kmcnt 0x0
121+
; GFX1250-NEXT: v_mov_b32_e32 v2, 4.0
122+
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
123+
; GFX1250-NEXT: s_wait_storecnt 0x0
124+
; GFX1250-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
125+
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
126+
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
127+
; GFX1250-NEXT: s_wait_loadcnt 0x0
128+
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
91129
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
92130
ret float %ret
93131
}
@@ -112,6 +150,15 @@ define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr,
112150
; GFX12-NEXT: s_wait_dscnt 0x0
113151
; GFX12-NEXT: global_inv scope:SCOPE_SE
114152
; GFX12-NEXT: s_endpgm
153+
;
154+
; GFX1250-LABEL: local_atomic_fadd_v2f16_noret:
155+
; GFX1250: ; %bb.0:
156+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
157+
; GFX1250-NEXT: s_wait_kmcnt 0x0
158+
; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
159+
; GFX1250-NEXT: ds_pk_add_f16 v0, v1
160+
; GFX1250-NEXT: s_wait_dscnt 0x0
161+
; GFX1250-NEXT: s_endpgm
115162
%ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32 0, i32 0, i1 0)
116163
ret void
117164
}
@@ -137,6 +184,14 @@ define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half>
137184
; GFX12-NEXT: global_inv scope:SCOPE_SE
138185
; GFX12-NEXT: s_wait_loadcnt 0x0
139186
; GFX12-NEXT: s_setpc_b64 s[30:31]
187+
;
188+
; GFX1250-LABEL: local_atomic_fadd_v2f16_rtn:
189+
; GFX1250: ; %bb.0:
190+
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
191+
; GFX1250-NEXT: s_wait_kmcnt 0x0
192+
; GFX1250-NEXT: ds_pk_add_rtn_f16 v0, v0, v1
193+
; GFX1250-NEXT: s_wait_dscnt 0x0
194+
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
140195
%ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32 0, i32 0, i1 0)
141196
ret <2 x half> %ret
142197
}
@@ -161,6 +216,15 @@ define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr,
161216
; GFX12-NEXT: s_wait_dscnt 0x0
162217
; GFX12-NEXT: global_inv scope:SCOPE_SE
163218
; GFX12-NEXT: s_endpgm
219+
;
220+
; GFX1250-LABEL: local_atomic_fadd_v2bf16_noret:
221+
; GFX1250: ; %bb.0:
222+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
223+
; GFX1250-NEXT: s_wait_kmcnt 0x0
224+
; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
225+
; GFX1250-NEXT: ds_pk_add_bf16 v0, v1
226+
; GFX1250-NEXT: s_wait_dscnt 0x0
227+
; GFX1250-NEXT: s_endpgm
164228
%ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
165229
ret void
166230
}
@@ -186,6 +250,14 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
186250
; GFX12-NEXT: global_inv scope:SCOPE_SE
187251
; GFX12-NEXT: s_wait_loadcnt 0x0
188252
; GFX12-NEXT: s_setpc_b64 s[30:31]
253+
;
254+
; GFX1250-LABEL: local_atomic_fadd_v2bf16_rtn:
255+
; GFX1250: ; %bb.0:
256+
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
257+
; GFX1250-NEXT: s_wait_kmcnt 0x0
258+
; GFX1250-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1
259+
; GFX1250-NEXT: s_wait_dscnt 0x0
260+
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
189261
%ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
190262
ret <2 x i16> %ret
191263
}

0 commit comments

Comments
 (0)