1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
2
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 | FileCheck %s -check-prefix=GFX942
3
3
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefix=GFX12
4
+ ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefix=GFX1250
4
5
5
6
declare <2 x half > @llvm.amdgcn.ds.fadd.v2f16 (ptr addrspace (3 ) %ptr , <2 x half > %data , i32 , i32 , i1 )
6
7
declare <2 x i16 > @llvm.amdgcn.ds.fadd.v2bf16 (ptr addrspace (3 ) %ptr , <2 x i16 > %data )
@@ -30,6 +31,18 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
30
31
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
31
32
; GFX12-NEXT: global_inv scope:SCOPE_SYS
32
33
; GFX12-NEXT: s_endpgm
34
+ ;
35
+ ; GFX1250-LABEL: flat_atomic_fadd_f32_noret_pat:
36
+ ; GFX1250: ; %bb.0:
37
+ ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
38
+ ; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
39
+ ; GFX1250-NEXT: global_wb scope:SCOPE_SYS
40
+ ; GFX1250-NEXT: s_wait_storecnt 0x0
41
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
42
+ ; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
43
+ ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
44
+ ; GFX1250-NEXT: global_inv scope:SCOPE_SYS
45
+ ; GFX1250-NEXT: s_endpgm
33
46
%ret = atomicrmw fadd ptr %ptr , float 4 .0 seq_cst , !amdgpu.no.remote.memory !0
34
47
ret void
35
48
}
@@ -59,6 +72,18 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
59
72
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
60
73
; GFX12-NEXT: global_inv scope:SCOPE_SYS
61
74
; GFX12-NEXT: s_endpgm
75
+ ;
76
+ ; GFX1250-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
77
+ ; GFX1250: ; %bb.0:
78
+ ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
79
+ ; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
80
+ ; GFX1250-NEXT: global_wb scope:SCOPE_SYS
81
+ ; GFX1250-NEXT: s_wait_storecnt 0x0
82
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
83
+ ; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
84
+ ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
85
+ ; GFX1250-NEXT: global_inv scope:SCOPE_SYS
86
+ ; GFX1250-NEXT: s_endpgm
62
87
%ret = atomicrmw fadd ptr %ptr , float 4 .0 seq_cst , !amdgpu.no.remote.memory !0
63
88
ret void
64
89
}
@@ -88,6 +113,19 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
88
113
; GFX12-NEXT: global_inv scope:SCOPE_SYS
89
114
; GFX12-NEXT: s_wait_loadcnt 0x0
90
115
; GFX12-NEXT: s_setpc_b64 s[30:31]
116
+ ;
117
+ ; GFX1250-LABEL: flat_atomic_fadd_f32_rtn_pat:
118
+ ; GFX1250: ; %bb.0:
119
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
120
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
121
+ ; GFX1250-NEXT: v_mov_b32_e32 v2, 4.0
122
+ ; GFX1250-NEXT: global_wb scope:SCOPE_SYS
123
+ ; GFX1250-NEXT: s_wait_storecnt 0x0
124
+ ; GFX1250-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
125
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
126
+ ; GFX1250-NEXT: global_inv scope:SCOPE_SYS
127
+ ; GFX1250-NEXT: s_wait_loadcnt 0x0
128
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
91
129
%ret = atomicrmw fadd ptr %ptr , float 4 .0 seq_cst , !amdgpu.no.remote.memory !0
92
130
ret float %ret
93
131
}
@@ -112,6 +150,15 @@ define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr,
112
150
; GFX12-NEXT: s_wait_dscnt 0x0
113
151
; GFX12-NEXT: global_inv scope:SCOPE_SE
114
152
; GFX12-NEXT: s_endpgm
153
+ ;
154
+ ; GFX1250-LABEL: local_atomic_fadd_v2f16_noret:
155
+ ; GFX1250: ; %bb.0:
156
+ ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
157
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
158
+ ; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
159
+ ; GFX1250-NEXT: ds_pk_add_f16 v0, v1
160
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
161
+ ; GFX1250-NEXT: s_endpgm
115
162
%ret = call <2 x half > @llvm.amdgcn.ds.fadd.v2f16 (ptr addrspace (3 ) %ptr , <2 x half > %data , i32 0 , i32 0 , i1 0 )
116
163
ret void
117
164
}
@@ -137,6 +184,14 @@ define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half>
137
184
; GFX12-NEXT: global_inv scope:SCOPE_SE
138
185
; GFX12-NEXT: s_wait_loadcnt 0x0
139
186
; GFX12-NEXT: s_setpc_b64 s[30:31]
187
+ ;
188
+ ; GFX1250-LABEL: local_atomic_fadd_v2f16_rtn:
189
+ ; GFX1250: ; %bb.0:
190
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
191
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
192
+ ; GFX1250-NEXT: ds_pk_add_rtn_f16 v0, v0, v1
193
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
194
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
140
195
%ret = call <2 x half > @llvm.amdgcn.ds.fadd.v2f16 (ptr addrspace (3 ) %ptr , <2 x half > %data , i32 0 , i32 0 , i1 0 )
141
196
ret <2 x half > %ret
142
197
}
@@ -161,6 +216,15 @@ define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr,
161
216
; GFX12-NEXT: s_wait_dscnt 0x0
162
217
; GFX12-NEXT: global_inv scope:SCOPE_SE
163
218
; GFX12-NEXT: s_endpgm
219
+ ;
220
+ ; GFX1250-LABEL: local_atomic_fadd_v2bf16_noret:
221
+ ; GFX1250: ; %bb.0:
222
+ ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
223
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
224
+ ; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
225
+ ; GFX1250-NEXT: ds_pk_add_bf16 v0, v1
226
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
227
+ ; GFX1250-NEXT: s_endpgm
164
228
%ret = call <2 x i16 > @llvm.amdgcn.ds.fadd.v2bf16 (ptr addrspace (3 ) %ptr , <2 x i16 > %data )
165
229
ret void
166
230
}
@@ -186,6 +250,14 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
186
250
; GFX12-NEXT: global_inv scope:SCOPE_SE
187
251
; GFX12-NEXT: s_wait_loadcnt 0x0
188
252
; GFX12-NEXT: s_setpc_b64 s[30:31]
253
+ ;
254
+ ; GFX1250-LABEL: local_atomic_fadd_v2bf16_rtn:
255
+ ; GFX1250: ; %bb.0:
256
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
257
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
258
+ ; GFX1250-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1
259
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
260
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
189
261
%ret = call <2 x i16 > @llvm.amdgcn.ds.fadd.v2bf16 (ptr addrspace (3 ) %ptr , <2 x i16 > %data )
190
262
ret <2 x i16 > %ret
191
263
}
0 commit comments