@@ -7,9 +7,7 @@ declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>,
77declare <2 x half > @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16 (<2 x half >, <4 x i32 >, i32 , i32 , i32 )
88declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16 (<2 x bfloat> %val , <4 x i32 > %rsrc , i32 , i32 , i32 )
99declare <2 x half > @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16 (ptr addrspace (1 ) %ptr , <2 x half > %data )
10- declare <2 x i16 > @llvm.amdgcn.global.atomic.fadd.v2bf16.p1 (ptr addrspace (1 ) %ptr , <2 x i16 > %data )
1110declare <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %ptr , <2 x half > %data )
12- declare <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0 (ptr %ptr , <2 x i16 > %data )
1311
1412define amdgpu_kernel void @flat_atomic_fadd_v2f16_noret (ptr %ptr , <2 x half > %data ) {
1513; GFX12-SDAG-LABEL: flat_atomic_fadd_v2f16_noret:
@@ -59,104 +57,6 @@ define <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) {
5957 ret <2 x half > %ret
6058}
6159
62- define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret (ptr %ptr , <2 x i16 > %data ) {
63- ; GFX12-SDAG-LABEL: flat_atomic_fadd_v2bf16_noret:
64- ; GFX12-SDAG: ; %bb.0:
65- ; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
66- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
67- ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
68- ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
69- ; GFX12-SDAG-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
70- ; GFX12-SDAG-NEXT: s_endpgm
71- ;
72- ; GFX12-GISEL-LABEL: flat_atomic_fadd_v2bf16_noret:
73- ; GFX12-GISEL: ; %bb.0:
74- ; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
75- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
76- ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
77- ; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
78- ; GFX12-GISEL-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
79- ; GFX12-GISEL-NEXT: s_endpgm
80- %ret = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0 (ptr %ptr , <2 x i16 > %data )
81- ret void
82- }
83-
84- define <2 x i16 > @flat_atomic_fadd_v2bf16_rtn (ptr %ptr , <2 x i16 > %data ) {
85- ; GFX12-SDAG-LABEL: flat_atomic_fadd_v2bf16_rtn:
86- ; GFX12-SDAG: ; %bb.0:
87- ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
88- ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
89- ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
90- ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
91- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
92- ; GFX12-SDAG-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
93- ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
94- ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
95- ;
96- ; GFX12-GISEL-LABEL: flat_atomic_fadd_v2bf16_rtn:
97- ; GFX12-GISEL: ; %bb.0:
98- ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
99- ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
100- ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
101- ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
102- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
103- ; GFX12-GISEL-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
104- ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
105- ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
106- %ret = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0 (ptr %ptr , <2 x i16 > %data )
107- ret <2 x i16 > %ret
108- }
109-
110- define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret (ptr addrspace (1 ) %ptr , <2 x i16 > %data ) {
111- ; GFX12-SDAG-LABEL: global_atomic_fadd_v2bf16_noret:
112- ; GFX12-SDAG: ; %bb.0:
113- ; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
114- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
115- ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
116- ; GFX12-SDAG-NEXT: global_atomic_pk_add_bf16 v0, v1, s[0:1]
117- ; GFX12-SDAG-NEXT: s_nop 0
118- ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
119- ; GFX12-SDAG-NEXT: s_endpgm
120- ;
121- ; GFX12-GISEL-LABEL: global_atomic_fadd_v2bf16_noret:
122- ; GFX12-GISEL: ; %bb.0:
123- ; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
124- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
125- ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
126- ; GFX12-GISEL-NEXT: global_atomic_pk_add_bf16 v1, v0, s[0:1]
127- ; GFX12-GISEL-NEXT: s_nop 0
128- ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
129- ; GFX12-GISEL-NEXT: s_endpgm
130- %ret = call <2 x i16 > @llvm.amdgcn.global.atomic.fadd.v2bf16.p1 (ptr addrspace (1 ) %ptr , <2 x i16 > %data )
131- ret void
132- }
133-
134- define <2 x i16 > @global_atomic_fadd_v2bf16_rtn (ptr addrspace (1 ) %ptr , <2 x i16 > %data ) {
135- ; GFX12-SDAG-LABEL: global_atomic_fadd_v2bf16_rtn:
136- ; GFX12-SDAG: ; %bb.0:
137- ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
138- ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
139- ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
140- ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
141- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
142- ; GFX12-SDAG-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
143- ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
144- ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
145- ;
146- ; GFX12-GISEL-LABEL: global_atomic_fadd_v2bf16_rtn:
147- ; GFX12-GISEL: ; %bb.0:
148- ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
149- ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
150- ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
151- ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
152- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
153- ; GFX12-GISEL-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
154- ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
155- ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
156- %ret = call <2 x i16 > @llvm.amdgcn.global.atomic.fadd.v2bf16.p1 (ptr addrspace (1 ) %ptr , <2 x i16 > %data )
157- ret <2 x i16 > %ret
158- }
159-
16060define void @global_atomic_pk_add_v2f16 (ptr addrspace (1 ) %ptr , <2 x half > %data ) {
16161; GFX12-SDAG-LABEL: global_atomic_pk_add_v2f16:
16262; GFX12-SDAG: ; %bb.0: ; %main_body
0 commit comments