Skip to content

Commit ef23b55

Browse files
committed
Fix fallout from rebase.
1 parent f0b52a5 commit ef23b55

File tree

6 files changed

+398
-391
lines changed

6 files changed

+398
-391
lines changed

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1279,9 +1279,9 @@ defm : DSAtomicRetNoRetPat_NoM0_mc<DS_PK_ADD_RTN_BF16, DS_PK_ADD_BF16, v2bf16, "
12791279

12801280
let SubtargetPredicate = isGFX12Plus in {
12811281

1282-
defm : DSAtomicRetNoRetPat_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "atomic_load_usub_cond">;
1282+
defm : DSAtomicRetNoRetPat_NoM0_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "atomic_load_usub_cond">;
12831283

1284-
defm : DSAtomicRetNoRetPat_mc<DS_SUB_CLAMP_RTN_U32, DS_SUB_CLAMP_U32, i32, "atomic_load_usub_sat">;
1284+
defm : DSAtomicRetNoRetPat_NoM0_mc<DS_SUB_CLAMP_RTN_U32, DS_SUB_CLAMP_U32, i32, "atomic_load_usub_sat">;
12851285

12861286
} // let SubtargetPredicate = isGFX12Plus
12871287

llvm/test/CodeGen/AMDGPU/atomicrmw_usub_cond.ll

Lines changed: 51 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,11 @@ define amdgpu_kernel void @flat_atomic_usub_cond_no_rtn_u32(ptr %addr, i32 %in)
4040
; GFX12-SDAG: ; %bb.0: ; %entry
4141
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
4242
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
43-
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
43+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
44+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
4445
; GFX12-SDAG-NEXT: global_wb scope:SCOPE_SYS
4546
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
46-
; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v0, v1, s[0:1] offset:-16 scope:SCOPE_SYS
47+
; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16 scope:SCOPE_SYS
4748
; GFX12-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
4849
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_SYS
4950
; GFX12-SDAG-NEXT: s_endpgm
@@ -80,10 +81,14 @@ define amdgpu_kernel void @flat_atomic_usub_cond_no_rtn_u32(ptr %addr, i32 %in)
8081
; GFX12-GISEL: ; %bb.0: ; %entry
8182
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
8283
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
83-
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
84+
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, -16
85+
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
86+
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
87+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
88+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
8489
; GFX12-GISEL-NEXT: global_wb scope:SCOPE_SYS
8590
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
86-
; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v1, v0, s[0:1] offset:-16 scope:SCOPE_SYS
91+
; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 scope:SCOPE_SYS
8792
; GFX12-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
8893
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_SYS
8994
; GFX12-GISEL-NEXT: s_endpgm
@@ -129,10 +134,11 @@ define amdgpu_kernel void @flat_atomic_usub_cond_no_rtn_u32_forced(ptr %addr, i3
129134
; GFX12-SDAG: ; %bb.0: ; %entry
130135
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
131136
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
132-
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
137+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
138+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
133139
; GFX12-SDAG-NEXT: global_wb scope:SCOPE_SYS
134140
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
135-
; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v0, v1, s[0:1] offset:-16 scope:SCOPE_SYS
141+
; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16 scope:SCOPE_SYS
136142
; GFX12-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
137143
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_SYS
138144
; GFX12-SDAG-NEXT: s_endpgm
@@ -169,10 +175,14 @@ define amdgpu_kernel void @flat_atomic_usub_cond_no_rtn_u32_forced(ptr %addr, i3
169175
; GFX12-GISEL: ; %bb.0: ; %entry
170176
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
171177
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
172-
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
178+
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, -16
179+
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
180+
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
181+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
182+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
173183
; GFX12-GISEL-NEXT: global_wb scope:SCOPE_SYS
174184
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
175-
; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v1, v0, s[0:1] offset:-16 scope:SCOPE_SYS
185+
; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 scope:SCOPE_SYS
176186
; GFX12-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
177187
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_SYS
178188
; GFX12-GISEL-NEXT: s_endpgm
@@ -217,14 +227,15 @@ define amdgpu_kernel void @flat_atomic_usub_cond_rtn_u32(ptr %addr, i32 %in, ptr
217227
;
218228
; GFX12-SDAG-LABEL: flat_atomic_usub_cond_rtn_u32:
219229
; GFX12-SDAG: ; %bb.0: ; %entry
230+
; GFX12-SDAG-NEXT: s_clause 0x1
220231
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
221-
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
222232
; GFX12-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
223233
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
224-
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, s2
234+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
235+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
225236
; GFX12-SDAG-NEXT: global_wb scope:SCOPE_SYS
226237
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
227-
; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v2, v0, v1, s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
238+
; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v2, v[0:1], v2 offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
228239
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
229240
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_SYS
230241
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
@@ -234,33 +245,39 @@ define amdgpu_kernel void @flat_atomic_usub_cond_rtn_u32(ptr %addr, i32 %in, ptr
234245
; GFX9-GISEL-LABEL: flat_atomic_usub_cond_rtn_u32:
235246
; GFX9-GISEL: ; %bb.0: ; %entry
236247
; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
237-
; GFX9-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
248+
; GFX9-GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
238249
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
239-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
240-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
241-
; GFX9-GISEL-NEXT: flat_load_dword v2, v[0:1] offset:16
242-
; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], 0
250+
; GFX9-GISEL-NEXT: s_add_u32 s2, s0, 16
251+
; GFX9-GISEL-NEXT: s_addc_u32 s3, s1, 0
252+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
253+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3
254+
; GFX9-GISEL-NEXT: flat_load_dword v0, v[0:1]
255+
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], 0
243256
; GFX9-GISEL-NEXT: .LBB2_1: ; %atomicrmw.start
244257
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
245258
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
246-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, v2
247-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v2, s2, v3
248-
; GFX9-GISEL-NEXT: v_cmp_le_u32_e32 vcc, s2, v3
249-
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
250-
; GFX9-GISEL-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
259+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v0
260+
; GFX9-GISEL-NEXT: s_add_u32 s8, s0, 16
261+
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, s6, v1
262+
; GFX9-GISEL-NEXT: s_addc_u32 s9, s1, 0
263+
; GFX9-GISEL-NEXT: v_cmp_le_u32_e32 vcc, s6, v1
264+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s8
265+
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
266+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s9
267+
; GFX9-GISEL-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc
251268
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
252269
; GFX9-GISEL-NEXT: buffer_wbinvl1_vol
253-
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
254-
; GFX9-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
255-
; GFX9-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
270+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
271+
; GFX9-GISEL-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
272+
; GFX9-GISEL-NEXT: s_andn2_b64 exec, exec, s[2:3]
256273
; GFX9-GISEL-NEXT: s_cbranch_execnz .LBB2_1
257274
; GFX9-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
258-
; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
275+
; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
259276
; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
260277
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
261-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
262-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
263-
; GFX9-GISEL-NEXT: flat_store_dword v[0:1], v2
278+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s1
279+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s0
280+
; GFX9-GISEL-NEXT: flat_store_dword v[1:2], v0
264281
; GFX9-GISEL-NEXT: s_endpgm
265282
;
266283
; GFX12-GISEL-LABEL: flat_atomic_usub_cond_rtn_u32:
@@ -269,10 +286,14 @@ define amdgpu_kernel void @flat_atomic_usub_cond_rtn_u32(ptr %addr, i32 %in, ptr
269286
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
270287
; GFX12-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
271288
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
272-
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
289+
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 16
290+
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
291+
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
292+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
293+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
273294
; GFX12-GISEL-NEXT: global_wb scope:SCOPE_SYS
274295
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
275-
; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v2, v1, v0, s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
296+
; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
276297
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
277298
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_SYS
278299
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5

0 commit comments

Comments (0)