@@ -1528,9 +1528,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
15281528; GFX942-NEXT: buffer_wbl2 sc1
15291529; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
15301530; GFX942-NEXT: s_waitcnt vmcnt(0)
1531- ; GFX942-NEXT: buffer_inv sc1
15321531; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
15331532; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1533+ ; GFX942-NEXT: buffer_inv sc1
15341534; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5]
15351535; GFX942-NEXT: s_cbranch_execnz .LBB12_1
15361536; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1576,9 +1576,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
15761576; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[0,1]
15771577; GFX90A-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
15781578; GFX90A-NEXT: s_waitcnt vmcnt(0)
1579- ; GFX90A-NEXT: buffer_wbinvl1
15801579; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
15811580; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1581+ ; GFX90A-NEXT: buffer_wbinvl1
15821582; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
15831583; GFX90A-NEXT: s_cbranch_execnz .LBB12_1
15841584; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1603,9 +1603,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
16031603; GFX908-NEXT: v_mov_b32_e32 v1, v5
16041604; GFX908-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
16051605; GFX908-NEXT: s_waitcnt vmcnt(0)
1606- ; GFX908-NEXT: buffer_wbinvl1
16071606; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
16081607; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1608+ ; GFX908-NEXT: buffer_wbinvl1
16091609; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
16101610; GFX908-NEXT: s_cbranch_execnz .LBB12_1
16111611; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1630,9 +1630,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
16301630; GFX8-NEXT: v_mov_b32_e32 v1, v5
16311631; GFX8-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
16321632; GFX8-NEXT: s_waitcnt vmcnt(0)
1633- ; GFX8-NEXT: buffer_wbinvl1
16341633; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
16351634; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1635+ ; GFX8-NEXT: buffer_wbinvl1
16361636; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
16371637; GFX8-NEXT: s_cbranch_execnz .LBB12_1
16381638; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1683,10 +1683,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
16831683; GFX942-NEXT: buffer_wbl2 sc1
16841684; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
16851685; GFX942-NEXT: s_waitcnt vmcnt(0)
1686- ; GFX942-NEXT: buffer_inv sc1
16871686; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
1688- ; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
16891687; GFX942-NEXT: v_mov_b32_e32 v1, v4
1688+ ; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1689+ ; GFX942-NEXT: buffer_inv sc1
16901690; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5]
16911691; GFX942-NEXT: s_cbranch_execnz .LBB13_1
16921692; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1730,10 +1730,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17301730; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1]
17311731; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
17321732; GFX90A-NEXT: s_waitcnt vmcnt(0)
1733- ; GFX90A-NEXT: buffer_wbinvl1
17341733; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
1735- ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
17361734; GFX90A-NEXT: v_mov_b32_e32 v1, v4
1735+ ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1736+ ; GFX90A-NEXT: buffer_wbinvl1
17371737; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
17381738; GFX90A-NEXT: s_cbranch_execnz .LBB13_1
17391739; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1756,10 +1756,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17561756; GFX908-NEXT: v_mov_b32_e32 v4, v0
17571757; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
17581758; GFX908-NEXT: s_waitcnt vmcnt(0)
1759- ; GFX908-NEXT: buffer_wbinvl1
17601759; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
1761- ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
17621760; GFX908-NEXT: v_mov_b32_e32 v1, v4
1761+ ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1762+ ; GFX908-NEXT: buffer_wbinvl1
17631763; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
17641764; GFX908-NEXT: s_cbranch_execnz .LBB13_1
17651765; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1782,10 +1782,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17821782; GFX8-NEXT: v_mov_b32_e32 v4, v0
17831783; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
17841784; GFX8-NEXT: s_waitcnt vmcnt(0)
1785- ; GFX8-NEXT: buffer_wbinvl1
17861785; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
1787- ; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
17881786; GFX8-NEXT: v_mov_b32_e32 v1, v4
1787+ ; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1788+ ; GFX8-NEXT: buffer_wbinvl1
17891789; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
17901790; GFX8-NEXT: s_cbranch_execnz .LBB13_1
17911791; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1830,9 +1830,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18301830; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18311831; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
18321832; GFX12-NEXT: s_wait_loadcnt 0x0
1833- ; GFX12-NEXT: global_inv scope:SCOPE_DEV
18341833; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
18351834; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4
1835+ ; GFX12-NEXT: global_inv scope:SCOPE_DEV
18361836; GFX12-NEXT: s_wait_alu 0xfffe
18371837; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
18381838; GFX12-NEXT: s_cbranch_execnz .LBB14_1
@@ -1872,11 +1872,10 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18721872; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18731873; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], 0 offen glc
18741874; GFX11-NEXT: s_waitcnt vmcnt(0)
1875- ; GFX11-NEXT: buffer_gl1_inv
1876- ; GFX11-NEXT: buffer_gl0_inv
18771875; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
18781876; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4
1879- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1877+ ; GFX11-NEXT: buffer_gl1_inv
1878+ ; GFX11-NEXT: buffer_gl0_inv
18801879; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
18811880; GFX11-NEXT: s_cbranch_execnz .LBB14_1
18821881; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1925,9 +1924,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19251924; GFX908-NEXT: v_mov_b32_e32 v3, v10
19261925; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19271926; GFX908-NEXT: s_waitcnt vmcnt(0)
1928- ; GFX908-NEXT: buffer_wbinvl1
19291927; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10]
19301928; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1929+ ; GFX908-NEXT: buffer_wbinvl1
19311930; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
19321931; GFX908-NEXT: s_cbranch_execnz .LBB14_1
19331932; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1956,9 +1955,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19561955; GFX8-NEXT: v_mov_b32_e32 v3, v10
19571956; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19581957; GFX8-NEXT: s_waitcnt vmcnt(0)
1959- ; GFX8-NEXT: buffer_wbinvl1
19601958; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10]
19611959; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1960+ ; GFX8-NEXT: buffer_wbinvl1
19621961; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
19631962; GFX8-NEXT: s_cbranch_execnz .LBB14_1
19641963; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -2000,10 +1999,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20001999; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20012000; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
20022001; GFX12-NEXT: s_wait_loadcnt 0x0
2003- ; GFX12-NEXT: global_inv scope:SCOPE_DEV
20042002; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3]
20052003; GFX12-NEXT: v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8
20062004; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4
2005+ ; GFX12-NEXT: global_inv scope:SCOPE_DEV
20072006; GFX12-NEXT: s_wait_alu 0xfffe
20082007; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
20092008; GFX12-NEXT: s_cbranch_execnz .LBB15_1
@@ -2040,12 +2039,11 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20402039; GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20412040; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], 0 offen glc
20422041; GFX11-NEXT: s_waitcnt vmcnt(0)
2043- ; GFX11-NEXT: buffer_gl1_inv
2044- ; GFX11-NEXT: buffer_gl0_inv
20452042; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3]
20462043; GFX11-NEXT: v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8
20472044; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4
2048- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2045+ ; GFX11-NEXT: buffer_gl1_inv
2046+ ; GFX11-NEXT: buffer_gl0_inv
20492047; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
20502048; GFX11-NEXT: s_cbranch_execnz .LBB15_1
20512049; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -2090,11 +2088,11 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20902088; GFX908-NEXT: v_mov_b32_e32 v7, v0
20912089; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
20922090; GFX908-NEXT: s_waitcnt vmcnt(0)
2093- ; GFX908-NEXT: buffer_wbinvl1
20942091; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3]
20952092; GFX908-NEXT: v_mov_b32_e32 v2, v7
2096- ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
20972093; GFX908-NEXT: v_mov_b32_e32 v3, v8
2094+ ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2095+ ; GFX908-NEXT: buffer_wbinvl1
20982096; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
20992097; GFX908-NEXT: s_cbranch_execnz .LBB15_1
21002098; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -2119,11 +2117,11 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
21192117; GFX8-NEXT: v_mov_b32_e32 v7, v0
21202118; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
21212119; GFX8-NEXT: s_waitcnt vmcnt(0)
2122- ; GFX8-NEXT: buffer_wbinvl1
21232120; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3]
21242121; GFX8-NEXT: v_mov_b32_e32 v2, v7
2125- ; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
21262122; GFX8-NEXT: v_mov_b32_e32 v3, v8
2123+ ; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2124+ ; GFX8-NEXT: buffer_wbinvl1
21272125; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
21282126; GFX8-NEXT: s_cbranch_execnz .LBB15_1
21292127; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end
0 commit comments