@@ -1528,9 +1528,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
15281528; GFX942-NEXT:    buffer_wbl2 sc1 
15291529; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0 
15301530; GFX942-NEXT:    s_waitcnt vmcnt(0) 
1531+ ; GFX942-NEXT:    buffer_inv sc1 
15311532; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5 
15321533; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1533- ; GFX942-NEXT:    buffer_inv sc1 
15341534; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
15351535; GFX942-NEXT:    s_cbranch_execnz .LBB12_1 
15361536; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1576,9 +1576,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
15761576; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[0,1] 
15771577; GFX90A-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc 
15781578; GFX90A-NEXT:    s_waitcnt vmcnt(0) 
1579+ ; GFX90A-NEXT:    buffer_wbinvl1 
15791580; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5 
15801581; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1581- ; GFX90A-NEXT:    buffer_wbinvl1 
15821582; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
15831583; GFX90A-NEXT:    s_cbranch_execnz .LBB12_1 
15841584; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1603,9 +1603,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
16031603; GFX908-NEXT:    v_mov_b32_e32 v1, v5 
16041604; GFX908-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc 
16051605; GFX908-NEXT:    s_waitcnt vmcnt(0) 
1606+ ; GFX908-NEXT:    buffer_wbinvl1 
16061607; GFX908-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5 
16071608; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1608- ; GFX908-NEXT:    buffer_wbinvl1 
16091609; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
16101610; GFX908-NEXT:    s_cbranch_execnz .LBB12_1 
16111611; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1630,9 +1630,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
16301630; GFX8-NEXT:    v_mov_b32_e32 v1, v5 
16311631; GFX8-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc 
16321632; GFX8-NEXT:    s_waitcnt vmcnt(0) 
1633+ ; GFX8-NEXT:    buffer_wbinvl1 
16331634; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5 
16341635; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1635- ; GFX8-NEXT:    buffer_wbinvl1 
16361636; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
16371637; GFX8-NEXT:    s_cbranch_execnz .LBB12_1 
16381638; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1683,10 +1683,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
16831683; GFX942-NEXT:    buffer_wbl2 sc1 
16841684; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 
16851685; GFX942-NEXT:    s_waitcnt vmcnt(0) 
1686+ ; GFX942-NEXT:    buffer_inv sc1 
16861687; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1 
1687- ; GFX942-NEXT:    v_mov_b32_e32 v1, v4 
16881688; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1689- ; GFX942-NEXT:    buffer_inv sc1  
1689+ ; GFX942-NEXT:    v_mov_b32_e32 v1, v4  
16901690; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
16911691; GFX942-NEXT:    s_cbranch_execnz .LBB13_1 
16921692; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1730,10 +1730,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17301730; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1] 
17311731; GFX90A-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc 
17321732; GFX90A-NEXT:    s_waitcnt vmcnt(0) 
1733+ ; GFX90A-NEXT:    buffer_wbinvl1 
17331734; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1 
1734- ; GFX90A-NEXT:    v_mov_b32_e32 v1, v4 
17351735; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1736- ; GFX90A-NEXT:    buffer_wbinvl1  
1736+ ; GFX90A-NEXT:    v_mov_b32_e32 v1, v4  
17371737; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
17381738; GFX90A-NEXT:    s_cbranch_execnz .LBB13_1 
17391739; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1756,10 +1756,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17561756; GFX908-NEXT:    v_mov_b32_e32 v4, v0 
17571757; GFX908-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc 
17581758; GFX908-NEXT:    s_waitcnt vmcnt(0) 
1759+ ; GFX908-NEXT:    buffer_wbinvl1 
17591760; GFX908-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1 
1760- ; GFX908-NEXT:    v_mov_b32_e32 v1, v4 
17611761; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1762- ; GFX908-NEXT:    buffer_wbinvl1  
1762+ ; GFX908-NEXT:    v_mov_b32_e32 v1, v4  
17631763; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
17641764; GFX908-NEXT:    s_cbranch_execnz .LBB13_1 
17651765; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1782,10 +1782,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17821782; GFX8-NEXT:    v_mov_b32_e32 v4, v0 
17831783; GFX8-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc 
17841784; GFX8-NEXT:    s_waitcnt vmcnt(0) 
1785+ ; GFX8-NEXT:    buffer_wbinvl1 
17851786; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1 
1786- ; GFX8-NEXT:    v_mov_b32_e32 v1, v4 
17871787; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1788- ; GFX8-NEXT:    buffer_wbinvl1  
1788+ ; GFX8-NEXT:    v_mov_b32_e32 v1, v4  
17891789; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
17901790; GFX8-NEXT:    s_cbranch_execnz .LBB13_1 
17911791; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1830,9 +1830,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18301830; GFX12-NEXT:    v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 
18311831; GFX12-NEXT:    buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN 
18321832; GFX12-NEXT:    s_wait_loadcnt 0x0 
1833+ ; GFX12-NEXT:    global_inv scope:SCOPE_DEV 
18331834; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10] 
18341835; GFX12-NEXT:    s_or_b32 s4, vcc_lo, s4 
1835- ; GFX12-NEXT:    global_inv scope:SCOPE_DEV 
18361836; GFX12-NEXT:    s_wait_alu 0xfffe 
18371837; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4 
18381838; GFX12-NEXT:    s_cbranch_execnz .LBB14_1 
@@ -1872,10 +1872,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18721872; GFX11-NEXT:    v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 
18731873; GFX11-NEXT:    buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], 0 offen glc 
18741874; GFX11-NEXT:    s_waitcnt vmcnt(0) 
1875- ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10] 
1876- ; GFX11-NEXT:    s_or_b32 s4, vcc_lo, s4 
18771875; GFX11-NEXT:    buffer_gl1_inv 
18781876; GFX11-NEXT:    buffer_gl0_inv 
1877+ ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10] 
1878+ ; GFX11-NEXT:    s_or_b32 s4, vcc_lo, s4 
1879+ ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) 
18791880; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4 
18801881; GFX11-NEXT:    s_cbranch_execnz .LBB14_1 
18811882; GFX11-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1924,9 +1925,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19241925; GFX908-NEXT:    v_mov_b32_e32 v3, v10 
19251926; GFX908-NEXT:    buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc 
19261927; GFX908-NEXT:    s_waitcnt vmcnt(0) 
1928+ ; GFX908-NEXT:    buffer_wbinvl1 
19271929; GFX908-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10] 
19281930; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1929- ; GFX908-NEXT:    buffer_wbinvl1 
19301931; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
19311932; GFX908-NEXT:    s_cbranch_execnz .LBB14_1 
19321933; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1955,9 +1956,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19551956; GFX8-NEXT:    v_mov_b32_e32 v3, v10 
19561957; GFX8-NEXT:    buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc 
19571958; GFX8-NEXT:    s_waitcnt vmcnt(0) 
1959+ ; GFX8-NEXT:    buffer_wbinvl1 
19581960; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10] 
19591961; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
1960- ; GFX8-NEXT:    buffer_wbinvl1 
19611962; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
19621963; GFX8-NEXT:    s_cbranch_execnz .LBB14_1 
19631964; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -1999,10 +2000,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
19992000; GFX12-NEXT:    v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0 
20002001; GFX12-NEXT:    buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN 
20012002; GFX12-NEXT:    s_wait_loadcnt 0x0 
2003+ ; GFX12-NEXT:    global_inv scope:SCOPE_DEV 
20022004; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3] 
20032005; GFX12-NEXT:    v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8 
20042006; GFX12-NEXT:    s_or_b32 s4, vcc_lo, s4 
2005- ; GFX12-NEXT:    global_inv scope:SCOPE_DEV 
20062007; GFX12-NEXT:    s_wait_alu 0xfffe 
20072008; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4 
20082009; GFX12-NEXT:    s_cbranch_execnz .LBB15_1 
@@ -2039,11 +2040,12 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20392040; GFX11-NEXT:    v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0 
20402041; GFX11-NEXT:    buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], 0 offen glc 
20412042; GFX11-NEXT:    s_waitcnt vmcnt(0) 
2043+ ; GFX11-NEXT:    buffer_gl1_inv 
2044+ ; GFX11-NEXT:    buffer_gl0_inv 
20422045; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3] 
20432046; GFX11-NEXT:    v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8 
20442047; GFX11-NEXT:    s_or_b32 s4, vcc_lo, s4 
2045- ; GFX11-NEXT:    buffer_gl1_inv 
2046- ; GFX11-NEXT:    buffer_gl0_inv 
2048+ ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) 
20472049; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4 
20482050; GFX11-NEXT:    s_cbranch_execnz .LBB15_1 
20492051; GFX11-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -2088,11 +2090,11 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20882090; GFX908-NEXT:    v_mov_b32_e32 v7, v0 
20892091; GFX908-NEXT:    buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc 
20902092; GFX908-NEXT:    s_waitcnt vmcnt(0) 
2093+ ; GFX908-NEXT:    buffer_wbinvl1 
20912094; GFX908-NEXT:    v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3] 
20922095; GFX908-NEXT:    v_mov_b32_e32 v2, v7 
2093- ; GFX908-NEXT:    v_mov_b32_e32 v3, v8 
20942096; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
2095- ; GFX908-NEXT:    buffer_wbinvl1  
2097+ ; GFX908-NEXT:    v_mov_b32_e32 v3, v8  
20962098; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
20972099; GFX908-NEXT:    s_cbranch_execnz .LBB15_1 
20982100; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end 
@@ -2117,11 +2119,11 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
21172119; GFX8-NEXT:    v_mov_b32_e32 v7, v0 
21182120; GFX8-NEXT:    buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc 
21192121; GFX8-NEXT:    s_waitcnt vmcnt(0) 
2122+ ; GFX8-NEXT:    buffer_wbinvl1 
21202123; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3] 
21212124; GFX8-NEXT:    v_mov_b32_e32 v2, v7 
2122- ; GFX8-NEXT:    v_mov_b32_e32 v3, v8 
21232125; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5] 
2124- ; GFX8-NEXT:    buffer_wbinvl1  
2126+ ; GFX8-NEXT:    v_mov_b32_e32 v3, v8  
21252127; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5] 
21262128; GFX8-NEXT:    s_cbranch_execnz .LBB15_1 
21272129; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end 
0 commit comments