@@ -1812,26 +1812,26 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18121812; GFX12-NEXT: s_wait_samplecnt 0x0
18131813; GFX12-NEXT: s_wait_bvhcnt 0x0
18141814; GFX12-NEXT: s_wait_kmcnt 0x0
1815- ; GFX12-NEXT: v_mov_b32_e32 v6 , s16
1816- ; GFX12-NEXT: v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v3, v1
1815+ ; GFX12-NEXT: v_mov_b32_e32 v8 , s16
1816+ ; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[0:1], v[0:1]
18171817; GFX12-NEXT: s_mov_b32 s4, 0
1818- ; GFX12-NEXT: buffer_load_b64 v[0:1], v6, s[0:3], null offen
1819- ; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[2:3], v[2:3]
1818+ ; GFX12-NEXT: buffer_load_b64 v[4:5], v8, s[0:3], null offen
18201819; GFX12-NEXT: .LBB14_1: ; %atomicrmw.start
18211820; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
18221821; GFX12-NEXT: s_wait_loadcnt 0x0
1823- ; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
1822+ ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[4:5], v[4:5]
18241823; GFX12-NEXT: s_wait_storecnt 0x0
18251824; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1826- ; GFX12-NEXT: v_max_num_f64_e32 v[0:1 ], v[9:10 ], v[9:10 ]
1827- ; GFX12-NEXT: v_max_num_f64_e32 v[7:8], v[0:1], v[4:5]
1828- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 )
1829- ; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1830- ; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
1831- ; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6 , s[0:3], null offen th:TH_ATOMIC_RETURN
1825+ ; GFX12-NEXT: v_max_num_f64_e32 v[2:3 ], v[0:1 ], v[6:7 ]
1826+ ; GFX12-NEXT: v_mov_b32_e32 v0, v2
1827+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 )
1828+ ; GFX12-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_mov_b32 v2, v4
1829+ ; GFX12-NEXT: v_mov_b32_e32 v3, v5
1830+ ; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v8 , s[0:3], null offen th:TH_ATOMIC_RETURN
18321831; GFX12-NEXT: s_wait_loadcnt 0x0
18331832; GFX12-NEXT: global_inv scope:SCOPE_DEV
1834- ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
1833+ ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[4:5]
1834+ ; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0
18351835; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4
18361836; GFX12-NEXT: s_wait_alu 0xfffe
18371837; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
@@ -1854,27 +1854,27 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18541854; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
18551855; GFX11: ; %bb.0:
18561856; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1857- ; GFX11-NEXT: v_mov_b32_e32 v6 , s16
1858- ; GFX11-NEXT: v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v3, v1
1857+ ; GFX11-NEXT: v_mov_b32_e32 v8 , s16
1858+ ; GFX11-NEXT: v_max_f64 v[6:7], v[0:1], v[0:1]
18591859; GFX11-NEXT: s_mov_b32 s4, 0
1860- ; GFX11-NEXT: buffer_load_b64 v[0:1], v6, s[0:3], 0 offen
1861- ; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3]
1860+ ; GFX11-NEXT: buffer_load_b64 v[4:5], v8, s[0:3], 0 offen
18621861; GFX11-NEXT: .LBB14_1: ; %atomicrmw.start
18631862; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
18641863; GFX11-NEXT: s_waitcnt vmcnt(0)
1865- ; GFX11-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
1864+ ; GFX11-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
18661865; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18671866; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1868- ; GFX11-NEXT: v_max_f64 v[0:1 ], v[9:10 ], v[9:10 ]
1869- ; GFX11-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
1870- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
1871- ; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1872- ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
1873- ; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6 , s[0:3], 0 offen glc
1867+ ; GFX11-NEXT: v_max_f64 v[2:3 ], v[0:1 ], v[6:7 ]
1868+ ; GFX11-NEXT: v_mov_b32_e32 v0, v2
1869+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
1870+ ; GFX11-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_mov_b32 v2, v4
1871+ ; GFX11-NEXT: v_mov_b32_e32 v3, v5
1872+ ; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v8 , s[0:3], 0 offen glc
18741873; GFX11-NEXT: s_waitcnt vmcnt(0)
18751874; GFX11-NEXT: buffer_gl1_inv
18761875; GFX11-NEXT: buffer_gl0_inv
1877- ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
1876+ ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[4:5]
1877+ ; GFX11-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0
18781878; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4
18791879; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
18801880; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
@@ -1906,28 +1906,26 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19061906; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
19071907; GFX908: ; %bb.0:
19081908; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1909- ; GFX908-NEXT: v_mov_b32_e32 v6, s20
1910- ; GFX908-NEXT: v_mov_b32_e32 v2, v0
1911- ; GFX908-NEXT: v_mov_b32_e32 v3, v1
1912- ; GFX908-NEXT: buffer_load_dwordx2 v[0:1], v6, s[16:19], 0 offen
1913- ; GFX908-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3]
1909+ ; GFX908-NEXT: v_mov_b32_e32 v8, s20
1910+ ; GFX908-NEXT: buffer_load_dwordx2 v[4:5], v8, s[16:19], 0 offen
1911+ ; GFX908-NEXT: v_max_f64 v[6:7], v[0:1], v[0:1]
19141912; GFX908-NEXT: s_mov_b64 s[4:5], 0
19151913; GFX908-NEXT: .LBB14_1: ; %atomicrmw.start
19161914; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
19171915; GFX908-NEXT: s_waitcnt vmcnt(0)
1918- ; GFX908-NEXT: v_mov_b32_e32 v10, v1
1919- ; GFX908-NEXT: v_mov_b32_e32 v9, v0
1920- ; GFX908-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
1921- ; GFX908-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
1922- ; GFX908-NEXT: v_mov_b32_e32 v0, v7
1923- ; GFX908-NEXT: v_mov_b32_e32 v1, v8
1924- ; GFX908-NEXT: v_mov_b32_e32 v2, v9
1925- ; GFX908-NEXT: v_mov_b32_e32 v3, v10
1926- ; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
1916+ ; GFX908-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
1917+ ; GFX908-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7]
1918+ ; GFX908-NEXT: v_mov_b32_e32 v0, v2
1919+ ; GFX908-NEXT: v_mov_b32_e32 v1, v3
1920+ ; GFX908-NEXT: v_mov_b32_e32 v2, v4
1921+ ; GFX908-NEXT: v_mov_b32_e32 v3, v5
1922+ ; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v8, s[16:19], 0 offen glc
19271923; GFX908-NEXT: s_waitcnt vmcnt(0)
19281924; GFX908-NEXT: buffer_wbinvl1
1929- ; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10]
1925+ ; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[4:5]
1926+ ; GFX908-NEXT: v_mov_b32_e32 v5, v1
19301927; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1928+ ; GFX908-NEXT: v_mov_b32_e32 v4, v0
19311929; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
19321930; GFX908-NEXT: s_cbranch_execnz .LBB14_1
19331931; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1937,28 +1935,26 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19371935; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
19381936; GFX8: ; %bb.0:
19391937; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1940- ; GFX8-NEXT: v_mov_b32_e32 v6, s20
1941- ; GFX8-NEXT: v_mov_b32_e32 v2, v0
1942- ; GFX8-NEXT: v_mov_b32_e32 v3, v1
1943- ; GFX8-NEXT: buffer_load_dwordx2 v[0:1], v6, s[16:19], 0 offen
1944- ; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3]
1938+ ; GFX8-NEXT: v_mov_b32_e32 v8, s20
1939+ ; GFX8-NEXT: buffer_load_dwordx2 v[4:5], v8, s[16:19], 0 offen
1940+ ; GFX8-NEXT: v_max_f64 v[6:7], v[0:1], v[0:1]
19451941; GFX8-NEXT: s_mov_b64 s[4:5], 0
19461942; GFX8-NEXT: .LBB14_1: ; %atomicrmw.start
19471943; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
19481944; GFX8-NEXT: s_waitcnt vmcnt(0)
1949- ; GFX8-NEXT: v_mov_b32_e32 v10, v1
1950- ; GFX8-NEXT: v_mov_b32_e32 v9, v0
1951- ; GFX8-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
1952- ; GFX8-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
1953- ; GFX8-NEXT: v_mov_b32_e32 v0, v7
1954- ; GFX8-NEXT: v_mov_b32_e32 v1, v8
1955- ; GFX8-NEXT: v_mov_b32_e32 v2, v9
1956- ; GFX8-NEXT: v_mov_b32_e32 v3, v10
1957- ; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
1945+ ; GFX8-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
1946+ ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7]
1947+ ; GFX8-NEXT: v_mov_b32_e32 v0, v2
1948+ ; GFX8-NEXT: v_mov_b32_e32 v1, v3
1949+ ; GFX8-NEXT: v_mov_b32_e32 v2, v4
1950+ ; GFX8-NEXT: v_mov_b32_e32 v3, v5
1951+ ; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v8, s[16:19], 0 offen glc
19581952; GFX8-NEXT: s_waitcnt vmcnt(0)
19591953; GFX8-NEXT: buffer_wbinvl1
1960- ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10]
1954+ ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[4:5]
1955+ ; GFX8-NEXT: v_mov_b32_e32 v5, v1
19611956; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1957+ ; GFX8-NEXT: v_mov_b32_e32 v4, v0
19621958; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
19631959; GFX8-NEXT: s_cbranch_execnz .LBB14_1
19641960; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end
0 commit comments