@@ -2356,11 +2356,10 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
23562356; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
23572357; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9]
23582358; GFX6-NEXT: s_mov_b32 s8, 0
2359- ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
2360- ; GFX6-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
23612359; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000
2360+ ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
23622361; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
2363- ; GFX6-NEXT: v_cmp_neq_f64_e32 vcc, s[8:9], v[0:1]
2362+ ; GFX6-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9]
23642363; GFX6-NEXT: s_mov_b32 s6, 0
23652364; GFX6-NEXT: s_mov_b32 s7, 0xf000
23662365; GFX6-NEXT: s_mov_b32 s4, s6
@@ -2375,46 +2374,43 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
23752374; GFX7: ; %bb.0: ; %entry
23762375; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23772376; GFX7-NEXT: s_mov_b32 s4, 0
2378- ; GFX7-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2379- ; GFX7-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2380- ; GFX7-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
23812377; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000
2382- ; GFX7-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
2378+ ; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2379+ ; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
2380+ ; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
23832381; GFX7-NEXT: s_mov_b32 s6, 0
23842382; GFX7-NEXT: s_mov_b32 s7, 0xf000
23852383; GFX7-NEXT: s_mov_b32 s4, s6
23862384; GFX7-NEXT: s_mov_b32 s5, s6
2387- ; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
2388- ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
2389- ; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
2385+ ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
2386+ ; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
2387+ ; GFX7-NEXT: buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
23902388; GFX7-NEXT: s_waitcnt vmcnt(0)
23912389; GFX7-NEXT: s_setpc_b64 s[30:31]
23922390;
23932391; GFX8-LABEL: safe_math_fract_f64:
23942392; GFX8: ; %bb.0: ; %entry
23952393; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23962394; GFX8-NEXT: s_mov_b32 s4, 0
2397- ; GFX8-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2398- ; GFX8-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2399- ; GFX8-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
24002395; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000
2401- ; GFX8-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
2402- ; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
2403- ; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
2404- ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
2396+ ; GFX8-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2397+ ; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
2398+ ; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
2399+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
2400+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
2401+ ; GFX8-NEXT: global_store_dwordx2 v[2:3], v[6:7], off
24052402; GFX8-NEXT: s_waitcnt vmcnt(0)
24062403; GFX8-NEXT: s_setpc_b64 s[30:31]
24072404;
24082405; GFX11-LABEL: safe_math_fract_f64:
24092406; GFX11: ; %bb.0: ; %entry
24102407; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411- ; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2412- ; GFX11-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2413- ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
2414- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2415- ; GFX11-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
2416- ; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
2417- ; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v6 :: v_dual_cndmask_b32 v1, 0, v7
2408+ ; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2409+ ; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
2410+ ; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
2411+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
2412+ ; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
2413+ ; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off
24182414; GFX11-NEXT: s_setpc_b64 s[30:31]
24192415;
24202416; GFX12-LABEL: safe_math_fract_f64:
@@ -2424,14 +2420,13 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
24242420; GFX12-NEXT: s_wait_samplecnt 0x0
24252421; GFX12-NEXT: s_wait_bvhcnt 0x0
24262422; GFX12-NEXT: s_wait_kmcnt 0x0
2427- ; GFX12-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2428- ; GFX12-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2429- ; GFX12-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
2430- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
2431- ; GFX12-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
2432- ; GFX12-NEXT: global_store_b64 v[2:3], v[4:5], off
2423+ ; GFX12-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2424+ ; GFX12-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
2425+ ; GFX12-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
24332426; GFX12-NEXT: s_wait_alu 0xfffd
2434- ; GFX12-NEXT: v_dual_cndmask_b32 v0, 0, v6 :: v_dual_cndmask_b32 v1, 0, v7
2427+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
2428+ ; GFX12-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
2429+ ; GFX12-NEXT: global_store_b64 v[2:3], v[6:7], off
24352430; GFX12-NEXT: s_setpc_b64 s[30:31]
24362431entry:
24372432 %floor = tail call double @llvm.floor.f64 (double %x )
0 commit comments