@@ -2479,8 +2479,7 @@ define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
24792479; GFX1032-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
24802480; GFX1032-NEXT: v_cvt_u32_f32_e32 v1, v1
24812481; GFX1032-NEXT: v_mul_lo_u32 v2, s1, v1
2482- ; GFX1032-NEXT: s_ff1_i32_b32 s1, 0x80000000
2483- ; GFX1032-NEXT: s_add_i32 s1, s1, 32
2482+ ; GFX1032-NEXT: s_brev_b32 s1, 1
24842483; GFX1032-NEXT: v_mul_hi_u32 v2, v1, v2
24852484; GFX1032-NEXT: v_add_nc_u32_e32 v1, v1, v2
24862485; GFX1032-NEXT: v_mul_hi_u32 v1, v0, v1
@@ -2494,8 +2493,7 @@ define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
24942493; GFX1032-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
24952494; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
24962495; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
2497- ; GFX1032-NEXT: s_ff1_i32_b32 s0, s0
2498- ; GFX1032-NEXT: s_min_u32 s0, s0, s1
2496+ ; GFX1032-NEXT: s_ff1_i32_b64 s0, s[0:1]
24992497; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
25002498; GFX1032-NEXT: s_cselect_b32 s0, -1, 0
25012499; GFX1032-NEXT: s_and_b32 s0, vcc_lo, s0
@@ -2529,10 +2527,7 @@ define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
25292527; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
25302528; GFX1064-NEXT: s_lshr_b64 s[0:1], vcc, 1
25312529; GFX1064-NEXT: s_bitset1_b32 s1, 31
2532- ; GFX1064-NEXT: s_ff1_i32_b32 s0, s0
2533- ; GFX1064-NEXT: s_ff1_i32_b32 s1, s1
2534- ; GFX1064-NEXT: s_add_i32 s1, s1, 32
2535- ; GFX1064-NEXT: s_min_u32 s0, s0, s1
2530+ ; GFX1064-NEXT: s_ff1_i32_b64 s0, s[0:1]
25362531; GFX1064-NEXT: s_cmp_gt_u32 s0, 9
25372532; GFX1064-NEXT: s_cselect_b64 s[0:1], -1, 0
25382533; GFX1064-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
@@ -2576,9 +2571,8 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
25762571; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
25772572; GFX1032-NEXT: v_div_scale_f32 v1, s1, s0, s0, v0
25782573; GFX1032-NEXT: v_div_scale_f32 v4, vcc_lo, v0, s0, v0
2579- ; GFX1032-NEXT: s_ff1_i32_b32 s1, 0x80000000
2574+ ; GFX1032-NEXT: s_brev_b32 s1, 1
25802575; GFX1032-NEXT: v_rcp_f32_e32 v2, v1
2581- ; GFX1032-NEXT: s_add_i32 s1, s1, 32
25822576; GFX1032-NEXT: v_fma_f32 v3, -v1, v2, 1.0
25832577; GFX1032-NEXT: v_fmac_f32_e32 v2, v3, v2
25842578; GFX1032-NEXT: v_mul_f32_e32 v3, v4, v2
@@ -2592,8 +2586,7 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
25922586; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
25932587; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
25942588; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v0
2595- ; GFX1032-NEXT: s_ff1_i32_b32 s0, s0
2596- ; GFX1032-NEXT: s_min_u32 s0, s0, s1
2589+ ; GFX1032-NEXT: s_ff1_i32_b64 s0, s[0:1]
25972590; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
25982591; GFX1032-NEXT: s_cselect_b32 s0, -1, 0
25992592; GFX1032-NEXT: s_and_b32 s0, vcc_lo, s0
@@ -2609,26 +2602,23 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
26092602; GFX1064-NEXT: v_cvt_f32_u32_e32 v0, v0
26102603; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
26112604; GFX1064-NEXT: v_div_scale_f32 v1, s[0:1], s2, s2, v0
2612- ; GFX1064-NEXT: v_div_scale_f32 v4, vcc, v0, s2, v0
26132605; GFX1064-NEXT: v_rcp_f32_e32 v2, v1
26142606; GFX1064-NEXT: v_fma_f32 v3, -v1, v2, 1.0
26152607; GFX1064-NEXT: v_fmac_f32_e32 v2, v3, v2
2616- ; GFX1064-NEXT: v_mul_f32_e32 v3, v4, v2
2617- ; GFX1064-NEXT: v_fma_f32 v5, -v1, v3, v4
2618- ; GFX1064-NEXT: v_fmac_f32_e32 v3, v5, v2
2619- ; GFX1064-NEXT: v_fma_f32 v1, -v1, v3, v4
2620- ; GFX1064-NEXT: v_div_fmas_f32 v1, v1, v2, v3
2608+ ; GFX1064-NEXT: v_div_scale_f32 v3, vcc, v0, s2, v0
2609+ ; GFX1064-NEXT: v_mul_f32_e32 v4, v3, v2
2610+ ; GFX1064-NEXT: v_fma_f32 v5, -v1, v4, v3
2611+ ; GFX1064-NEXT: v_fmac_f32_e32 v4, v5, v2
2612+ ; GFX1064-NEXT: v_fma_f32 v1, -v1, v4, v3
2613+ ; GFX1064-NEXT: v_div_fmas_f32 v1, v1, v2, v4
26212614; GFX1064-NEXT: v_div_fixup_f32 v1, v1, s2, v0
26222615; GFX1064-NEXT: v_trunc_f32_e32 v1, v1
26232616; GFX1064-NEXT: v_fma_f32 v0, -v1, s2, v0
26242617; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
26252618; GFX1064-NEXT: s_lshr_b64 s[0:1], vcc, 1
26262619; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
26272620; GFX1064-NEXT: s_bitset1_b32 s1, 31
2628- ; GFX1064-NEXT: s_ff1_i32_b32 s0, s0
2629- ; GFX1064-NEXT: s_ff1_i32_b32 s1, s1
2630- ; GFX1064-NEXT: s_add_i32 s1, s1, 32
2631- ; GFX1064-NEXT: s_min_u32 s0, s0, s1
2621+ ; GFX1064-NEXT: s_ff1_i32_b64 s0, s[0:1]
26322622; GFX1064-NEXT: s_cmp_gt_u32 s0, 9
26332623; GFX1064-NEXT: s_cselect_b64 s[0:1], -1, 0
26342624; GFX1064-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
0 commit comments