@@ -4388,12 +4388,11 @@ define <2 x bfloat> @v_copysign_out_v2bf16_mag_v2bf16_sign_v2f32(<2 x bfloat> %m
43884388; GFX8-LABEL: v_copysign_out_v2bf16_mag_v2bf16_sign_v2f32: 
43894389; GFX8:       ; %bb.0: 
43904390; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
4391- ; GFX8-NEXT:    v_bfe_u32 v4, v1, 16, 1 
4392- ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v1 
4393- ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x7fff, v4 
4394- ; GFX8-NEXT:    v_or_b32_e32 v3, 0x400000, v1 
4391+ ; GFX8-NEXT:    v_bfe_u32 v3, v1, 16, 1 
4392+ ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v1 
4393+ ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x7fff, v3 
43954394; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1 
4396- ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v3 , vcc 
4395+ ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1 , vcc 
43974396; GFX8-NEXT:    v_bfe_u32 v3, v2, 16, 1 
43984397; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v2 
43994398; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x7fff, v3 
@@ -5267,13 +5266,12 @@ define amdgpu_ps i32 @s_copysign_out_v2bf16_mag_v2bf16_sign_v2f32(<2 x bfloat> i
52675266; 
52685267; GFX8-LABEL: s_copysign_out_v2bf16_mag_v2bf16_sign_v2f32: 
52695268; GFX8:       ; %bb.0: 
5270- ; GFX8-NEXT:    s_bfe_u32 s4, s1, 0x10010 
5271- ; GFX8-NEXT:    s_add_i32 s4, s4, s1 
5272- ; GFX8-NEXT:    s_or_b32 s3, s1, 0x400000 
5273- ; GFX8-NEXT:    s_add_i32 s6, s4, 0x7fff 
5269+ ; GFX8-NEXT:    s_bfe_u32 s3, s1, 0x10010 
5270+ ; GFX8-NEXT:    s_add_i32 s3, s3, s1 
5271+ ; GFX8-NEXT:    s_addk_i32 s3, 0x7fff 
52745272; GFX8-NEXT:    v_cmp_u_f32_e64 s[4:5], s1, s1 
52755273; GFX8-NEXT:    s_and_b64 s[4:5], s[4:5], exec 
5276- ; GFX8-NEXT:    s_cselect_b32 s1, s3, s6  
5274+ ; GFX8-NEXT:    s_cselect_b32 s1, s1, s3  
52775275; GFX8-NEXT:    s_bfe_u32 s3, s2, 0x10010 
52785276; GFX8-NEXT:    s_add_i32 s3, s3, s2 
52795277; GFX8-NEXT:    s_addk_i32 s3, 0x7fff 
@@ -6340,18 +6338,16 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3bf16_sign_v3f32(<3 x bfloat> %m
63406338; GFX8-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f32: 
63416339; GFX8:       ; %bb.0: 
63426340; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
6343- ; GFX8-NEXT:    v_bfe_u32 v6, v2, 16, 1 
6344- ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v2 
6345- ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x7fff, v6 
6346- ; GFX8-NEXT:    v_or_b32_e32 v5, 0x400000, v2 
6347- ; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2 
6348- ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v6, v5, vcc 
63496341; GFX8-NEXT:    v_bfe_u32 v5, v4, 16, 1 
6350- ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff 
63516342; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v4 
6352- ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s4 , v5 
6343+ ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x7fff , v5 
63536344; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4 
63546345; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc 
6346+ ; GFX8-NEXT:    v_bfe_u32 v5, v2, 16, 1 
6347+ ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v2 
6348+ ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x7fff, v5 
6349+ ; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2 
6350+ ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc 
63556351; GFX8-NEXT:    v_bfe_u32 v5, v3, 16, 1 
63566352; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v3 
63576353; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x7fff, v5 
@@ -7687,24 +7683,22 @@ define <4 x bfloat> @v_copysign_out_v4bf16_mag_v4bf16_sign_v4f32(<4 x bfloat> %m
76877683; GFX8-LABEL: v_copysign_out_v4bf16_mag_v4bf16_sign_v4f32: 
76887684; GFX8:       ; %bb.0: 
76897685; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
7690- ; GFX8-NEXT:    v_bfe_u32 v7, v4, 16, 1 
7691- ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v4 
7692- ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x7fff, v7 
7693- ; GFX8-NEXT:    v_or_b32_e32 v6, 0x400000, v4 
7686+ ; GFX8-NEXT:    v_bfe_u32 v6, v4, 16, 1 
7687+ ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v4 
7688+ ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x7fff, v6 
76947689; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4 
7695- ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc 
7696- ; GFX8-NEXT:    v_bfe_u32 v7, v2, 16, 1 
7697- ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff 
7698- ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v2 
7699- ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, s4, v7 
7700- ; GFX8-NEXT:    v_or_b32_e32 v6, 0x400000, v2 
7701- ; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2 
7702- ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc 
7690+ ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc 
77037691; GFX8-NEXT:    v_bfe_u32 v6, v5, 16, 1 
7692+ ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff 
77047693; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v5 
77057694; GFX8-NEXT:    v_add_u32_e32 v6, vcc, s4, v6 
77067695; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5 
77077696; GFX8-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc 
7697+ ; GFX8-NEXT:    v_bfe_u32 v6, v2, 16, 1 
7698+ ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v2 
7699+ ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, s4, v6 
7700+ ; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2 
7701+ ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc 
77087702; GFX8-NEXT:    v_bfe_u32 v6, v3, 16, 1 
77097703; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v3 
77107704; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x7fff, v6 
0 commit comments