@@ -4388,12 +4388,11 @@ define <2 x bfloat> @v_copysign_out_v2bf16_mag_v2bf16_sign_v2f32(<2 x bfloat> %m
43884388; GFX8-LABEL: v_copysign_out_v2bf16_mag_v2bf16_sign_v2f32:
43894389; GFX8: ; %bb.0:
43904390; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4391- ; GFX8-NEXT: v_bfe_u32 v4, v1, 16, 1
4392- ; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v1
4393- ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x7fff, v4
4394- ; GFX8-NEXT: v_or_b32_e32 v3, 0x400000, v1
4391+ ; GFX8-NEXT: v_bfe_u32 v3, v1, 16, 1
4392+ ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v1
4393+ ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x7fff, v3
43954394; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
4396- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v3 , vcc
4395+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1 , vcc
43974396; GFX8-NEXT: v_bfe_u32 v3, v2, 16, 1
43984397; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v2
43994398; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x7fff, v3
@@ -5267,13 +5266,12 @@ define amdgpu_ps i32 @s_copysign_out_v2bf16_mag_v2bf16_sign_v2f32(<2 x bfloat> i
52675266;
52685267; GFX8-LABEL: s_copysign_out_v2bf16_mag_v2bf16_sign_v2f32:
52695268; GFX8: ; %bb.0:
5270- ; GFX8-NEXT: s_bfe_u32 s4, s1, 0x10010
5271- ; GFX8-NEXT: s_add_i32 s4, s4, s1
5272- ; GFX8-NEXT: s_or_b32 s3, s1, 0x400000
5273- ; GFX8-NEXT: s_add_i32 s6, s4, 0x7fff
5269+ ; GFX8-NEXT: s_bfe_u32 s3, s1, 0x10010
5270+ ; GFX8-NEXT: s_add_i32 s3, s3, s1
5271+ ; GFX8-NEXT: s_addk_i32 s3, 0x7fff
52745272; GFX8-NEXT: v_cmp_u_f32_e64 s[4:5], s1, s1
52755273; GFX8-NEXT: s_and_b64 s[4:5], s[4:5], exec
5276- ; GFX8-NEXT: s_cselect_b32 s1, s3, s6
5274+ ; GFX8-NEXT: s_cselect_b32 s1, s1, s3
52775275; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10010
52785276; GFX8-NEXT: s_add_i32 s3, s3, s2
52795277; GFX8-NEXT: s_addk_i32 s3, 0x7fff
@@ -6340,18 +6338,16 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3bf16_sign_v3f32(<3 x bfloat> %m
63406338; GFX8-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f32:
63416339; GFX8: ; %bb.0:
63426340; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6343- ; GFX8-NEXT: v_bfe_u32 v6, v2, 16, 1
6344- ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v2
6345- ; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6
6346- ; GFX8-NEXT: v_or_b32_e32 v5, 0x400000, v2
6347- ; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
6348- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v5, vcc
63496341; GFX8-NEXT: v_bfe_u32 v5, v4, 16, 1
6350- ; GFX8-NEXT: s_movk_i32 s4, 0x7fff
63516342; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v4
6352- ; GFX8-NEXT: v_add_u32_e32 v5, vcc, s4 , v5
6343+ ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff , v5
63536344; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
63546345; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
6346+ ; GFX8-NEXT: v_bfe_u32 v5, v2, 16, 1
6347+ ; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v2
6348+ ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff, v5
6349+ ; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
6350+ ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
63556351; GFX8-NEXT: v_bfe_u32 v5, v3, 16, 1
63566352; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v3
63576353; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff, v5
@@ -7687,24 +7683,22 @@ define <4 x bfloat> @v_copysign_out_v4bf16_mag_v4bf16_sign_v4f32(<4 x bfloat> %m
76877683; GFX8-LABEL: v_copysign_out_v4bf16_mag_v4bf16_sign_v4f32:
76887684; GFX8: ; %bb.0:
76897685; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7690- ; GFX8-NEXT: v_bfe_u32 v7, v4, 16, 1
7691- ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v4
7692- ; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x7fff, v7
7693- ; GFX8-NEXT: v_or_b32_e32 v6, 0x400000, v4
7686+ ; GFX8-NEXT: v_bfe_u32 v6, v4, 16, 1
7687+ ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v4
7688+ ; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6
76947689; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
7695- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
7696- ; GFX8-NEXT: v_bfe_u32 v7, v2, 16, 1
7697- ; GFX8-NEXT: s_movk_i32 s4, 0x7fff
7698- ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v2
7699- ; GFX8-NEXT: v_add_u32_e32 v7, vcc, s4, v7
7700- ; GFX8-NEXT: v_or_b32_e32 v6, 0x400000, v2
7701- ; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
7702- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
7690+ ; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
77037691; GFX8-NEXT: v_bfe_u32 v6, v5, 16, 1
7692+ ; GFX8-NEXT: s_movk_i32 s4, 0x7fff
77047693; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v5
77057694; GFX8-NEXT: v_add_u32_e32 v6, vcc, s4, v6
77067695; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
77077696; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
7697+ ; GFX8-NEXT: v_bfe_u32 v6, v2, 16, 1
7698+ ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v2
7699+ ; GFX8-NEXT: v_add_u32_e32 v6, vcc, s4, v6
7700+ ; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
7701+ ; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
77087702; GFX8-NEXT: v_bfe_u32 v6, v3, 16, 1
77097703; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v3
77107704; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6
0 commit comments