@@ -4677,37 +4677,33 @@ define <2 x bfloat> @v_copysign_out_v2bf16_mag_v2bf16_sign_v2f64(<2 x bfloat> %m
46774677; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46784678; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
46794679; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
4680- ; GCN-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
4681- ; GCN-NEXT: v_cvt_f32_f64_e32 v3, v[4:5]
4680+ ; GCN-NEXT: v_and_b32_e32 v2, 0x80000000, v5
4681+ ; GCN-NEXT: v_and_b32_e32 v3, 0x80000000, v3
46824682; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4683- ; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v3
46844683; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
4684+ ; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v3
46854685; GCN-NEXT: v_bfe_u32 v0, v0, 16, 15
4686- ; GCN-NEXT: v_and_b32_e32 v3, 0x8000, v3
4687- ; GCN-NEXT: v_and_b32_e32 v2, 0x8000, v2
4688- ; GCN-NEXT: v_or_b32_e32 v1, v1, v3
4689- ; GCN-NEXT: v_or_b32_e32 v0, v0, v2
4690- ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4686+ ; GCN-NEXT: v_or_b32_e32 v1, v1, v2
4687+ ; GCN-NEXT: v_or_b32_e32 v0, v0, v3
46914688; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4689+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
46924690; GCN-NEXT: s_setpc_b64 s[30:31]
46934691;
46944692; GFX7-LABEL: v_copysign_out_v2bf16_mag_v2bf16_sign_v2f64:
46954693; GFX7: ; %bb.0:
46964694; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4697- ; GFX7-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
4698- ; GFX7-NEXT: v_cvt_f32_f64_e32 v3, v[4:5]
4699- ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
47004695; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
4696+ ; GFX7-NEXT: v_and_b32_e32 v2, 0x80000000, v5
47014697; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4702- ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
4703- ; GFX7-NEXT: v_and_b32_e32 v3, 0x8000, v3
47044698; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
4705- ; GFX7-NEXT: v_and_b32_e32 v2, 0x8000, v2
4699+ ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
4700+ ; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
4701+ ; GFX7-NEXT: v_and_b32_e32 v2, 0x80000000, v3
4702+ ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2
47064703; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 15
4707- ; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
47084704; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
4709- ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
47104705; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4706+ ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
47114707; GFX7-NEXT: s_setpc_b64 s[30:31]
47124708;
47134709; GFX8-LABEL: v_copysign_out_v2bf16_mag_v2bf16_sign_v2f64:
@@ -5585,35 +5581,31 @@ define amdgpu_ps i32 @s_copysign_out_v2bf16_mag_v2bf16_sign_v2f64(<2 x bfloat> i
55855581; GCN: ; %bb.0:
55865582; GCN-NEXT: v_mul_f32_e64 v0, 1.0, s1
55875583; GCN-NEXT: v_mul_f32_e64 v1, 1.0, s0
5588- ; GCN-NEXT: v_cvt_f32_f64_e32 v2, s[4:5]
5589- ; GCN-NEXT: v_cvt_f32_f64_e32 v3, s[2:3]
5590- ; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v2
5591- ; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v3
5584+ ; GCN-NEXT: s_and_b32 s0, s3, 0x80000000
5585+ ; GCN-NEXT: s_and_b32 s1, s5, 0x80000000
5586+ ; GCN-NEXT: s_lshr_b32 s0, s0, 16
55925587; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
5588+ ; GCN-NEXT: s_lshr_b32 s1, s1, 16
55935589; GCN-NEXT: v_bfe_u32 v0, v0, 16, 15
5594- ; GCN-NEXT: v_and_b32_e32 v3, 0x8000, v3
5595- ; GCN-NEXT: v_and_b32_e32 v2, 0x8000, v2
5596- ; GCN-NEXT: v_or_b32_e32 v1, v1, v3
5597- ; GCN-NEXT: v_or_b32_e32 v0, v0, v2
5590+ ; GCN-NEXT: v_or_b32_e32 v1, s0, v1
5591+ ; GCN-NEXT: v_or_b32_e32 v0, s1, v0
55985592; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
55995593; GCN-NEXT: v_or_b32_e32 v0, v1, v0
56005594; GCN-NEXT: v_readfirstlane_b32 s0, v0
56015595; GCN-NEXT: ; return to shader part epilog
56025596;
56035597; GFX7-LABEL: s_copysign_out_v2bf16_mag_v2bf16_sign_v2f64:
56045598; GFX7: ; %bb.0:
5605- ; GFX7-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
5606- ; GFX7-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
5607- ; GFX7-NEXT: v_mul_f32_e64 v2, 1.0, s1
5608- ; GFX7-NEXT: v_mul_f32_e64 v3, 1.0, s0
5609- ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5610- ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
5611- ; GFX7-NEXT: v_and_b32_e32 v0, 0x8000, v0
5612- ; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 15
5613- ; GFX7-NEXT: v_and_b32_e32 v1, 0x8000, v1
5614- ; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 15
5615- ; GFX7-NEXT: v_or_b32_e32 v0, v2, v0
5616- ; GFX7-NEXT: v_or_b32_e32 v1, v3, v1
5599+ ; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s0
5600+ ; GFX7-NEXT: s_and_b32 s0, s3, 0x80000000
5601+ ; GFX7-NEXT: s_lshr_b32 s0, s0, 16
5602+ ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
5603+ ; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s1
5604+ ; GFX7-NEXT: v_or_b32_e32 v1, s0, v1
5605+ ; GFX7-NEXT: s_and_b32 s0, s5, 0x80000000
5606+ ; GFX7-NEXT: s_lshr_b32 s0, s0, 16
5607+ ; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 15
5608+ ; GFX7-NEXT: v_or_b32_e32 v0, s0, v0
56175609; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
56185610; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
56195611; GFX7-NEXT: v_readfirstlane_b32 s0, v0
@@ -6682,51 +6674,45 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64(<3 x bfloat> %m
66826674; GCN: ; %bb.0:
66836675; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66846676; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
6685- ; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
66866677; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
6687- ; GCN-NEXT: v_cvt_f32_f64_e32 v3, v[3:4]
6688- ; GCN-NEXT: v_cvt_f32_f64_e32 v4, v[5:6]
6689- ; GCN-NEXT: v_cvt_f32_f64_e32 v5, v[7:8]
6678+ ; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
6679+ ; GCN-NEXT: v_and_b32_e32 v3, 0x80000000, v6
6680+ ; GCN-NEXT: v_and_b32_e32 v5, 0x80000000, v8
6681+ ; GCN-NEXT: v_and_b32_e32 v4, 0x80000000, v4
66906682; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6691- ; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v4
6683+ ; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
66926684; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v5
66936685; GCN-NEXT: v_bfe_u32 v2, v2, 16, 15
6694- ; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
6686+ ; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v4
66956687; GCN-NEXT: v_bfe_u32 v0, v0, 16, 15
6696- ; GCN-NEXT: v_and_b32_e32 v5, 0x8000, v5
6697- ; GCN-NEXT: v_and_b32_e32 v4, 0x8000, v4
6698- ; GCN-NEXT: v_and_b32_e32 v3, 0x8000, v3
6688+ ; GCN-NEXT: v_or_b32_e32 v1, v1, v3
66996689; GCN-NEXT: v_or_b32_e32 v2, v2, v5
6700- ; GCN-NEXT: v_or_b32_e32 v1, v1, v4
6701- ; GCN-NEXT: v_or_b32_e32 v0, v0, v3
6702- ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6690+ ; GCN-NEXT: v_or_b32_e32 v0, v0, v4
67036691; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6692+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
67046693; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v2
67056694; GCN-NEXT: s_setpc_b64 s[30:31]
67066695;
67076696; GFX7-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64:
67086697; GFX7: ; %bb.0:
67096698; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6710- ; GFX7-NEXT: v_cvt_f32_f64_e32 v3, v[3:4]
6711- ; GFX7-NEXT: v_cvt_f32_f64_e32 v4, v[5:6]
6712- ; GFX7-NEXT: v_cvt_f32_f64_e32 v5, v[7:8]
6713- ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
67146699; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
6700+ ; GFX7-NEXT: v_and_b32_e32 v3, 0x80000000, v6
6701+ ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6702+ ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
67156703; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
6704+ ; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
6705+ ; GFX7-NEXT: v_and_b32_e32 v3, 0x80000000, v8
67166706; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6717- ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
6718- ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v5
6719- ; GFX7-NEXT: v_and_b32_e32 v5, 0x8000, v5
67206707; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 15
6721- ; GFX7-NEXT: v_and_b32_e32 v4, 0x8000, v4
6722- ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
6723- ; GFX7-NEXT: v_and_b32_e32 v3, 0x8000, v3
6708+ ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
6709+ ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
6710+ ; GFX7-NEXT: v_and_b32_e32 v3, 0x80000000, v4
6711+ ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
67246712; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 15
6725- ; GFX7-NEXT: v_or_b32_e32 v2, v2, v5
6726- ; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
67276713; GFX7-NEXT: v_or_b32_e32 v0, v0, v3
6728- ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
67296714; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6715+ ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
67306716; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
67316717; GFX7-NEXT: s_setpc_b64 s[30:31]
67326718;
@@ -8082,66 +8068,58 @@ define <4 x bfloat> @v_copysign_out_v4bf16_mag_v4bf16_sign_v4f64(<4 x bfloat> %m
80828068; GCN: ; %bb.0:
80838069; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80848070; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
8085- ; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
80868071; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
80878072; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
8088- ; GCN-NEXT: v_cvt_f32_f64_e32 v4, v[4:5]
8089- ; GCN-NEXT: v_cvt_f32_f64_e32 v5, v[6:7]
8090- ; GCN-NEXT: v_cvt_f32_f64_e32 v6, v[8:9]
8091- ; GCN-NEXT: v_cvt_f32_f64_e32 v7, v[10:11]
8073+ ; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
8074+ ; GCN-NEXT: v_and_b32_e32 v4, 0x80000000, v7
8075+ ; GCN-NEXT: v_and_b32_e32 v6, 0x80000000, v11
8076+ ; GCN-NEXT: v_and_b32_e32 v7, 0x80000000, v9
8077+ ; GCN-NEXT: v_and_b32_e32 v5, 0x80000000, v5
80928078; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8093- ; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v5
8079+ ; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
80948080; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v6
8095- ; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v7
80968081; GCN-NEXT: v_bfe_u32 v3, v3, 16, 15
8082+ ; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v7
80978083; GCN-NEXT: v_bfe_u32 v2, v2, 16, 15
8098- ; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
8084+ ; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v5
80998085; GCN-NEXT: v_bfe_u32 v0, v0, 16, 15
8100- ; GCN-NEXT: v_and_b32_e32 v7, 0x8000, v7
8101- ; GCN-NEXT: v_and_b32_e32 v6, 0x8000, v6
8102- ; GCN-NEXT: v_and_b32_e32 v5, 0x8000, v5
8103- ; GCN-NEXT: v_and_b32_e32 v4, 0x8000, v4
8104- ; GCN-NEXT: v_or_b32_e32 v3, v3, v7
8105- ; GCN-NEXT: v_or_b32_e32 v2, v2, v6
8106- ; GCN-NEXT: v_or_b32_e32 v1, v1, v5
8107- ; GCN-NEXT: v_or_b32_e32 v0, v0, v4
8108- ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
8086+ ; GCN-NEXT: v_or_b32_e32 v1, v1, v4
8087+ ; GCN-NEXT: v_or_b32_e32 v3, v3, v6
8088+ ; GCN-NEXT: v_or_b32_e32 v2, v2, v7
8089+ ; GCN-NEXT: v_or_b32_e32 v0, v0, v5
81098090; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1
8110- ; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v2
81118091; GCN-NEXT: v_lshlrev_b32_e32 v3, 16, v3
8092+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
8093+ ; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v2
81128094; GCN-NEXT: s_setpc_b64 s[30:31]
81138095;
81148096; GFX7-LABEL: v_copysign_out_v4bf16_mag_v4bf16_sign_v4f64:
81158097; GFX7: ; %bb.0:
81168098; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8117- ; GFX7-NEXT: v_cvt_f32_f64_e32 v4, v[4:5]
8118- ; GFX7-NEXT: v_cvt_f32_f64_e32 v5, v[6:7]
8119- ; GFX7-NEXT: v_cvt_f32_f64_e32 v6, v[8:9]
8120- ; GFX7-NEXT: v_cvt_f32_f64_e32 v7, v[10:11]
8121- ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
81228099; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
8123- ; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
8100+ ; GFX7-NEXT: v_and_b32_e32 v4, 0x80000000, v7
8101+ ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8102+ ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
81248103; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
8104+ ; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
8105+ ; GFX7-NEXT: v_and_b32_e32 v4, 0x80000000, v11
81258106; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8126- ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v5
8127- ; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v6
8128- ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v7
8129- ; GFX7-NEXT: v_and_b32_e32 v7, 0x8000, v7
81308107; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 15
8131- ; GFX7-NEXT: v_and_b32_e32 v6, 0x8000, v6
8108+ ; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
8109+ ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4
8110+ ; GFX7-NEXT: v_and_b32_e32 v4, 0x80000000, v9
8111+ ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
81328112; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 15
8133- ; GFX7-NEXT: v_and_b32_e32 v5, 0x8000, v5
8134- ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
8135- ; GFX7-NEXT: v_and_b32_e32 v4, 0x8000, v4
8113+ ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
8114+ ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4
8115+ ; GFX7-NEXT: v_and_b32_e32 v4, 0x80000000, v5
8116+ ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
81368117; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 15
8137- ; GFX7-NEXT: v_or_b32_e32 v3, v3, v7
8138- ; GFX7-NEXT: v_or_b32_e32 v2, v2, v6
8139- ; GFX7-NEXT: v_or_b32_e32 v1, v1, v5
81408118; GFX7-NEXT: v_or_b32_e32 v0, v0, v4
8141- ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
81428119; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
8143- ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
81448120; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
8121+ ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
8122+ ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
81458123; GFX7-NEXT: s_setpc_b64 s[30:31]
81468124;
81478125; GFX8-LABEL: v_copysign_out_v4bf16_mag_v4bf16_sign_v4f64:
0 commit comments