@@ -37712,12 +37712,10 @@ define bfloat @v_select_bf16(i1 %cond, bfloat %a, bfloat %b) {
3771237712; GFX11TRUE16-LABEL: v_select_bf16:
3771337713; GFX11TRUE16: ; %bb.0:
3771437714; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37715- ; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
37716- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
37717- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
37718- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
37719- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
37720- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
37715+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
37716+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
37717+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
37718+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
3772137719; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3772237720;
3772337721; GFX11FAKE16-LABEL: v_select_bf16:
@@ -37785,14 +37783,11 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
3778537783; GFX11TRUE16-LABEL: v_select_fneg_lhs_bf16:
3778637784; GFX11TRUE16: ; %bb.0:
3778737785; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37788- ; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
37789- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
37790- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
37791- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
37792- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
37793- ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
37794- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
37795- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
37786+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
37787+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
37788+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
37789+ ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
37790+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
3779637791; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3779737792;
3779837793; GFX11FAKE16-LABEL: v_select_fneg_lhs_bf16:
@@ -37862,14 +37857,11 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
3786237857; GFX11TRUE16-LABEL: v_select_fneg_rhs_bf16:
3786337858; GFX11TRUE16: ; %bb.0:
3786437859; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37865- ; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
37866- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
37867- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
37868- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
37869- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
37870- ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
37871- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
37872- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
37860+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
37861+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
37862+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
37863+ ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
37864+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
3787337865; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3787437866;
3787537867; GFX11FAKE16-LABEL: v_select_fneg_rhs_bf16:
@@ -42659,17 +42651,16 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4265942651; GFX11TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:72
4266042652; GFX11TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:4
4266142653; GFX11TRUE16-NEXT: scratch_load_b32 v87, off, s32 offset:68
42662- ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
4266342654; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
4266442655; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14
42656+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
4266542657; GFX11TRUE16-NEXT: v_and_b32_e32 v18, 1, v18
4266642658; GFX11TRUE16-NEXT: v_and_b32_e32 v20, 1, v20
4266742659; GFX11TRUE16-NEXT: v_and_b32_e32 v22, 1, v22
4266842660; GFX11TRUE16-NEXT: v_and_b32_e32 v24, 1, v24
4266942661; GFX11TRUE16-NEXT: v_and_b32_e32 v26, 1, v26
4267042662; GFX11TRUE16-NEXT: v_and_b32_e32 v28, 1, v28
4267142663; GFX11TRUE16-NEXT: v_and_b32_e32 v30, 1, v30
42672- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v16
4267342664; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
4267442665; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
4267542666; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
@@ -42693,6 +42684,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4269342684; GFX11TRUE16-NEXT: v_and_b32_e32 v29, 1, v29
4269442685; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
4269542686; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s13, 1, v14
42687+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v16
4269642688; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s17, 1, v18
4269742689; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s19, 1, v20
4269842690; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s21, 1, v22
@@ -42722,45 +42714,44 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4272242714; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s26, 1, v27
4272342715; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s29, 1, v29
4272442716; GFX11TRUE16-NEXT: s_waitcnt vmcnt(32)
42725- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v31.l
42717+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v31, 1, v31
4272642718; GFX11TRUE16-NEXT: s_waitcnt vmcnt(31)
42727- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17 , 16, v32
42719+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16 , 16, v32
4272842720; GFX11TRUE16-NEXT: s_waitcnt vmcnt(30)
42729- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18 , 16, v33
42721+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17 , 16, v33
4273042722; GFX11TRUE16-NEXT: v_cndmask_b16 v15.l, v33.l, v32.l, s28
4273142723; GFX11TRUE16-NEXT: s_waitcnt vmcnt(29)
42732- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v34
42733- ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
42724+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v34
4273442725; GFX11TRUE16-NEXT: s_waitcnt vmcnt(28)
42735- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20 , 16, v35
42726+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19 , 16, v35
4273642727; GFX11TRUE16-NEXT: v_cndmask_b16 v14.l, v35.l, v34.l, s27
4273742728; GFX11TRUE16-NEXT: s_waitcnt vmcnt(27)
42738- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v21 , 16, v36
42729+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20 , 16, v36
4273942730; GFX11TRUE16-NEXT: s_waitcnt vmcnt(26)
42740- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v22 , 16, v37
42731+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v21 , 16, v37
4274142732; GFX11TRUE16-NEXT: v_cndmask_b16 v13.l, v37.l, v36.l, s25
4274242733; GFX11TRUE16-NEXT: s_waitcnt vmcnt(25)
42743- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v23 , 16, v38
42734+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v22 , 16, v38
4274442735; GFX11TRUE16-NEXT: s_waitcnt vmcnt(24)
42745- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v24 , 16, v39
42736+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v23 , 16, v39
4274642737; GFX11TRUE16-NEXT: v_cndmask_b16 v12.l, v39.l, v38.l, s23
4274742738; GFX11TRUE16-NEXT: s_waitcnt vmcnt(23)
42748- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v25 , 16, v48
42739+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v24 , 16, v48
4274942740; GFX11TRUE16-NEXT: s_waitcnt vmcnt(22)
42750- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v26 , 16, v49
42741+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v25 , 16, v49
4275142742; GFX11TRUE16-NEXT: v_cndmask_b16 v11.l, v49.l, v48.l, s21
4275242743; GFX11TRUE16-NEXT: s_waitcnt vmcnt(21)
42753- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v27 , 16, v50
42744+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v26 , 16, v50
4275442745; GFX11TRUE16-NEXT: s_waitcnt vmcnt(20)
42755- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v28 , 16, v51
42746+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v27 , 16, v51
4275642747; GFX11TRUE16-NEXT: v_cndmask_b16 v10.l, v51.l, v50.l, s19
4275742748; GFX11TRUE16-NEXT: s_waitcnt vmcnt(19)
42758- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v29 , 16, v52
42749+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v28 , 16, v52
4275942750; GFX11TRUE16-NEXT: s_waitcnt vmcnt(18)
42760- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v30 , 16, v53
42751+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v29 , 16, v53
4276142752; GFX11TRUE16-NEXT: v_cndmask_b16 v9.l, v53.l, v52.l, s17
4276242753; GFX11TRUE16-NEXT: s_waitcnt vmcnt(17)
42763- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v31 , 16, v54
42754+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v30 , 16, v54
4276442755; GFX11TRUE16-NEXT: s_waitcnt vmcnt(16)
4276542756; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v55
4276642757; GFX11TRUE16-NEXT: v_cndmask_b16 v8.l, v55.l, v54.l, s15
@@ -42798,20 +42789,20 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4279842789; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
4279942790; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v87
4280042791; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v87.l, v86.l, vcc_lo
42801- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
42792+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v31
4280242793; GFX11TRUE16-NEXT: v_cndmask_b16 v6.l, v67.l, v66.l, s11
4280342794; GFX11TRUE16-NEXT: v_cndmask_b16 v5.l, v69.l, v68.l, s9
4280442795; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v71.l, v70.l, s7
4280542796; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v81.l, v80.l, s5
4280642797; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v83.l, v82.l, s3
4280742798; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v85.l, v84.l, s1
42808- ; GFX11TRUE16-NEXT: v_cndmask_b16 v14.h, v20 .l, v19 .l, s29
42809- ; GFX11TRUE16-NEXT: v_cndmask_b16 v13.h, v22 .l, v21 .l, s26
42810- ; GFX11TRUE16-NEXT: v_cndmask_b16 v12.h, v24 .l, v23 .l, s24
42811- ; GFX11TRUE16-NEXT: v_cndmask_b16 v11.h, v26 .l, v25 .l, s22
42812- ; GFX11TRUE16-NEXT: v_cndmask_b16 v10.h, v28 .l, v27 .l, s20
42813- ; GFX11TRUE16-NEXT: v_cndmask_b16 v9.h, v30 .l, v29 .l, s18
42814- ; GFX11TRUE16-NEXT: v_cndmask_b16 v8.h, v32.l, v31 .l, s16
42799+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v14.h, v19 .l, v18 .l, s29
42800+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v13.h, v21 .l, v20 .l, s26
42801+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v12.h, v23 .l, v22 .l, s24
42802+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v11.h, v25 .l, v24 .l, s22
42803+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v10.h, v27 .l, v26 .l, s20
42804+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v9.h, v29 .l, v28 .l, s18
42805+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v8.h, v32.l, v30 .l, s16
4281542806; GFX11TRUE16-NEXT: v_cndmask_b16 v7.h, v34.l, v33.l, s14
4281642807; GFX11TRUE16-NEXT: v_cndmask_b16 v6.h, v36.l, v35.l, s12
4281742808; GFX11TRUE16-NEXT: v_cndmask_b16 v5.h, v38.l, v37.l, s10
@@ -42820,7 +42811,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4282042811; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v54.l, v53.l, s2
4282142812; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v52.l, v51.l, s4
4282242813; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v50.l, v49.l, s6
42823- ; GFX11TRUE16-NEXT: v_cndmask_b16 v15.h, v18 .l, v17 .l, vcc_lo
42814+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v15.h, v17 .l, v16 .l, vcc_lo
4282442815; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
4282542816;
4282642817; GFX11FAKE16-LABEL: v_vselect_v32bf16:
0 commit comments