@@ -37712,12 +37712,10 @@ define bfloat @v_select_bf16(i1 %cond, bfloat %a, bfloat %b) {
3771237712; GFX11TRUE16-LABEL: v_select_bf16:
3771337713; GFX11TRUE16: ; %bb.0:
3771437714; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37715- ; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
37716- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
37717- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
37718- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
37719- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
37720- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
37715+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
37716+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
37717+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
37718+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
3772137719; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3772237720;
3772337721; GFX11FAKE16-LABEL: v_select_bf16:
@@ -37785,14 +37783,11 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
3778537783; GFX11TRUE16-LABEL: v_select_fneg_lhs_bf16:
3778637784; GFX11TRUE16: ; %bb.0:
3778737785; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37788- ; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
37789- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
37790- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
37791- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
37792- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
37793- ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
37794- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
37795- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
37786+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
37787+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
37788+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
37789+ ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
37790+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
3779637791; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3779737792;
3779837793; GFX11FAKE16-LABEL: v_select_fneg_lhs_bf16:
@@ -37862,14 +37857,11 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
3786237857; GFX11TRUE16-LABEL: v_select_fneg_rhs_bf16:
3786337858; GFX11TRUE16: ; %bb.0:
3786437859; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37865- ; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
37866- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
37867- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
37868- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
37869- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
37870- ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
37871- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
37872- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
37860+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
37861+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
37862+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
37863+ ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
37864+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
3787337865; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3787437866;
3787537867; GFX11FAKE16-LABEL: v_select_fneg_rhs_bf16:
@@ -42810,17 +42802,16 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4281042802; GFX11TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:72
4281142803; GFX11TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:4
4281242804; GFX11TRUE16-NEXT: scratch_load_b32 v87, off, s32 offset:68
42813- ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
4281442805; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
4281542806; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14
42807+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
4281642808; GFX11TRUE16-NEXT: v_and_b32_e32 v18, 1, v18
4281742809; GFX11TRUE16-NEXT: v_and_b32_e32 v20, 1, v20
4281842810; GFX11TRUE16-NEXT: v_and_b32_e32 v22, 1, v22
4281942811; GFX11TRUE16-NEXT: v_and_b32_e32 v24, 1, v24
4282042812; GFX11TRUE16-NEXT: v_and_b32_e32 v26, 1, v26
4282142813; GFX11TRUE16-NEXT: v_and_b32_e32 v28, 1, v28
4282242814; GFX11TRUE16-NEXT: v_and_b32_e32 v30, 1, v30
42823- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v16
4282442815; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
4282542816; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
4282642817; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
@@ -42844,6 +42835,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4284442835; GFX11TRUE16-NEXT: v_and_b32_e32 v29, 1, v29
4284542836; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
4284642837; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s13, 1, v14
42838+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v16
4284742839; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s17, 1, v18
4284842840; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s19, 1, v20
4284942841; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s21, 1, v22
@@ -42873,45 +42865,44 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4287342865; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s26, 1, v27
4287442866; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s29, 1, v29
4287542867; GFX11TRUE16-NEXT: s_waitcnt vmcnt(32)
42876- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v31.l
42868+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v31, 1, v31
4287742869; GFX11TRUE16-NEXT: s_waitcnt vmcnt(31)
42878- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17 , 16, v32
42870+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16 , 16, v32
4287942871; GFX11TRUE16-NEXT: s_waitcnt vmcnt(30)
42880- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18 , 16, v33
42872+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17 , 16, v33
4288142873; GFX11TRUE16-NEXT: v_cndmask_b16 v15.l, v33.l, v32.l, s28
4288242874; GFX11TRUE16-NEXT: s_waitcnt vmcnt(29)
42883- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v34
42884- ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
42875+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v34
4288542876; GFX11TRUE16-NEXT: s_waitcnt vmcnt(28)
42886- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20 , 16, v35
42877+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19 , 16, v35
4288742878; GFX11TRUE16-NEXT: v_cndmask_b16 v14.l, v35.l, v34.l, s27
4288842879; GFX11TRUE16-NEXT: s_waitcnt vmcnt(27)
42889- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v21 , 16, v36
42880+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20 , 16, v36
4289042881; GFX11TRUE16-NEXT: s_waitcnt vmcnt(26)
42891- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v22 , 16, v37
42882+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v21 , 16, v37
4289242883; GFX11TRUE16-NEXT: v_cndmask_b16 v13.l, v37.l, v36.l, s25
4289342884; GFX11TRUE16-NEXT: s_waitcnt vmcnt(25)
42894- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v23 , 16, v38
42885+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v22 , 16, v38
4289542886; GFX11TRUE16-NEXT: s_waitcnt vmcnt(24)
42896- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v24 , 16, v39
42887+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v23 , 16, v39
4289742888; GFX11TRUE16-NEXT: v_cndmask_b16 v12.l, v39.l, v38.l, s23
4289842889; GFX11TRUE16-NEXT: s_waitcnt vmcnt(23)
42899- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v25 , 16, v48
42890+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v24 , 16, v48
4290042891; GFX11TRUE16-NEXT: s_waitcnt vmcnt(22)
42901- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v26 , 16, v49
42892+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v25 , 16, v49
4290242893; GFX11TRUE16-NEXT: v_cndmask_b16 v11.l, v49.l, v48.l, s21
4290342894; GFX11TRUE16-NEXT: s_waitcnt vmcnt(21)
42904- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v27 , 16, v50
42895+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v26 , 16, v50
4290542896; GFX11TRUE16-NEXT: s_waitcnt vmcnt(20)
42906- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v28 , 16, v51
42897+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v27 , 16, v51
4290742898; GFX11TRUE16-NEXT: v_cndmask_b16 v10.l, v51.l, v50.l, s19
4290842899; GFX11TRUE16-NEXT: s_waitcnt vmcnt(19)
42909- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v29 , 16, v52
42900+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v28 , 16, v52
4291042901; GFX11TRUE16-NEXT: s_waitcnt vmcnt(18)
42911- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v30 , 16, v53
42902+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v29 , 16, v53
4291242903; GFX11TRUE16-NEXT: v_cndmask_b16 v9.l, v53.l, v52.l, s17
4291342904; GFX11TRUE16-NEXT: s_waitcnt vmcnt(17)
42914- ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v31 , 16, v54
42905+ ; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v30 , 16, v54
4291542906; GFX11TRUE16-NEXT: s_waitcnt vmcnt(16)
4291642907; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v55
4291742908; GFX11TRUE16-NEXT: v_cndmask_b16 v8.l, v55.l, v54.l, s15
@@ -42949,20 +42940,20 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4294942940; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
4295042941; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v87
4295142942; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v87.l, v86.l, vcc_lo
42952- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
42943+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v31
4295342944; GFX11TRUE16-NEXT: v_cndmask_b16 v6.l, v67.l, v66.l, s11
4295442945; GFX11TRUE16-NEXT: v_cndmask_b16 v5.l, v69.l, v68.l, s9
4295542946; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v71.l, v70.l, s7
4295642947; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v81.l, v80.l, s5
4295742948; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v83.l, v82.l, s3
4295842949; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v85.l, v84.l, s1
42959- ; GFX11TRUE16-NEXT: v_cndmask_b16 v14.h, v20 .l, v19 .l, s29
42960- ; GFX11TRUE16-NEXT: v_cndmask_b16 v13.h, v22 .l, v21 .l, s26
42961- ; GFX11TRUE16-NEXT: v_cndmask_b16 v12.h, v24 .l, v23 .l, s24
42962- ; GFX11TRUE16-NEXT: v_cndmask_b16 v11.h, v26 .l, v25 .l, s22
42963- ; GFX11TRUE16-NEXT: v_cndmask_b16 v10.h, v28 .l, v27 .l, s20
42964- ; GFX11TRUE16-NEXT: v_cndmask_b16 v9.h, v30 .l, v29 .l, s18
42965- ; GFX11TRUE16-NEXT: v_cndmask_b16 v8.h, v32.l, v31 .l, s16
42950+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v14.h, v19 .l, v18 .l, s29
42951+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v13.h, v21 .l, v20 .l, s26
42952+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v12.h, v23 .l, v22 .l, s24
42953+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v11.h, v25 .l, v24 .l, s22
42954+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v10.h, v27 .l, v26 .l, s20
42955+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v9.h, v29 .l, v28 .l, s18
42956+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v8.h, v32.l, v30 .l, s16
4296642957; GFX11TRUE16-NEXT: v_cndmask_b16 v7.h, v34.l, v33.l, s14
4296742958; GFX11TRUE16-NEXT: v_cndmask_b16 v6.h, v36.l, v35.l, s12
4296842959; GFX11TRUE16-NEXT: v_cndmask_b16 v5.h, v38.l, v37.l, s10
@@ -42971,7 +42962,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
4297142962; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v54.l, v53.l, s2
4297242963; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v52.l, v51.l, s4
4297342964; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v50.l, v49.l, s6
42974- ; GFX11TRUE16-NEXT: v_cndmask_b16 v15.h, v18 .l, v17 .l, vcc_lo
42965+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v15.h, v17 .l, v16 .l, vcc_lo
4297542966; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
4297642967;
4297742968; GFX11FAKE16-LABEL: v_vselect_v32bf16:
0 commit comments