@@ -38259,16 +38259,14 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
3825938259; GFX11TRUE16-LABEL: s_select_v2bf16:
3826038260; GFX11TRUE16: ; %bb.0:
3826138261; GFX11TRUE16-NEXT: s_lshr_b32 s2, s0, 16
38262- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16
3826338262; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
38264- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
38265- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
38266- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
38267- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0
38268- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
38269- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, vcc_lo
38270- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v1.h, vcc_lo
38271- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
38263+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
38264+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
38265+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s1, 16
38266+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
38267+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v1.l, vcc_lo
38268+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
38269+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v0.l, vcc_lo
3827238270; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
3827338271; GFX11TRUE16-NEXT: ; return to shader part epilog
3827438272;
@@ -38376,19 +38374,17 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
3837638374;
3837738375; GFX11TRUE16-LABEL: s_vselect_v2bf16:
3837838376; GFX11TRUE16: ; %bb.0:
38379- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16
38380- ; GFX11TRUE16-NEXT: s_lshr_b32 s4, s0, 16
38377+ ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s0, 16
3838138378; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3838238379; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 0, v1
3838338380; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
38384- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s4
38385- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
38386- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0
38387- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
38388- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, s2
38389- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v1.h, vcc_lo
38390- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
38391- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
38381+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
38382+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s1, 16
38383+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
38384+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, s0, v0.l, s2
38385+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
38386+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, s1, v0.h, vcc_lo
38387+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v1
3839238388; GFX11TRUE16-NEXT: ; return to shader part epilog
3839338389;
3839438390; GFX11FAKE16-LABEL: s_vselect_v2bf16:
@@ -40095,30 +40091,25 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat>
4009540091;
4009640092; GFX11TRUE16-LABEL: s_vselect_v4bf16:
4009740093; GFX11TRUE16: ; %bb.0:
40098- ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s3, 16
40094+ ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s1, 16
40095+ ; GFX11TRUE16-NEXT: s_lshr_b32 s9, s0, 16
4009940096; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4010040097; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 0, v1
40101- ; GFX11TRUE16-NEXT: s_lshr_b32 s8, s1, 16
40102- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
40103- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s3
40104- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s2, 16
40105- ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s0, 16
4010640098; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 0, v2
4010740099; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 0, v3
40108- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s8
40109- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s3
40110- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
40111- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, s2
40112- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s0
40113- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.h, s1
40114- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, s6
40115- ; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v1.h, v2.l, s4
40116- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
40117- ; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v2.h, v3.l, vcc_lo
40118- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.h, s5
40100+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
40101+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s9
40102+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
40103+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s1
40104+ ; GFX11TRUE16-NEXT: s_lshr_b32 s8, s3, 16
40105+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s2, 16
40106+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, s8, v0.l, s6
40107+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v0.h, s4
40108+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, s2, v1.l, vcc_lo
40109+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, s3, v1.h, s5
4011940110; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
40120- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v4
40121- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v0
40111+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
40112+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v2
4012240113; GFX11TRUE16-NEXT: ; return to shader part epilog
4012340114;
4012440115; GFX11FAKE16-LABEL: s_vselect_v4bf16:
0 commit comments