@@ -40777,39 +40777,41 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
4077740777; GFX9: ; %bb.0:
4077840778; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4077940779; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
40780- ; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
40781- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
40780+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
4078240781; GFX9-NEXT: v_and_b32_e32 v1, 1, v3
40783- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2
4078440782; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
40785- ; GFX9-NEXT: v_cndmask_b32_e64 v2, v7, v5, s[4:5]
40786- ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v5
40787- ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v7
40788- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
40789- ; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, v3, s[4:5]
40790- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
40791- ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, v4, s[4:5]
40783+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
40784+ ; GFX9-NEXT: v_and_b32_e32 v1, 1, v2
40785+ ; GFX9-NEXT: v_cndmask_b32_sdwa v2, v7, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40786+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
40787+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v1
40788+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
40789+ ; GFX9-NEXT: s_mov_b64 vcc, s[4:5]
40790+ ; GFX9-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[6:7]
4079240791; GFX9-NEXT: v_cndmask_b32_sdwa v3, v6, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
4079340792; GFX9-NEXT: s_mov_b32 s4, 0x5040100
4079440793; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
40795- ; GFX9-NEXT: v_perm_b32 v1, v1, v2 , s4
40794+ ; GFX9-NEXT: v_perm_b32 v1, v2, v1 , s4
4079640795; GFX9-NEXT: s_setpc_b64 s[30:31]
4079740796;
4079840797; GFX10-LABEL: v_vselect_v4bf16:
4079940798; GFX10: ; %bb.0:
4080040799; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40801- ; GFX10-NEXT: v_and_b32_e32 v2 , 1, v2
40802- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v3
40800+ ; GFX10-NEXT: v_and_b32_e32 v3 , 1, v3
40801+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
4080340802; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
40804- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v2
40803+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
40804+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
40805+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v2
40806+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 1, v0
40807+ ; GFX10-NEXT: v_cndmask_b32_sdwa v2, v7, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40808+ ; GFX10-NEXT: s_mov_b32 vcc_lo, s4
40809+ ; GFX10-NEXT: v_cndmask_b32_sdwa v3, v6, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
4080540810; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
40806- ; GFX10-NEXT: v_cndmask_b32_e64 v1, v7, v5, s4
40807- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v0
40808- ; GFX10-NEXT: v_cndmask_b32_sdwa v2, v6, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40809- ; GFX10-NEXT: v_cndmask_b32_sdwa v3, v7, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40810- ; GFX10-NEXT: v_cndmask_b32_e64 v0, v6, v4, s4
40811- ; GFX10-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
40812- ; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
40811+ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v6, v4, s5
40812+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc_lo
40813+ ; GFX10-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
40814+ ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
4081340815; GFX10-NEXT: s_setpc_b64 s[30:31]
4081440816;
4081540817; GFX11TRUE16-LABEL: v_vselect_v4bf16:
@@ -41058,39 +41060,36 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
4105841060; GFX10: ; %bb.0:
4105941061; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4106041062; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
41061- ; GFX10-NEXT: v_and_b32_e32 v4, 1, v4
41062- ; GFX10-NEXT: v_and_b32_e32 v5, 1, v5
41063+ ; GFX10-NEXT: v_and_b32_e32 v7, 1, v7
41064+ ; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
41065+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
4106341066; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
41064- ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 16, v14
41065- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
41066- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v3
41067+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
41068+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v5
41069+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
41070+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 1, v3
4106741071; GFX10-NEXT: v_and_b32_e32 v3, 1, v6
41068- ; GFX10-NEXT: v_and_b32_e32 v6, 1, v7
41069- ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v11
41070- ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
41071- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v3
41072- ; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v11, s4
41073- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v4
41074- ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v10
41075- ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v15
41076- ; GFX10-NEXT: v_cndmask_b32_e64 v4, v14, v10, s4
41077- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v5
41078- ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v13
41079- ; GFX10-NEXT: v_cndmask_b32_e64 v5, v16, v11, s4
41080- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v2
41081- ; GFX10-NEXT: v_cndmask_b32_e64 v2, v13, v9, s4
41082- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v0
41083- ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v9
41084- ; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v8, s4
41085- ; GFX10-NEXT: v_cndmask_b32_sdwa v8, v12, v8, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41072+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 1, v1
41073+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v4
41074+ ; GFX10-NEXT: v_cndmask_b32_sdwa v4, v15, v11, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
4108641075; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
41087- ; GFX10-NEXT: v_perm_b32 v0, v8, v0, 0x5040100
41088- ; GFX10-NEXT: v_cndmask_b32_e32 v1, v10, v9, vcc_lo
41089- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
41076+ ; GFX10-NEXT: v_cndmask_b32_e32 v5, v14, v10, vcc_lo
41077+ ; GFX10-NEXT: s_mov_b32 vcc_lo, s6
41078+ ; GFX10-NEXT: v_cndmask_b32_sdwa v6, v14, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41079+ ; GFX10-NEXT: s_mov_b32 vcc_lo, s5
41080+ ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v13, v9, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41081+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
41082+ ; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v8, vcc_lo
41083+ ; GFX10-NEXT: s_mov_b32 vcc_lo, s4
41084+ ; GFX10-NEXT: v_cndmask_b32_sdwa v7, v12, v8, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41085+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
41086+ ; GFX10-NEXT: v_perm_b32 v0, v7, v0, 0x5040100
41087+ ; GFX10-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
41088+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
4109041089; GFX10-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
41091- ; GFX10-NEXT: v_cndmask_b32_e32 v6 , v15, v7 , vcc_lo
41092- ; GFX10-NEXT: v_perm_b32 v2, v5, v4 , 0x5040100
41093- ; GFX10-NEXT: v_perm_b32 v3, v6 , v3, 0x5040100
41090+ ; GFX10-NEXT: v_cndmask_b32_e32 v3 , v15, v11 , vcc_lo
41091+ ; GFX10-NEXT: v_perm_b32 v2, v6, v5 , 0x5040100
41092+ ; GFX10-NEXT: v_perm_b32 v3, v4 , v3, 0x5040100
4109441093; GFX10-NEXT: s_setpc_b64 s[30:31]
4109541094;
4109641095; GFX11TRUE16-LABEL: v_vselect_v8bf16:
0 commit comments