@@ -38481,7 +38481,10 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
3848138481; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3848238482; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
3848338483; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
38484- ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38484+ ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v1
38485+ ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38486+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
38487+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3848538488; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3848638489; GFX8-NEXT: s_setpc_b64 s[30:31]
3848738490;
@@ -38491,7 +38494,9 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
3849138494; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3849238495; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
3849338496; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
38494- ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38497+ ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
38498+ ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38499+ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
3849538500; GFX9-NEXT: s_mov_b32 s4, 0x5040100
3849638501; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
3849738502; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -38500,9 +38505,11 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
3850038505; GFX10: ; %bb.0:
3850138506; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3850238507; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
38508+ ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v1
38509+ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
3850338510; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
3850438511; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
38505- ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38512+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc_lo
3850638513; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
3850738514; GFX10-NEXT: s_setpc_b64 s[30:31]
3850838515;
@@ -38570,37 +38577,44 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
3857038577; GFX8-LABEL: v_vselect_v2bf16:
3857138578; GFX8: ; %bb.0:
3857238579; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38573- ; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
3857438580; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
38581+ ; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
38582+ ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38583+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
38584+ ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38585+ ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3
3857538586; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
38576- ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
38577- ; GFX8-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
38578- ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38587+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
38588+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3857938589; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3858038590; GFX8-NEXT: s_setpc_b64 s[30:31]
3858138591;
3858238592; GFX9-LABEL: v_vselect_v2bf16:
3858338593; GFX9: ; %bb.0:
3858438594; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38585- ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
3858638595; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
38596+ ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
38597+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38598+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
38599+ ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38600+ ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
3858738601; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
38588- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
38589- ; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
38590- ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38602+ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
3859138603; GFX9-NEXT: s_mov_b32 s4, 0x5040100
3859238604; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
3859338605; GFX9-NEXT: s_setpc_b64 s[30:31]
3859438606;
3859538607; GFX10-LABEL: v_vselect_v2bf16:
3859638608; GFX10: ; %bb.0:
3859738609; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38598- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
3859938610; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
38611+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
38612+ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
38613+ ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v3
38614+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
38615+ ; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
3860038616; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
38601- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v0
38602- ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38603- ; GFX10-NEXT: v_cndmask_b32_e64 v0, v3, v2, s4
38617+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo
3860438618; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
3860538619; GFX10-NEXT: s_setpc_b64 s[30:31]
3860638620;
@@ -38757,12 +38771,13 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
3875738771; GFX8-NEXT: s_lshr_b32 s3, s1, 16
3875838772; GFX8-NEXT: v_mov_b32_e32 v1, s3
3875938773; GFX8-NEXT: v_mov_b32_e32 v2, s2
38760- ; GFX8-NEXT: v_mov_b32_e32 v3, s1
38761- ; GFX8-NEXT: v_mov_b32_e32 v4, s0
3876238774; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
38763- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
38764- ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38765- ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38775+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
38776+ ; GFX8-NEXT: v_mov_b32_e32 v1, s1
38777+ ; GFX8-NEXT: v_mov_b32_e32 v2, s0
38778+ ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
38779+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
38780+ ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3876638781; GFX8-NEXT: v_readfirstlane_b32 s0, v0
3876738782; GFX8-NEXT: ; return to shader part epilog
3876838783;
@@ -38867,13 +38882,14 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
3886738882; GFX8: ; %bb.0:
3886838883; GFX8-NEXT: s_lshr_b32 s2, s0, 16
3886938884; GFX8-NEXT: s_lshr_b32 s3, s1, 16
38885+ ; GFX8-NEXT: v_mov_b32_e32 v2, s3
38886+ ; GFX8-NEXT: v_mov_b32_e32 v3, s2
3887038887; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
38871- ; GFX8-NEXT: v_mov_b32_e32 v1, s3
38872- ; GFX8-NEXT: v_mov_b32_e32 v2, s2
38873- ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38888+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
3887438889; GFX8-NEXT: v_mov_b32_e32 v2, s1
3887538890; GFX8-NEXT: v_mov_b32_e32 v3, s0
3887638891; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
38892+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3887738893; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
3887838894; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3887938895; GFX8-NEXT: v_readfirstlane_b32 s0, v0
@@ -40776,42 +40792,48 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
4077640792; GFX9-LABEL: v_vselect_v4bf16:
4077740793; GFX9: ; %bb.0:
4077840794; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40779- ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
40780- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
40781- ; GFX9-NEXT: v_and_b32_e32 v1, 1, v3
40795+ ; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
40796+ ; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
40797+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
4078240798; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
40783- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
40784- ; GFX9-NEXT: v_and_b32_e32 v1, 1, v2
40785- ; GFX9-NEXT: v_cndmask_b32_sdwa v2, v7, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40799+ ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v5, vcc
40800+ ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v5
40801+ ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v7
40802+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
40803+ ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
40804+ ; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
4078640805; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
40787- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v1
4078840806; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
40789- ; GFX9-NEXT: s_mov_b64 vcc, s[4:5]
40790- ; GFX9-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[6:7]
40791- ; GFX9-NEXT: v_cndmask_b32_sdwa v3, v6, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40807+ ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
40808+ ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v6
40809+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
40810+ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
4079240811; GFX9-NEXT: s_mov_b32 s4, 0x5040100
40793- ; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
40794- ; GFX9-NEXT: v_perm_b32 v1, v2, v1, s4
40812+ ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
40813+ ; GFX9-NEXT: v_perm_b32 v1, v3, v2, s4
4079540814; GFX9-NEXT: s_setpc_b64 s[30:31]
4079640815;
4079740816; GFX10-LABEL: v_vselect_v4bf16:
4079840817; GFX10: ; %bb.0:
4079940818; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40800- ; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
40801- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
40819+ ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
4080240820; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
40803- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
40804- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
40805- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v2
40806- ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 1, v0
40807- ; GFX10-NEXT: v_cndmask_b32_sdwa v2, v7, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40808- ; GFX10-NEXT: s_mov_b32 vcc_lo, s4
40809- ; GFX10-NEXT: v_cndmask_b32_sdwa v3, v6, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40821+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
40822+ ; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
40823+ ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v4
40824+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
40825+ ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v6
40826+ ; GFX10-NEXT: v_cndmask_b32_e32 v2, v7, v5, vcc_lo
40827+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
40828+ ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v5
40829+ ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v7
40830+ ; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo
4081040831; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
40811- ; GFX10-NEXT: v_cndmask_b32_e64 v0, v6, v4, s5
40812- ; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc_lo
40813- ; GFX10-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
40814- ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
40832+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo
40833+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
40834+ ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
40835+ ; GFX10-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo
40836+ ; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
4081540837; GFX10-NEXT: s_setpc_b64 s[30:31]
4081640838;
4081740839; GFX11TRUE16-LABEL: v_vselect_v4bf16:
@@ -41059,37 +41081,42 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
4105941081; GFX10-LABEL: v_vselect_v8bf16:
4106041082; GFX10: ; %bb.0:
4106141083; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41084+ ; GFX10-NEXT: v_and_b32_e32 v6, 1, v6
41085+ ; GFX10-NEXT: v_and_b32_e32 v4, 1, v4
41086+ ; GFX10-NEXT: v_and_b32_e32 v5, 1, v5
41087+ ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
41088+ ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 16, v10
41089+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
41090+ ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v14
41091+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
4106241092; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
41063- ; GFX10-NEXT: v_and_b32_e32 v7, 1, v7
4106441093; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
41065- ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
41066- ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
41067- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
41068- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v5
41069- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
41070- ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 1, v3
41071- ; GFX10-NEXT: v_and_b32_e32 v3, 1, v6
41072- ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 1, v1
41073- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v4
41074- ; GFX10-NEXT: v_cndmask_b32_sdwa v4, v15, v11, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41075- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
41076- ; GFX10-NEXT: v_cndmask_b32_e32 v5, v14, v10, vcc_lo
41077- ; GFX10-NEXT: s_mov_b32 vcc_lo, s6
41078- ; GFX10-NEXT: v_cndmask_b32_sdwa v6, v14, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41079- ; GFX10-NEXT: s_mov_b32 vcc_lo, s5
41080- ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v13, v9, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41081- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
41082- ; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v8, vcc_lo
41083- ; GFX10-NEXT: s_mov_b32 vcc_lo, s4
41084- ; GFX10-NEXT: v_cndmask_b32_sdwa v7, v12, v8, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41094+ ; GFX10-NEXT: v_cndmask_b32_e32 v6, v15, v11, vcc_lo
41095+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
41096+ ; GFX10-NEXT: v_and_b32_e32 v7, 1, v7
41097+ ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v11
41098+ ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v15
41099+ ; GFX10-NEXT: v_cndmask_b32_e32 v4, v14, v10, vcc_lo
41100+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
41101+ ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v8
41102+ ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v12
41103+ ; GFX10-NEXT: v_cndmask_b32_e32 v5, v17, v16, vcc_lo
4108541104; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
41086- ; GFX10-NEXT: v_perm_b32 v0, v7, v0, 0x5040100
4108741105; GFX10-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
41106+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
41107+ ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v9
41108+ ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 16, v13
41109+ ; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v8, vcc_lo
41110+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
41111+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v14, v10, vcc_lo
4108841112; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
41089- ; GFX10-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
41090- ; GFX10-NEXT: v_cndmask_b32_e32 v3, v15, v11, vcc_lo
41091- ; GFX10-NEXT: v_perm_b32 v2, v6, v5, 0x5040100
41092- ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
41113+ ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
41114+ ; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo
41115+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
41116+ ; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
41117+ ; GFX10-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo
41118+ ; GFX10-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
41119+ ; GFX10-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
4109341120; GFX10-NEXT: s_setpc_b64 s[30:31]
4109441121;
4109541122; GFX11TRUE16-LABEL: v_vselect_v8bf16:
0 commit comments