diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1697da09e0f72..deab638b7e546 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6153,12 +6153,13 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, return true; } // Handle CONCAT(SUB0, SUB1). - // Limit this to vXi64 vector cases to make the most of cross lane shuffles. + // Limit to vXi64/splat cases to make the most of cross lane shuffles. if (Depth > 0 && InsertIdx == NumSubElts && NumElts == (2 * NumSubElts) && - NumBitsPerElt == 64 && Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOpcode() == ISD::INSERT_SUBVECTOR && Src.getOperand(0).isUndef() && Src.getOperand(1).getValueType() == SubVT && Src.getConstantOperandVal(2) == 0 && + (NumBitsPerElt == 64 || Src.getOperand(1) == Sub) && SDNode::areOnlyUsersOf({N.getNode(), Src.getNode()}, Sub.getNode())) { for (int i = 0; i != (int)NumSubElts; ++i) Mask.push_back(i); diff --git a/llvm/test/CodeGen/X86/vector-partial-undef.ll b/llvm/test/CodeGen/X86/vector-partial-undef.ll index 4753dba2d468f..7c12e5295257c 100644 --- a/llvm/test/CodeGen/X86/vector-partial-undef.ll +++ b/llvm/test/CodeGen/X86/vector-partial-undef.ll @@ -150,8 +150,7 @@ define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) { ; AVX-LABEL: xor_undef_elts_alt: ; AVX: # %bb.0: ; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7] +; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [2,1,1,0,3,2,0,3] ; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX-NEXT: retq