diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0f82b541744d6..bbab43d4e92af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58684,6 +58684,21 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, } } + // Attempt to constant fold (if we're not widening). + if (!Vec.isUndef() && !ISD::isBuildVectorAllZeros(Vec.getNode())) { + unsigned EltSizeInBits = OpVT.getScalarSizeInBits(); + APInt VecUndefElts, SubUndefElts; + SmallVector VecEltBits, SubEltBits; + if (getTargetConstantBitsFromNode(Vec, EltSizeInBits, VecUndefElts, + VecEltBits) && + getTargetConstantBitsFromNode(SubVec, EltSizeInBits, SubUndefElts, + SubEltBits)) { + VecUndefElts.insertBits(SubUndefElts, IdxVal); + llvm::copy(SubEltBits, VecEltBits.begin() + IdxVal); + return getConstVector(VecEltBits, VecUndefElts, OpVT, DAG, dl); + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 483191fb32bdf..f53c7a3370174 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -140,16 +140,11 @@ define <4 x double> @demandedelts_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x ; X86-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,1,2,3] ; X86-NEXT: retl ; -; X64-AVX-LABEL: demandedelts_vpermil2pd256_as_shufpd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm1[0,0],ymm0[3],ymm1[3] -; X64-AVX-NEXT: retq -; -; X64-AVX2-LABEL: demandedelts_vpermil2pd256_as_shufpd: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3] -; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,1,2,3] -; X64-AVX2-NEXT: retq +; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd: +; X64: # %bb.0: +; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3] +; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,1,2,3] +; X64-NEXT: retq %res0 = insertelement <4 x i64> , i64 %a2, i32 0 %res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %res0, i8 0) %res2 = shufflevector <4 x double> %res1, <4 x double> undef, <4 x i32>