diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 68ea72c732e1e..4b7fc45908119 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5460,6 +5460,83 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
     }
     return true;
 
+  case ISD::EXTRACT_SUBVECTOR: {
+    SDValue Src = Op.getOperand(0);
+    if (Src.getValueType().isScalableVector())
+      break;
+    uint64_t Idx = Op.getConstantOperandVal(1);
+    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+    return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
+                                            Depth + 1);
+  }
+
+  case ISD::INSERT_SUBVECTOR: {
+    if (Op.getValueType().isScalableVector())
+      break;
+    SDValue Src = Op.getOperand(0);
+    SDValue Sub = Op.getOperand(1);
+    uint64_t Idx = Op.getConstantOperandVal(2);
+    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+    APInt DemandedSrcElts = DemandedElts;
+    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
+
+    if (!!DemandedSubElts && !isGuaranteedNotToBeUndefOrPoison(
+                                 Sub, DemandedSubElts, PoisonOnly, Depth + 1))
+      return false;
+    if (!!DemandedSrcElts && !isGuaranteedNotToBeUndefOrPoison(
+                                 Src, DemandedSrcElts, PoisonOnly, Depth + 1))
+      return false;
+    return true;
+  }
+
+  case ISD::EXTRACT_VECTOR_ELT: {
+    SDValue Src = Op.getOperand(0);
+    auto *IndexC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    EVT SrcVT = Src.getValueType();
+    if (SrcVT.isFixedLengthVector() && IndexC &&
+        IndexC->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
+      APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
+                                                  IndexC->getZExtValue());
+      return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
+                                              Depth + 1);
+    }
+    break;
+  }
+
+  case ISD::INSERT_VECTOR_ELT: {
+    SDValue InVec = Op.getOperand(0);
+    SDValue InVal = Op.getOperand(1);
+    SDValue EltNo = Op.getOperand(2);
+    EVT VT = InVec.getValueType();
+    auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+    if (IndexC && VT.isFixedLengthVector() &&
+        IndexC->getAPIntValue().ult(VT.getVectorNumElements())) {
+      if (DemandedElts[IndexC->getZExtValue()] &&
+          !isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1))
+        return false;
+      APInt InVecDemandedElts = DemandedElts;
+      InVecDemandedElts.clearBit(IndexC->getZExtValue());
+      if (!!InVecDemandedElts &&
+          !isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
+                                            PoisonOnly, Depth + 1))
+        return false;
+      return true;
+    }
+    break;
+  }
+
+  case ISD::SCALAR_TO_VECTOR:
+    // Check upper (known undef) elements.
+    if (DemandedElts.ugt(1) && !PoisonOnly)
+      return false;
+    // Check element zero.
+    if (DemandedElts[0] && !isGuaranteedNotToBeUndefOrPoison(
+                               Op.getOperand(0), PoisonOnly, Depth + 1))
+      return false;
+    return true;
+
   case ISD::SPLAT_VECTOR:
     return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
                                             Depth + 1);
@@ -5482,6 +5559,52 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
     return true;
   }
 
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+    // Shift amount operand is checked by canCreateUndefOrPoison. So it is
+    // enough to check operand 0 if Op can't create undef/poison.
+    return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
+                                   /*ConsiderFlags*/ true, Depth) &&
+           isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
+                                            PoisonOnly, Depth + 1);
+
+  case ISD::BSWAP:
+  case ISD::CTPOP:
+  case ISD::BITREVERSE:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::SADDSAT:
+  case ISD::UADDSAT:
+  case ISD::SSUBSAT:
+  case ISD::USUBSAT:
+  case ISD::SSHLSAT:
+  case ISD::USHLSAT:
+  case ISD::SMIN:
+  case ISD::SMAX:
+  case ISD::UMIN:
+  case ISD::UMAX:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::TRUNCATE:
+  case ISD::VSELECT: {
+    // If Op can't create undef/poison and none of its operands are undef/poison
+    // then Op is never undef/poison. A difference from the more common check
+    // below, outside the switch, is that we handle elementwise operations for
+    // which the DemandedElts mask is valid for all operands here.
+    return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
+                                   /*ConsiderFlags*/ true, Depth) &&
+           all_of(Op->ops(), [&](SDValue V) {
+             return isGuaranteedNotToBeUndefOrPoison(V, DemandedElts,
+                                                     PoisonOnly, Depth + 1);
+           });
+  }
+
     // TODO: Search for noundef attributes from library functions.
 
     // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
diff --git a/llvm/test/CodeGen/X86/pr62286.ll b/llvm/test/CodeGen/X86/pr62286.ll index 9728e130333c4..ce03f8fad4a19 100644 --- a/llvm/test/CodeGen/X86/pr62286.ll +++ b/llvm/test/CodeGen/X86/pr62286.ll @@ -28,8 +28,9 @@ define i64 @PR62286(i32 %a) { ; AVX1-NEXT: vmovd %edi, %xmm0 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] ; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7] ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1 @@ -42,10 +43,10 @@ define i64 @PR62286(i32 %a) { ; AVX2-LABEL: PR62286: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 -; AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] -; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm1 +; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 @@ -58,12 +59,13 @@ define i64 @PR62286(i32 %a) { ; AVX512-LABEL: PR62286: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovd %edi, %xmm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0] -; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 -; AVX512-NEXT: movw $4369, %ax # imm = 0x1111 +; AVX512-NEXT: movb $8, %al ; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpaddd %zmm0, %zmm0, %zmm1 {%k1} -; AVX512-NEXT: vpmovsxdq 
%ymm1, %zmm0 +; AVX512-NEXT: vpexpandd %ymm0, %ymm1 {%k1} {z} +; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] +; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1