Skip to content

Commit 5e7924a

Browse files
authored
[SelectionDAG] Handle more opcodes in isGuaranteedNotToBeUndefOrPoison (#147019)
Add special handling of EXTRACT_SUBVECTOR, INSERT_SUBVECTOR, EXTRACT_VECTOR_ELT, INSERT_VECTOR_ELT and SCALAR_TO_VECTOR in isGuaranteedNotToBeUndefOrPoison. Make use of DemandedElts to improve the analysis and only check relevant elements for each operand. Also start using DemandedElts in the recursive calls that check isGuaranteedNotToBeUndefOrPoison for all operands for operations that do not create undef/poison. We can do that for a number of elementwise operations for which the DemandedElts can be applied to every operand (e.g. ADD, OR, BITREVERSE, TRUNCATE).
1 parent cd8c3bd commit 5e7924a

File tree

2 files changed

+135
-10
lines changed

2 files changed

+135
-10
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5460,6 +5460,83 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
54605460
}
54615461
return true;
54625462

5463+
case ISD::EXTRACT_SUBVECTOR: {
5464+
SDValue Src = Op.getOperand(0);
5465+
if (Src.getValueType().isScalableVector())
5466+
break;
5467+
uint64_t Idx = Op.getConstantOperandVal(1);
5468+
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
5469+
APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
5470+
return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
5471+
Depth + 1);
5472+
}
5473+
5474+
case ISD::INSERT_SUBVECTOR: {
5475+
if (Op.getValueType().isScalableVector())
5476+
break;
5477+
SDValue Src = Op.getOperand(0);
5478+
SDValue Sub = Op.getOperand(1);
5479+
uint64_t Idx = Op.getConstantOperandVal(2);
5480+
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
5481+
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
5482+
APInt DemandedSrcElts = DemandedElts;
5483+
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
5484+
5485+
if (!!DemandedSubElts && !isGuaranteedNotToBeUndefOrPoison(
5486+
Sub, DemandedSubElts, PoisonOnly, Depth + 1))
5487+
return false;
5488+
if (!!DemandedSrcElts && !isGuaranteedNotToBeUndefOrPoison(
5489+
Src, DemandedSrcElts, PoisonOnly, Depth + 1))
5490+
return false;
5491+
return true;
5492+
}
5493+
5494+
case ISD::EXTRACT_VECTOR_ELT: {
5495+
SDValue Src = Op.getOperand(0);
5496+
auto *IndexC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5497+
EVT SrcVT = Src.getValueType();
5498+
if (SrcVT.isFixedLengthVector() && IndexC &&
5499+
IndexC->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
5500+
APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
5501+
IndexC->getZExtValue());
5502+
return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
5503+
Depth + 1);
5504+
}
5505+
break;
5506+
}
5507+
5508+
case ISD::INSERT_VECTOR_ELT: {
5509+
SDValue InVec = Op.getOperand(0);
5510+
SDValue InVal = Op.getOperand(1);
5511+
SDValue EltNo = Op.getOperand(2);
5512+
EVT VT = InVec.getValueType();
5513+
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
5514+
if (IndexC && VT.isFixedLengthVector() &&
5515+
IndexC->getAPIntValue().ult(VT.getVectorNumElements())) {
5516+
if (DemandedElts[IndexC->getZExtValue()] &&
5517+
!isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1))
5518+
return false;
5519+
APInt InVecDemandedElts = DemandedElts;
5520+
InVecDemandedElts.clearBit(IndexC->getZExtValue());
5521+
if (!!InVecDemandedElts &&
5522+
!isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
5523+
PoisonOnly, Depth + 1))
5524+
return false;
5525+
return true;
5526+
}
5527+
break;
5528+
}
5529+
5530+
case ISD::SCALAR_TO_VECTOR:
5531+
// Check upper (known undef) elements.
5532+
if (DemandedElts.ugt(1) && !PoisonOnly)
5533+
return false;
5534+
// Check element zero.
5535+
if (DemandedElts[0] && !isGuaranteedNotToBeUndefOrPoison(
5536+
Op.getOperand(0), PoisonOnly, Depth + 1))
5537+
return false;
5538+
return true;
5539+
54635540
case ISD::SPLAT_VECTOR:
54645541
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
54655542
Depth + 1);
@@ -5482,6 +5559,52 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
54825559
return true;
54835560
}
54845561

5562+
case ISD::SHL:
5563+
case ISD::SRL:
5564+
case ISD::SRA:
5565+
// The shift amount operand is checked by canCreateUndefOrPoison, so it is
5566+
// enough to check operand 0 if Op can't create undef/poison.
5567+
return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
5568+
/*ConsiderFlags*/ true, Depth) &&
5569+
isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
5570+
PoisonOnly, Depth + 1);
5571+
5572+
case ISD::BSWAP:
5573+
case ISD::CTPOP:
5574+
case ISD::BITREVERSE:
5575+
case ISD::AND:
5576+
case ISD::OR:
5577+
case ISD::XOR:
5578+
case ISD::ADD:
5579+
case ISD::SUB:
5580+
case ISD::MUL:
5581+
case ISD::SADDSAT:
5582+
case ISD::UADDSAT:
5583+
case ISD::SSUBSAT:
5584+
case ISD::USUBSAT:
5585+
case ISD::SSHLSAT:
5586+
case ISD::USHLSAT:
5587+
case ISD::SMIN:
5588+
case ISD::SMAX:
5589+
case ISD::UMIN:
5590+
case ISD::UMAX:
5591+
case ISD::ZERO_EXTEND:
5592+
case ISD::SIGN_EXTEND:
5593+
case ISD::ANY_EXTEND:
5594+
case ISD::TRUNCATE:
5595+
case ISD::VSELECT: {
5596+
// If Op can't create undef/poison and none of its operands are undef/poison
5597+
// then Op is never undef/poison. Unlike the more common check below, outside
5598+
// the switch, here we handle elementwise operations for which the
5599+
// DemandedElts mask is valid for all operands.
5600+
return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
5601+
/*ConsiderFlags*/ true, Depth) &&
5602+
all_of(Op->ops(), [&](SDValue V) {
5603+
return isGuaranteedNotToBeUndefOrPoison(V, DemandedElts,
5604+
PoisonOnly, Depth + 1);
5605+
});
5606+
}
5607+
54855608
// TODO: Search for noundef attributes from library functions.
54865609

54875610
// TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.

llvm/test/CodeGen/X86/pr62286.ll

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ define i64 @PR62286(i32 %a) {
2828
; AVX1-NEXT: vmovd %edi, %xmm0
2929
; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
3030
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
31+
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
32+
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
3133
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
32-
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
3334
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
3435
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3536
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
@@ -42,10 +43,10 @@ define i64 @PR62286(i32 %a) {
4243
; AVX2-LABEL: PR62286:
4344
; AVX2: # %bb.0:
4445
; AVX2-NEXT: vmovd %edi, %xmm0
45-
; AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
46-
; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
47-
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
48-
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
46+
; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm1
47+
; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
48+
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
49+
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
4950
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
5051
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
5152
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
@@ -58,12 +59,13 @@ define i64 @PR62286(i32 %a) {
5859
; AVX512-LABEL: PR62286:
5960
; AVX512: # %bb.0:
6061
; AVX512-NEXT: vmovd %edi, %xmm0
61-
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
62-
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
63-
; AVX512-NEXT: movw $4369, %ax # imm = 0x1111
62+
; AVX512-NEXT: movb $8, %al
6463
; AVX512-NEXT: kmovd %eax, %k1
65-
; AVX512-NEXT: vpaddd %zmm0, %zmm0, %zmm1 {%k1}
66-
; AVX512-NEXT: vpmovsxdq %ymm1, %zmm0
64+
; AVX512-NEXT: vpexpandd %ymm0, %ymm1 {%k1} {z}
65+
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
66+
; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0
67+
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
68+
; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
6769
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
6870
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
6971
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1

0 commit comments

Comments
 (0)