Skip to content

Commit fe73a97

Browse files
committed
[SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (part 2)
Add support in isGuaranteedNotToBeUndefOrPoison to avoid regressions seen after a previous commit fixing #141034.
1 parent 0f35a70 commit fe73a97

File tree

8 files changed

+191
-264
lines changed

8 files changed

+191
-264
lines changed

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1889,6 +1889,12 @@ LLVM_ABI SDValue peekThroughExtractSubvectors(SDValue V);
18891889
/// If \p V is not a truncation, it is returned as-is.
18901890
LLVM_ABI SDValue peekThroughTruncates(SDValue V);
18911891

1892+
/// Recursively peek through INSERT_VECTOR_ELT nodes, returning the source
1893+
/// vector operand of \p V, as long as \p V is an INSERT_VECTOR_ELT operation
1894+
/// that does not insert into any of the demanded vector elts.
1895+
LLVM_ABI SDValue peekThroughInsertVectorElt(SDValue V,
1896+
const APInt &DemandedElts);
1897+
18921898
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
18931899
/// constant is canonicalized to be operand 1.
18941900
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs = false);

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5456,6 +5456,60 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
54565456
}
54575457
return true;
54585458

5459+
case ISD::INSERT_SUBVECTOR: {
5460+
if (Op.getValueType().isScalableVector())
5461+
break;
5462+
SDValue Src = Op.getOperand(0);
5463+
SDValue Sub = Op.getOperand(1);
5464+
uint64_t Idx = Op.getConstantOperandVal(2);
5465+
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
5466+
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
5467+
APInt DemandedSrcElts = DemandedElts;
5468+
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
5469+
5470+
if (!!DemandedSubElts && !isGuaranteedNotToBeUndefOrPoison(
5471+
Sub, DemandedSubElts, PoisonOnly, Depth + 1))
5472+
return false;
5473+
if (!!DemandedSrcElts && !isGuaranteedNotToBeUndefOrPoison(
5474+
Src, DemandedSrcElts, PoisonOnly, Depth + 1))
5475+
return false;
5476+
return true;
5477+
}
5478+
5479+
case ISD::INSERT_VECTOR_ELT: {
5480+
SDValue InVec = Op.getOperand(0);
5481+
SDValue InVal = Op.getOperand(1);
5482+
SDValue EltNo = Op.getOperand(2);
5483+
EVT VT = InVec.getValueType();
5484+
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
5485+
if (IndexC && VT.isFixedLengthVector() &&
5486+
IndexC->getZExtValue() < VT.getVectorNumElements()) {
5487+
if (DemandedElts[IndexC->getZExtValue()] &&
5488+
!isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1))
5489+
return false;
5490+
APInt InVecDemandedElts = DemandedElts;
5491+
InVecDemandedElts.clearBit(IndexC->getZExtValue());
5492+
if (!!InVecDemandedElts &&
5493+
!isGuaranteedNotToBeUndefOrPoison(
5494+
peekThroughInsertVectorElt(InVec, InVecDemandedElts),
5495+
InVecDemandedElts, PoisonOnly, Depth + 1))
5496+
return false;
5497+
return true;
5498+
}
5499+
break;
5500+
}
5501+
5502+
case ISD::SCALAR_TO_VECTOR:
5503+
// Check upper (known undef) elements.
5504+
if (DemandedElts.ugt(1) && !PoisonOnly)
5505+
return false;
5506+
// Check element zero.
5507+
if (DemandedElts[0] && !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0),
5508+
PoisonOnly,
5509+
Depth + 1))
5510+
return false;
5511+
return true;
5512+
54595513
case ISD::SPLAT_VECTOR:
54605514
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
54615515
Depth + 1);
@@ -12508,6 +12562,23 @@ SDValue llvm::peekThroughTruncates(SDValue V) {
1250812562
return V;
1250912563
}
1251012564

12565+
// Walks up a chain of INSERT_VECTOR_ELT nodes starting at V and returns the
// first value that is either not an INSERT_VECTOR_ELT or is an insertion that
// cannot be skipped. An insertion is skipped only when its index operand is a
// constant, the vector type is fixed-length, the index is within the vector's
// element count, and that index is not set in DemandedElts.
SDValue llvm::peekThroughInsertVectorElt(SDValue V, const APInt &DemandedElts) {
12566+
while (V.getOpcode() == ISD::INSERT_VECTOR_ELT) {
12567+
// Operand 0 is the vector being inserted into; operand 2 is the index.
SDValue InVec = V.getOperand(0);
12568+
SDValue EltNo = V.getOperand(2);
12569+
EVT VT = InVec.getValueType();
12570+
// IndexC is null when the insertion index is not a ConstantSDNode.
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
12571+
// Step to the source vector only when the written lane is a known,
// in-range lane that the caller does not demand.
if (IndexC && VT.isFixedLengthVector() &&
12572+
IndexC->getZExtValue() < VT.getVectorNumElements() &&
12573+
!DemandedElts[IndexC->getZExtValue()]) {
12574+
V = InVec;
12575+
continue;
12576+
}
12577+
// Otherwise stop at this node: it writes a demanded lane, or the lane it
// writes cannot be determined by the checks above.
break;
12578+
}
12579+
return V;
12580+
}
12581+
1251112582
bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
1251212583
if (V.getOpcode() != ISD::XOR)
1251312584
return false;

llvm/test/CodeGen/Thumb2/mve-vld3.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -663,8 +663,8 @@ define void @vld3_v2i8(ptr %src, ptr %dst) {
663663
; CHECK: @ %bb.0: @ %entry
664664
; CHECK-NEXT: .pad #8
665665
; CHECK-NEXT: sub sp, #8
666-
; CHECK-NEXT: ldrd r2, r0, [r0]
667-
; CHECK-NEXT: strd r2, r0, [sp]
666+
; CHECK-NEXT: ldrd r0, r2, [r0]
667+
; CHECK-NEXT: strd r0, r2, [sp]
668668
; CHECK-NEXT: mov r0, sp
669669
; CHECK-NEXT: vldrb.u16 q0, [r0]
670670
; CHECK-NEXT: vmov.u16 r0, q0[4]

llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll

Lines changed: 22 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -262,54 +262,37 @@ define <4 x float> @merge_4f32_f32_45zz(ptr %ptr) nounwind uwtable noinline ssp
262262
define <4 x float> @merge_4f32_f32_012u(ptr %ptr) nounwind uwtable noinline ssp {
263263
; SSE2-LABEL: merge_4f32_f32_012u:
264264
; SSE2: # %bb.0:
265-
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
266265
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
267-
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
268-
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
269-
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
270-
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
266+
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
267+
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
271268
; SSE2-NEXT: retq
272269
;
273270
; SSE41-LABEL: merge_4f32_f32_012u:
274271
; SSE41: # %bb.0:
275-
; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
276-
; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
277-
; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
278-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
279-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
280-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
272+
; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
273+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
281274
; SSE41-NEXT: retq
282275
;
283276
; AVX-LABEL: merge_4f32_f32_012u:
284277
; AVX: # %bb.0:
285-
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
286-
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
287-
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
288-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
289-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
290-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
278+
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
279+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
291280
; AVX-NEXT: retq
292281
;
293282
; X86-SSE1-LABEL: merge_4f32_f32_012u:
294283
; X86-SSE1: # %bb.0:
295284
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
296-
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
285+
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
286+
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
297287
; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
298-
; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
299-
; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
300-
; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
301-
; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
288+
; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
302289
; X86-SSE1-NEXT: retl
303290
;
304291
; X86-SSE41-LABEL: merge_4f32_f32_012u:
305292
; X86-SSE41: # %bb.0:
306293
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
307-
; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
308-
; X86-SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
309-
; X86-SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
310-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
311-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
312-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
294+
; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
295+
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
313296
; X86-SSE41-NEXT: retl
314297
%ptr1 = getelementptr inbounds float, ptr %ptr, i64 1
315298
%ptr2 = getelementptr inbounds float, ptr %ptr, i64 2
@@ -326,54 +309,37 @@ define <4 x float> @merge_4f32_f32_012u(ptr %ptr) nounwind uwtable noinline ssp
326309
define <4 x float> @merge_4f32_f32_019u(ptr %ptr) nounwind uwtable noinline ssp {
327310
; SSE2-LABEL: merge_4f32_f32_019u:
328311
; SSE2: # %bb.0:
329-
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
330312
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
331-
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
332-
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
333-
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
334-
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
313+
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
314+
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
335315
; SSE2-NEXT: retq
336316
;
337317
; SSE41-LABEL: merge_4f32_f32_019u:
338318
; SSE41: # %bb.0:
339-
; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
340-
; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
341-
; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
342-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
343-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
344-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
319+
; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
320+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
345321
; SSE41-NEXT: retq
346322
;
347323
; AVX-LABEL: merge_4f32_f32_019u:
348324
; AVX: # %bb.0:
349-
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
350-
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
351-
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
352-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
353-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
354-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
325+
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
326+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
355327
; AVX-NEXT: retq
356328
;
357329
; X86-SSE1-LABEL: merge_4f32_f32_019u:
358330
; X86-SSE1: # %bb.0:
359331
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
360-
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
332+
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
333+
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
361334
; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
362-
; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
363-
; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
364-
; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
365-
; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
335+
; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
366336
; X86-SSE1-NEXT: retl
367337
;
368338
; X86-SSE41-LABEL: merge_4f32_f32_019u:
369339
; X86-SSE41: # %bb.0:
370340
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
371-
; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
372-
; X86-SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
373-
; X86-SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
374-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
375-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
376-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
341+
; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
342+
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
377343
; X86-SSE41-NEXT: retl
378344
%ptr1 = getelementptr inbounds float, ptr %ptr, i64 1
379345
%ptr2 = getelementptr inbounds float, ptr %ptr, i64 9

0 commit comments

Comments
 (0)