Skip to content

Commit d9b64b1

Browse files
committed
[SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR
As reported in #141034, SelectionDAG::getNode had some unexpected behaviors when trying to create vectors with UNDEF elements. Since we treat both UNDEF and POISON as undefined (when using isUndef()), we can't just fold away INSERT_VECTOR_ELT/INSERT_SUBVECTOR based on isUndef(), as that could make the resulting vector more poisonous. The same kind of bug existed in DAGCombiner::visitINSERT_SUBVECTOR.

Here are some examples:

This fold was done even if vec[idx] was POISON:
  INSERT_VECTOR_ELT vec, UNDEF, idx -> vec

This fold was done even if any of vec[idx..idx+size] was POISON:
  INSERT_SUBVECTOR vec, UNDEF, idx -> vec

This fold was done even if the elements not extracted from vec could be POISON:
  sub = EXTRACT_SUBVECTOR vec, idx
  INSERT_SUBVECTOR UNDEF, sub, idx -> vec

With this patch we avoid such folds unless we can prove that the result isn't more poisonous when eliminating the insert.

Fixes #141034
1 parent eef79c8 commit d9b64b1

11 files changed

+264
-110
lines changed

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1885,6 +1885,12 @@ LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V);
18851885
/// If \p V is not an extracted subvector, it is returned as-is.
18861886
LLVM_ABI SDValue peekThroughExtractSubvectors(SDValue V);
18871887

1888+
/// Recursively peek through INSERT_VECTOR_ELT nodes, returning the source
1889+
/// vector operand of \p V, as long as \p V is an INSERT_VECTOR_ELT operation
1890+
/// that does not insert into any of the demanded vector elts.
1891+
LLVM_ABI SDValue peekThroughInsertVectorElt(SDValue V,
1892+
const APInt &DemandedElts);
1893+
18881894
/// Return the non-truncated source operand of \p V if it exists.
18891895
/// If \p V is not a truncation, it is returned as-is.
18901896
LLVM_ABI SDValue peekThroughTruncates(SDValue V);

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 31 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16832,6 +16832,7 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1683216832
bool AllowMultipleMaybePoisonOperands =
1683316833
N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
1683416834
N0.getOpcode() == ISD::BUILD_VECTOR ||
16835+
N0.getOpcode() == ISD::INSERT_SUBVECTOR ||
1683516836
N0.getOpcode() == ISD::BUILD_PAIR ||
1683616837
N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
1683716838
N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
@@ -23303,6 +23304,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2330323304
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
2330423305
return InVec;
2330523306

23307+
// Remove insert of UNDEF/POISON elements.
23308+
if (InVal.isUndef()) {
23309+
if (InVal.getOpcode() == ISD::POISON || InVec.getOpcode() == ISD::UNDEF)
23310+
return InVec;
23311+
return DAG.getFreeze(InVec);
23312+
}
23313+
2330623314
if (!IndexC) {
2330723315
// If this is variable insert to undef vector, it might be better to splat:
2330823316
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
@@ -27792,18 +27800,34 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2779227800
SDValue N2 = N->getOperand(2);
2779327801
uint64_t InsIdx = N->getConstantOperandVal(2);
2779427802

27795-
// If inserting an UNDEF, just return the original vector.
27796-
if (N1.isUndef())
27797-
return N0;
27803+
// Remove insert of UNDEF/POISON.
27804+
if (N1.isUndef()) {
27805+
if (N1.getOpcode() == ISD::POISON || N0.getOpcode() == ISD::UNDEF)
27806+
return N0;
27807+
return DAG.getFreeze(N0);
27808+
}
2779827809

27799-
// If this is an insert of an extracted vector into an undef vector, we can
27800-
// just use the input to the extract if the types match, and can simplify
27810+
// If this is an insert of an extracted vector into an undef/poison vector, we
27811+
// can just use the input to the extract if the types match, and can simplify
2780127812
// in some cases even if they don't.
2780227813
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
2780327814
N1.getOperand(1) == N2) {
27815+
EVT N1VT = N1.getValueType();
2780427816
EVT SrcVT = N1.getOperand(0).getValueType();
27805-
if (SrcVT == VT)
27806-
return N1.getOperand(0);
27817+
if (SrcVT == VT) {
27818+
// Need to ensure that result isn't more poisonous if skipping both the
27819+
// extract+insert.
27820+
if (N0.getOpcode() == ISD::POISON)
27821+
return N1.getOperand(0);
27822+
if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
27823+
unsigned SubVecNumElts = N1VT.getVectorNumElements();
27824+
APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
27825+
InsIdx + SubVecNumElts);
27826+
if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
27827+
return N1.getOperand(0);
27828+
} else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
27829+
return N1.getOperand(0);
27830+
}
2780727831
// TODO: To remove the zero check, need to adjust the offset to
2780827832
// a multiple of the new src type.
2780927833
if (isNullConstant(N2)) {

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 75 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5525,8 +5525,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
55255525
APInt InVecDemandedElts = DemandedElts;
55265526
InVecDemandedElts.clearBit(IndexC->getZExtValue());
55275527
if (!!InVecDemandedElts &&
5528-
!isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
5529-
PoisonOnly, Depth + 1))
5528+
!isGuaranteedNotToBeUndefOrPoison(
5529+
peekThroughInsertVectorElt(InVec, InVecDemandedElts),
5530+
InVecDemandedElts, PoisonOnly, Depth + 1))
55305531
return false;
55315532
return true;
55325533
}
@@ -8243,23 +8244,42 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
82438244
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
82448245
// for scalable vectors where we will generate appropriate code to
82458246
// deal with out-of-bounds cases correctly.
8246-
if (N3C && N1.getValueType().isFixedLengthVector() &&
8247-
N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
8247+
if (N3C && VT.isFixedLengthVector() &&
8248+
N3C->getZExtValue() >= VT.getVectorNumElements())
82488249
return getUNDEF(VT);
82498250

82508251
// Undefined index can be assumed out-of-bounds, so that's UNDEF too.
82518252
if (N3.isUndef())
82528253
return getUNDEF(VT);
82538254

8254-
// If the inserted element is an UNDEF, just use the input vector.
8255-
if (N2.isUndef())
8255+
// If inserting poison, just use the input vector.
8256+
if (N2.getOpcode() == ISD::POISON)
82568257
return N1;
82578258

8259+
// Inserting undef into undef/poison is still undef.
8260+
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
8261+
return getUNDEF(VT);
8262+
8263+
// If the inserted element is an UNDEF, just use the input vector.
8264+
// But not if skipping the insert could make the result more poisonous.
8265+
if (N2.isUndef()) {
8266+
if (N3C && VT.isFixedLengthVector()) {
8267+
APInt EltMask =
8268+
APInt::getOneBitSet(VT.getVectorNumElements(), N3C->getZExtValue());
8269+
if (isGuaranteedNotToBePoison(N1, EltMask))
8270+
return N1;
8271+
} else if (isGuaranteedNotToBePoison(N1))
8272+
return N1;
8273+
}
82588274
break;
82598275
}
82608276
case ISD::INSERT_SUBVECTOR: {
8261-
// Inserting undef into undef is still undef.
8262-
if (N1.isUndef() && N2.isUndef())
8277+
// If inserting poison, just use the input vector.
8278+
if (N2.getOpcode() == ISD::POISON)
8279+
return N1;
8280+
8281+
// Inserting undef into undef/poison is still undef.
8282+
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
82638283
return getUNDEF(VT);
82648284

82658285
EVT N2VT = N2.getValueType();
@@ -8288,11 +8308,37 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
82888308
if (VT == N2VT)
82898309
return N2;
82908310

8291-
// If this is an insert of an extracted vector into an undef vector, we
8292-
// can just use the input to the extract.
8311+
// If this is an insert of an extracted vector into an undef/poison vector,
8312+
// we can just use the input to the extract. But not if skipping the
8313+
// extract+insert could make the result more poisonous.
82938314
if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
8294-
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
8295-
return N2.getOperand(0);
8315+
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) {
8316+
if (N1.getOpcode() == ISD::POISON)
8317+
return N2.getOperand(0);
8318+
if (VT.isFixedLengthVector() && N2VT.isFixedLengthVector()) {
8319+
unsigned LoBit = N3->getAsZExtVal();
8320+
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
8321+
APInt EltMask =
8322+
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
8323+
if (isGuaranteedNotToBePoison(N2.getOperand(0), ~EltMask))
8324+
return N2.getOperand(0);
8325+
} else if (isGuaranteedNotToBePoison(N2.getOperand(0)))
8326+
return N2.getOperand(0);
8327+
}
8328+
8329+
// If the inserted subvector is UNDEF, just use the input vector.
8330+
// But not if skipping the insert could make the result more poisonous.
8331+
if (N2.isUndef()) {
8332+
if (VT.isFixedLengthVector()) {
8333+
unsigned LoBit = N3->getAsZExtVal();
8334+
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
8335+
APInt EltMask =
8336+
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
8337+
if (isGuaranteedNotToBePoison(N1, EltMask))
8338+
return N1;
8339+
} else if (isGuaranteedNotToBePoison(N1))
8340+
return N1;
8341+
}
82968342
break;
82978343
}
82988344
case ISD::BITCAST:
@@ -12770,6 +12816,23 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
1277012816
return V;
1277112817
}
1277212818

12819+
SDValue llvm::peekThroughInsertVectorElt(SDValue V, const APInt &DemandedElts) {
12820+
while (V.getOpcode() == ISD::INSERT_VECTOR_ELT) {
12821+
SDValue InVec = V.getOperand(0);
12822+
SDValue EltNo = V.getOperand(2);
12823+
EVT VT = InVec.getValueType();
12824+
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
12825+
if (IndexC && VT.isFixedLengthVector() &&
12826+
IndexC->getAPIntValue().ult(VT.getVectorNumElements()) &&
12827+
!DemandedElts[IndexC->getZExtValue()]) {
12828+
V = InVec;
12829+
continue;
12830+
}
12831+
break;
12832+
}
12833+
return V;
12834+
}
12835+
1277312836
SDValue llvm::peekThroughTruncates(SDValue V) {
1277412837
while (V.getOpcode() == ISD::TRUNCATE)
1277512838
V = V.getOperand(0);

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3439,8 +3439,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
34393439
break;
34403440
}
34413441
case ISD::INSERT_SUBVECTOR: {
3442-
// Demand any elements from the subvector and the remainder from the src its
3443-
// inserted into.
3442+
// Demand any elements from the subvector and the remainder from the src it
3443+
// is inserted into.
34443444
SDValue Src = Op.getOperand(0);
34453445
SDValue Sub = Op.getOperand(1);
34463446
uint64_t Idx = Op.getConstantOperandVal(2);
@@ -3449,6 +3449,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
34493449
APInt DemandedSrcElts = DemandedElts;
34503450
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
34513451

3452+
// If none of the sub operand elements are demanded, bypass the insert.
3453+
if (!DemandedSubElts)
3454+
return TLO.CombineTo(Op, Src);
3455+
34523456
APInt SubUndef, SubZero;
34533457
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
34543458
Depth + 1))

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15402,7 +15402,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
1540215402
for (unsigned i = 0; i < NumElts; ++i) {
1540315403
SDValue V = Op.getOperand(i);
1540415404
SDValue LaneIdx = DAG.getConstant(i, DL, MVT::i64);
15405-
if (!isIntOrFPConstant(V))
15405+
if (!isIntOrFPConstant(V) && !V.isUndef())
1540615406
// Note that type legalization likely mucked about with the VT of the
1540715407
// source operand, so we may have to convert it here before inserting.
1540815408
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Val, V, LaneIdx);

llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -94,16 +94,14 @@ define i32 @combine_undef_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d) local_unname
9494
; CHECK-LABEL: combine_undef_add_8xi32:
9595
; CHECK: // %bb.0:
9696
; CHECK-NEXT: fmov s1, w0
97-
; CHECK-NEXT: movi v0.2d, #0000000000000000
97+
; CHECK-NEXT: dup v0.4s, w8
9898
; CHECK-NEXT: mov v1.s[1], w1
99-
; CHECK-NEXT: uhadd v0.4h, v0.4h, v0.4h
10099
; CHECK-NEXT: mov v1.s[2], w2
101100
; CHECK-NEXT: mov v1.s[3], w3
102-
; CHECK-NEXT: xtn v2.4h, v1.4s
103-
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
104-
; CHECK-NEXT: uhadd v1.4h, v2.4h, v1.4h
105-
; CHECK-NEXT: mov v1.d[1], v0.d[0]
106-
; CHECK-NEXT: uaddlv s0, v1.8h
101+
; CHECK-NEXT: uzp2 v2.8h, v1.8h, v0.8h
102+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
103+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v2.8h
104+
; CHECK-NEXT: uaddlv s0, v0.8h
107105
; CHECK-NEXT: fmov w0, s0
108106
; CHECK-NEXT: ret
109107
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1198,11 +1198,15 @@ define void @masked_gather_passthru(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #
11981198
; CHECK-NEXT: ptrue p0.s, vl32
11991199
; CHECK-NEXT: ptrue p2.d, vl32
12001200
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1201-
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
12021201
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
12031202
; CHECK-NEXT: ld1d { z0.d }, p2/z, [x1]
12041203
; CHECK-NEXT: punpklo p2.h, p1.b
1204+
; CHECK-NEXT: mov z1.s, p1/z, #-1 // =0xffffffffffffffff
1205+
; CHECK-NEXT: ptrue p1.s
12051206
; CHECK-NEXT: ld1w { z0.d }, p2/z, [z0.d]
1207+
; CHECK-NEXT: and z1.s, z1.s, #0x1
1208+
; CHECK-NEXT: cmpne p1.s, p1/z, z1.s, #0
1209+
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
12061210
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
12071211
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
12081212
; CHECK-NEXT: st1w { z0.s }, p0, [x0]

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfw-web-simplification.ll

Lines changed: 60 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,42 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
3-
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
2+
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,NO_FOLDING1
3+
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,NO_FOLDING2
44
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFH
55
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFHMIN
66
; Check that the default value enables the web folding and
77
; that it is bigger than 3.
88
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING
99

1010
define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a, <2 x half> %b, <2 x half> %b2) {
11-
; NO_FOLDING-LABEL: vfwmul_v2f116_multiple_users:
12-
; NO_FOLDING: # %bb.0:
13-
; NO_FOLDING-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
14-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v11, v8
15-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v8, v9
16-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v9, v10
17-
; NO_FOLDING-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
18-
; NO_FOLDING-NEXT: vfmul.vv v10, v11, v8
19-
; NO_FOLDING-NEXT: vfadd.vv v11, v11, v9
20-
; NO_FOLDING-NEXT: vfsub.vv v8, v8, v9
21-
; NO_FOLDING-NEXT: vse32.v v10, (a0)
22-
; NO_FOLDING-NEXT: vse32.v v11, (a1)
23-
; NO_FOLDING-NEXT: vse32.v v8, (a2)
24-
; NO_FOLDING-NEXT: ret
11+
; NO_FOLDING1-LABEL: vfwmul_v2f116_multiple_users:
12+
; NO_FOLDING1: # %bb.0:
13+
; NO_FOLDING1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
14+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v11, v8
15+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v8, v9
16+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v9, v10
17+
; NO_FOLDING1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
18+
; NO_FOLDING1-NEXT: vfmul.vv v10, v11, v8
19+
; NO_FOLDING1-NEXT: vfadd.vv v11, v11, v9
20+
; NO_FOLDING1-NEXT: vfsub.vv v8, v8, v9
21+
; NO_FOLDING1-NEXT: vse32.v v10, (a0)
22+
; NO_FOLDING1-NEXT: vse32.v v11, (a1)
23+
; NO_FOLDING1-NEXT: vse32.v v8, (a2)
24+
; NO_FOLDING1-NEXT: ret
25+
;
26+
; NO_FOLDING2-LABEL: vfwmul_v2f116_multiple_users:
27+
; NO_FOLDING2: # %bb.0:
28+
; NO_FOLDING2-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
29+
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v11, v8
30+
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v8, v9
31+
; NO_FOLDING2-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
32+
; NO_FOLDING2-NEXT: vfmul.vv v9, v11, v8
33+
; NO_FOLDING2-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
34+
; NO_FOLDING2-NEXT: vfwadd.wv v11, v11, v10
35+
; NO_FOLDING2-NEXT: vfwsub.wv v8, v8, v10
36+
; NO_FOLDING2-NEXT: vse32.v v9, (a0)
37+
; NO_FOLDING2-NEXT: vse32.v v11, (a1)
38+
; NO_FOLDING2-NEXT: vse32.v v8, (a2)
39+
; NO_FOLDING2-NEXT: ret
2540
;
2641
; ZVFH-LABEL: vfwmul_v2f116_multiple_users:
2742
; ZVFH: # %bb.0:
@@ -61,20 +76,35 @@ define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a,
6176
}
6277

6378
define void @vfwmul_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
64-
; NO_FOLDING-LABEL: vfwmul_v2f32_multiple_users:
65-
; NO_FOLDING: # %bb.0:
66-
; NO_FOLDING-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
67-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v11, v8
68-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v8, v9
69-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v9, v10
70-
; NO_FOLDING-NEXT: vsetvli zero, zero, e64, m1, ta, ma
71-
; NO_FOLDING-NEXT: vfmul.vv v10, v11, v8
72-
; NO_FOLDING-NEXT: vfadd.vv v11, v11, v9
73-
; NO_FOLDING-NEXT: vfsub.vv v8, v8, v9
74-
; NO_FOLDING-NEXT: vse64.v v10, (a0)
75-
; NO_FOLDING-NEXT: vse64.v v11, (a1)
76-
; NO_FOLDING-NEXT: vse64.v v8, (a2)
77-
; NO_FOLDING-NEXT: ret
79+
; NO_FOLDING1-LABEL: vfwmul_v2f32_multiple_users:
80+
; NO_FOLDING1: # %bb.0:
81+
; NO_FOLDING1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
82+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v11, v8
83+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v8, v9
84+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v9, v10
85+
; NO_FOLDING1-NEXT: vsetvli zero, zero, e64, m1, ta, ma
86+
; NO_FOLDING1-NEXT: vfmul.vv v10, v11, v8
87+
; NO_FOLDING1-NEXT: vfadd.vv v11, v11, v9
88+
; NO_FOLDING1-NEXT: vfsub.vv v8, v8, v9
89+
; NO_FOLDING1-NEXT: vse64.v v10, (a0)
90+
; NO_FOLDING1-NEXT: vse64.v v11, (a1)
91+
; NO_FOLDING1-NEXT: vse64.v v8, (a2)
92+
; NO_FOLDING1-NEXT: ret
93+
;
94+
; NO_FOLDING2-LABEL: vfwmul_v2f32_multiple_users:
95+
; NO_FOLDING2: # %bb.0:
96+
; NO_FOLDING2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
97+
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v11, v8
98+
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v8, v9
99+
; NO_FOLDING2-NEXT: vsetvli zero, zero, e64, m1, ta, ma
100+
; NO_FOLDING2-NEXT: vfmul.vv v9, v11, v8
101+
; NO_FOLDING2-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
102+
; NO_FOLDING2-NEXT: vfwadd.wv v11, v11, v10
103+
; NO_FOLDING2-NEXT: vfwsub.wv v8, v8, v10
104+
; NO_FOLDING2-NEXT: vse64.v v9, (a0)
105+
; NO_FOLDING2-NEXT: vse64.v v11, (a1)
106+
; NO_FOLDING2-NEXT: vse64.v v8, (a2)
107+
; NO_FOLDING2-NEXT: ret
78108
;
79109
; FOLDING-LABEL: vfwmul_v2f32_multiple_users:
80110
; FOLDING: # %bb.0:

0 commit comments

Comments
 (0)