Skip to content

Commit 1c4c7bd

Browse files
authored
[SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (#143102)
As reported in #141034, SelectionDAG::getNode had some unexpected behaviors when trying to create vectors with UNDEF elements. Since we treat both UNDEF and POISON as undefined (when using isUndef()), we can't just fold away INSERT_VECTOR_ELT/INSERT_SUBVECTOR based on isUndef(), as that could make the resulting vector more poisonous. The same kind of bug existed in DAGCombiner::visitINSERT_SUBVECTOR.

Here are some examples:

This fold was done even if vec[idx] was POISON:
  INSERT_VECTOR_ELT vec, UNDEF, idx -> vec

This fold was done even if any of vec[idx..idx+size] was POISON:
  INSERT_SUBVECTOR vec, UNDEF, idx -> vec

This fold was done even if the elements not extracted from vec could be POISON:
  sub = EXTRACT_SUBVECTOR vec, idx
  INSERT_SUBVECTOR UNDEF, sub, idx -> vec

With this patch we avoid such folds unless we can prove that the result isn't more poisonous when eliminating the insert.

Fixes #141034
1 parent 96f2ab2 commit 1c4c7bd

11 files changed

+262
-108
lines changed

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,6 +1885,12 @@ LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V);
18851885
/// If \p V is not an extracted subvector, it is returned as-is.
18861886
LLVM_ABI SDValue peekThroughExtractSubvectors(SDValue V);
18871887

1888+
/// Recursively peek through INSERT_VECTOR_ELT nodes, returning the source
1889+
/// vector operand of \p V, as long as \p V is an INSERT_VECTOR_ELT operation
1890+
/// that does not insert into any of the demanded vector elts.
1891+
LLVM_ABI SDValue peekThroughInsertVectorElt(SDValue V,
1892+
const APInt &DemandedElts);
1893+
18881894
/// Return the non-truncated source operand of \p V if it exists.
18891895
/// If \p V is not a truncation, it is returned as-is.
18901896
LLVM_ABI SDValue peekThroughTruncates(SDValue V);

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16892,6 +16892,7 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1689216892
bool AllowMultipleMaybePoisonOperands =
1689316893
N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
1689416894
N0.getOpcode() == ISD::BUILD_VECTOR ||
16895+
N0.getOpcode() == ISD::INSERT_SUBVECTOR ||
1689516896
N0.getOpcode() == ISD::BUILD_PAIR ||
1689616897
N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
1689716898
N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
@@ -23363,6 +23364,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2336323364
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
2336423365
return InVec;
2336523366

23367+
// Remove insert of UNDEF/POISON elements.
23368+
if (InVal.isUndef()) {
23369+
if (InVal.getOpcode() == ISD::POISON || InVec.getOpcode() == ISD::UNDEF)
23370+
return InVec;
23371+
return DAG.getFreeze(InVec);
23372+
}
23373+
2336623374
if (!IndexC) {
2336723375
// If this is variable insert to undef vector, it might be better to splat:
2336823376
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
@@ -27852,18 +27860,34 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2785227860
SDValue N2 = N->getOperand(2);
2785327861
uint64_t InsIdx = N->getConstantOperandVal(2);
2785427862

27855-
// If inserting an UNDEF, just return the original vector.
27856-
if (N1.isUndef())
27857-
return N0;
27863+
// Remove insert of UNDEF/POISON.
27864+
if (N1.isUndef()) {
27865+
if (N1.getOpcode() == ISD::POISON || N0.getOpcode() == ISD::UNDEF)
27866+
return N0;
27867+
return DAG.getFreeze(N0);
27868+
}
2785827869

27859-
// If this is an insert of an extracted vector into an undef vector, we can
27860-
// just use the input to the extract if the types match, and can simplify
27870+
// If this is an insert of an extracted vector into an undef/poison vector, we
27871+
// can just use the input to the extract if the types match, and can simplify
2786127872
// in some cases even if they don't.
2786227873
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
2786327874
N1.getOperand(1) == N2) {
27875+
EVT N1VT = N1.getValueType();
2786427876
EVT SrcVT = N1.getOperand(0).getValueType();
27865-
if (SrcVT == VT)
27866-
return N1.getOperand(0);
27877+
if (SrcVT == VT) {
27878+
// Need to ensure that result isn't more poisonous if skipping both the
27879+
// extract+insert.
27880+
if (N0.getOpcode() == ISD::POISON)
27881+
return N1.getOperand(0);
27882+
if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
27883+
unsigned SubVecNumElts = N1VT.getVectorNumElements();
27884+
APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
27885+
InsIdx + SubVecNumElts);
27886+
if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
27887+
return N1.getOperand(0);
27888+
} else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
27889+
return N1.getOperand(0);
27890+
}
2786727891
// TODO: To remove the zero check, need to adjust the offset to
2786827892
// a multiple of the new src type.
2786927893
if (isNullConstant(N2)) {

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5525,8 +5525,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
55255525
APInt InVecDemandedElts = DemandedElts;
55265526
InVecDemandedElts.clearBit(IndexC->getZExtValue());
55275527
if (!!InVecDemandedElts &&
5528-
!isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
5529-
PoisonOnly, Depth + 1))
5528+
!isGuaranteedNotToBeUndefOrPoison(
5529+
peekThroughInsertVectorElt(InVec, InVecDemandedElts),
5530+
InVecDemandedElts, PoisonOnly, Depth + 1))
55305531
return false;
55315532
return true;
55325533
}
@@ -8255,15 +8256,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
82558256
if (N3.isUndef())
82568257
return getUNDEF(VT);
82578258

8258-
// If the inserted element is an UNDEF, just use the input vector.
8259-
if (N2.isUndef())
8259+
// If inserting poison, just use the input vector.
8260+
if (N2.getOpcode() == ISD::POISON)
82608261
return N1;
82618262

8263+
// Inserting undef into undef/poison is still undef.
8264+
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
8265+
return getUNDEF(VT);
8266+
8267+
// If the inserted element is an UNDEF, just use the input vector.
8268+
// But not if skipping the insert could make the result more poisonous.
8269+
if (N2.isUndef()) {
8270+
if (N3C && VT.isFixedLengthVector()) {
8271+
APInt EltMask =
8272+
APInt::getOneBitSet(VT.getVectorNumElements(), N3C->getZExtValue());
8273+
if (isGuaranteedNotToBePoison(N1, EltMask))
8274+
return N1;
8275+
} else if (isGuaranteedNotToBePoison(N1))
8276+
return N1;
8277+
}
82628278
break;
82638279
}
82648280
case ISD::INSERT_SUBVECTOR: {
8265-
// Inserting undef into undef is still undef.
8266-
if (N1.isUndef() && N2.isUndef())
8281+
// If inserting poison, just use the input vector.
8282+
if (N2.getOpcode() == ISD::POISON)
8283+
return N1;
8284+
8285+
// Inserting undef into undef/poison is still undef.
8286+
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
82678287
return getUNDEF(VT);
82688288

82698289
EVT N2VT = N2.getValueType();
@@ -8292,11 +8312,37 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
82928312
if (VT == N2VT)
82938313
return N2;
82948314

8295-
// If this is an insert of an extracted vector into an undef vector, we
8296-
// can just use the input to the extract.
8315+
// If this is an insert of an extracted vector into an undef/poison vector,
8316+
// we can just use the input to the extract. But not if skipping the
8317+
// extract+insert could make the result more poisonous.
82978318
if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
8298-
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
8299-
return N2.getOperand(0);
8319+
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) {
8320+
if (N1.getOpcode() == ISD::POISON)
8321+
return N2.getOperand(0);
8322+
if (VT.isFixedLengthVector() && N2VT.isFixedLengthVector()) {
8323+
unsigned LoBit = N3->getAsZExtVal();
8324+
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
8325+
APInt EltMask =
8326+
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
8327+
if (isGuaranteedNotToBePoison(N2.getOperand(0), ~EltMask))
8328+
return N2.getOperand(0);
8329+
} else if (isGuaranteedNotToBePoison(N2.getOperand(0)))
8330+
return N2.getOperand(0);
8331+
}
8332+
8333+
// If the inserted subvector is UNDEF, just use the input vector.
8334+
// But not if skipping the insert could make the result more poisonous.
8335+
if (N2.isUndef()) {
8336+
if (VT.isFixedLengthVector()) {
8337+
unsigned LoBit = N3->getAsZExtVal();
8338+
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
8339+
APInt EltMask =
8340+
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
8341+
if (isGuaranteedNotToBePoison(N1, EltMask))
8342+
return N1;
8343+
} else if (isGuaranteedNotToBePoison(N1))
8344+
return N1;
8345+
}
83008346
break;
83018347
}
83028348
case ISD::BITCAST:
@@ -12778,6 +12824,23 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
1277812824
return V;
1277912825
}
1278012826

12827+
SDValue llvm::peekThroughInsertVectorElt(SDValue V, const APInt &DemandedElts) {
12828+
while (V.getOpcode() == ISD::INSERT_VECTOR_ELT) {
12829+
SDValue InVec = V.getOperand(0);
12830+
SDValue EltNo = V.getOperand(2);
12831+
EVT VT = InVec.getValueType();
12832+
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
12833+
if (IndexC && VT.isFixedLengthVector() &&
12834+
IndexC->getAPIntValue().ult(VT.getVectorNumElements()) &&
12835+
!DemandedElts[IndexC->getZExtValue()]) {
12836+
V = InVec;
12837+
continue;
12838+
}
12839+
break;
12840+
}
12841+
return V;
12842+
}
12843+
1278112844
SDValue llvm::peekThroughTruncates(SDValue V) {
1278212845
while (V.getOpcode() == ISD::TRUNCATE)
1278312846
V = V.getOperand(0);

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3418,8 +3418,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
34183418
break;
34193419
}
34203420
case ISD::INSERT_SUBVECTOR: {
3421-
// Demand any elements from the subvector and the remainder from the src its
3422-
// inserted into.
3421+
// Demand any elements from the subvector and the remainder from the src it
3422+
// is inserted into.
34233423
SDValue Src = Op.getOperand(0);
34243424
SDValue Sub = Op.getOperand(1);
34253425
uint64_t Idx = Op.getConstantOperandVal(2);
@@ -3428,6 +3428,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
34283428
APInt DemandedSrcElts = DemandedElts;
34293429
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
34303430

3431+
// If none of the sub operand elements are demanded, bypass the insert.
3432+
if (!DemandedSubElts)
3433+
return TLO.CombineTo(Op, Src);
3434+
34313435
APInt SubUndef, SubZero;
34323436
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
34333437
Depth + 1))

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15676,7 +15676,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
1567615676
for (unsigned i = 0; i < NumElts; ++i) {
1567715677
SDValue V = Op.getOperand(i);
1567815678
SDValue LaneIdx = DAG.getConstant(i, DL, MVT::i64);
15679-
if (!isIntOrFPConstant(V))
15679+
if (!isIntOrFPConstant(V) && !V.isUndef())
1568015680
// Note that type legalization likely mucked about with the VT of the
1568115681
// source operand, so we may have to convert it here before inserting.
1568215682
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Val, V, LaneIdx);

llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,14 @@ define i32 @combine_undef_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d) local_unname
9494
; CHECK-LABEL: combine_undef_add_8xi32:
9595
; CHECK: // %bb.0:
9696
; CHECK-NEXT: fmov s1, w0
97-
; CHECK-NEXT: movi v0.2d, #0000000000000000
97+
; CHECK-NEXT: dup v0.4s, w8
9898
; CHECK-NEXT: mov v1.s[1], w1
99-
; CHECK-NEXT: uhadd v0.4h, v0.4h, v0.4h
10099
; CHECK-NEXT: mov v1.s[2], w2
101100
; CHECK-NEXT: mov v1.s[3], w3
102-
; CHECK-NEXT: xtn v2.4h, v1.4s
103-
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
104-
; CHECK-NEXT: uhadd v1.4h, v2.4h, v1.4h
105-
; CHECK-NEXT: mov v1.d[1], v0.d[0]
106-
; CHECK-NEXT: uaddlv s0, v1.8h
101+
; CHECK-NEXT: uzp2 v2.8h, v1.8h, v0.8h
102+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
103+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v2.8h
104+
; CHECK-NEXT: uaddlv s0, v0.8h
107105
; CHECK-NEXT: fmov w0, s0
108106
; CHECK-NEXT: ret
109107
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1198,11 +1198,15 @@ define void @masked_gather_passthru(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #
11981198
; CHECK-NEXT: ptrue p0.s, vl32
11991199
; CHECK-NEXT: ptrue p2.d, vl32
12001200
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1201-
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
12021201
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
12031202
; CHECK-NEXT: ld1d { z0.d }, p2/z, [x1]
12041203
; CHECK-NEXT: punpklo p2.h, p1.b
1204+
; CHECK-NEXT: mov z1.s, p1/z, #-1 // =0xffffffffffffffff
1205+
; CHECK-NEXT: ptrue p1.s
12051206
; CHECK-NEXT: ld1w { z0.d }, p2/z, [z0.d]
1207+
; CHECK-NEXT: and z1.s, z1.s, #0x1
1208+
; CHECK-NEXT: cmpne p1.s, p1/z, z1.s, #0
1209+
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
12061210
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
12071211
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
12081212
; CHECK-NEXT: st1w { z0.s }, p0, [x0]

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfw-web-simplification.ll

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,42 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
3-
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
2+
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,NO_FOLDING1
3+
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,NO_FOLDING2
44
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFH
55
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFHMIN
66
; Check that the default value enables the web folding and
77
; that it is bigger than 3.
88
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING
99

1010
define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a, <2 x half> %b, <2 x half> %b2) {
11-
; NO_FOLDING-LABEL: vfwmul_v2f116_multiple_users:
12-
; NO_FOLDING: # %bb.0:
13-
; NO_FOLDING-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
14-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v11, v8
15-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v8, v9
16-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v9, v10
17-
; NO_FOLDING-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
18-
; NO_FOLDING-NEXT: vfmul.vv v10, v11, v8
19-
; NO_FOLDING-NEXT: vfadd.vv v11, v11, v9
20-
; NO_FOLDING-NEXT: vfsub.vv v8, v8, v9
21-
; NO_FOLDING-NEXT: vse32.v v10, (a0)
22-
; NO_FOLDING-NEXT: vse32.v v11, (a1)
23-
; NO_FOLDING-NEXT: vse32.v v8, (a2)
24-
; NO_FOLDING-NEXT: ret
11+
; NO_FOLDING1-LABEL: vfwmul_v2f116_multiple_users:
12+
; NO_FOLDING1: # %bb.0:
13+
; NO_FOLDING1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
14+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v11, v8
15+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v8, v9
16+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v9, v10
17+
; NO_FOLDING1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
18+
; NO_FOLDING1-NEXT: vfmul.vv v10, v11, v8
19+
; NO_FOLDING1-NEXT: vfadd.vv v11, v11, v9
20+
; NO_FOLDING1-NEXT: vfsub.vv v8, v8, v9
21+
; NO_FOLDING1-NEXT: vse32.v v10, (a0)
22+
; NO_FOLDING1-NEXT: vse32.v v11, (a1)
23+
; NO_FOLDING1-NEXT: vse32.v v8, (a2)
24+
; NO_FOLDING1-NEXT: ret
25+
;
26+
; NO_FOLDING2-LABEL: vfwmul_v2f116_multiple_users:
27+
; NO_FOLDING2: # %bb.0:
28+
; NO_FOLDING2-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
29+
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v11, v8
30+
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v8, v9
31+
; NO_FOLDING2-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
32+
; NO_FOLDING2-NEXT: vfmul.vv v9, v11, v8
33+
; NO_FOLDING2-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
34+
; NO_FOLDING2-NEXT: vfwadd.wv v11, v11, v10
35+
; NO_FOLDING2-NEXT: vfwsub.wv v8, v8, v10
36+
; NO_FOLDING2-NEXT: vse32.v v9, (a0)
37+
; NO_FOLDING2-NEXT: vse32.v v11, (a1)
38+
; NO_FOLDING2-NEXT: vse32.v v8, (a2)
39+
; NO_FOLDING2-NEXT: ret
2540
;
2641
; ZVFH-LABEL: vfwmul_v2f116_multiple_users:
2742
; ZVFH: # %bb.0:
@@ -61,20 +76,35 @@ define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a,
6176
}
6277

6378
define void @vfwmul_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
64-
; NO_FOLDING-LABEL: vfwmul_v2f32_multiple_users:
65-
; NO_FOLDING: # %bb.0:
66-
; NO_FOLDING-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
67-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v11, v8
68-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v8, v9
69-
; NO_FOLDING-NEXT: vfwcvt.f.f.v v9, v10
70-
; NO_FOLDING-NEXT: vsetvli zero, zero, e64, m1, ta, ma
71-
; NO_FOLDING-NEXT: vfmul.vv v10, v11, v8
72-
; NO_FOLDING-NEXT: vfadd.vv v11, v11, v9
73-
; NO_FOLDING-NEXT: vfsub.vv v8, v8, v9
74-
; NO_FOLDING-NEXT: vse64.v v10, (a0)
75-
; NO_FOLDING-NEXT: vse64.v v11, (a1)
76-
; NO_FOLDING-NEXT: vse64.v v8, (a2)
77-
; NO_FOLDING-NEXT: ret
79+
; NO_FOLDING1-LABEL: vfwmul_v2f32_multiple_users:
80+
; NO_FOLDING1: # %bb.0:
81+
; NO_FOLDING1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
82+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v11, v8
83+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v8, v9
84+
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v9, v10
85+
; NO_FOLDING1-NEXT: vsetvli zero, zero, e64, m1, ta, ma
86+
; NO_FOLDING1-NEXT: vfmul.vv v10, v11, v8
87+
; NO_FOLDING1-NEXT: vfadd.vv v11, v11, v9
88+
; NO_FOLDING1-NEXT: vfsub.vv v8, v8, v9
89+
; NO_FOLDING1-NEXT: vse64.v v10, (a0)
90+
; NO_FOLDING1-NEXT: vse64.v v11, (a1)
91+
; NO_FOLDING1-NEXT: vse64.v v8, (a2)
92+
; NO_FOLDING1-NEXT: ret
93+
;
94+
; NO_FOLDING2-LABEL: vfwmul_v2f32_multiple_users:
95+
; NO_FOLDING2: # %bb.0:
96+
; NO_FOLDING2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
97+
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v11, v8
98+
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v8, v9
99+
; NO_FOLDING2-NEXT: vsetvli zero, zero, e64, m1, ta, ma
100+
; NO_FOLDING2-NEXT: vfmul.vv v9, v11, v8
101+
; NO_FOLDING2-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
102+
; NO_FOLDING2-NEXT: vfwadd.wv v11, v11, v10
103+
; NO_FOLDING2-NEXT: vfwsub.wv v8, v8, v10
104+
; NO_FOLDING2-NEXT: vse64.v v9, (a0)
105+
; NO_FOLDING2-NEXT: vse64.v v11, (a1)
106+
; NO_FOLDING2-NEXT: vse64.v v8, (a2)
107+
; NO_FOLDING2-NEXT: ret
78108
;
79109
; FOLDING-LABEL: vfwmul_v2f32_multiple_users:
80110
; FOLDING: # %bb.0:

0 commit comments

Comments
 (0)