Skip to content

Commit e3274a2

Browse files
committed
[VectorCombine] Support simplification to scalar store for multiple insertelt
Previously, we supported simplifying load-insertelt-store to getelementptr-store only when a single insertelement existed. This patch extends the simplification to chains of multiple insertelements. Proof: https://alive2.llvm.org/ce/z/QTspTf
1 parent ea11ce1 commit e3274a2

File tree

2 files changed

+138
-100
lines changed

2 files changed

+138
-100
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 66 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class VectorCombine {
115115
bool scalarizeVPIntrinsic(Instruction &I);
116116
bool foldExtractedCmps(Instruction &I);
117117
bool foldBinopOfReductions(Instruction &I);
118-
bool foldSingleElementStore(Instruction &I);
118+
bool foldInsertElementsStore(Instruction &I);
119119
bool scalarizeLoadExtract(Instruction &I);
120120
bool foldConcatOfBoolMasks(Instruction &I);
121121
bool foldPermuteOfBinops(Instruction &I);
@@ -1493,58 +1493,88 @@ static Align computeAlignmentAfterScalarization(Align VectorAlignment,
14931493
// %0 = bitcast <4 x i32>* %a to i32*
14941494
// %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
14951495
// store i32 %b, i32* %1
1496-
bool VectorCombine::foldSingleElementStore(Instruction &I) {
1496+
bool VectorCombine::foldInsertElementsStore(Instruction &I) {
14971497
auto *SI = cast<StoreInst>(&I);
14981498
if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
14991499
return false;
15001500

1501-
// TODO: Combine more complicated patterns (multiple insert) by referencing
1502-
// TargetTransformInfo.
1503-
Instruction *Source;
1504-
Value *NewElement;
1505-
Value *Idx;
1506-
if (!match(SI->getValueOperand(),
1507-
m_InsertElt(m_Instruction(Source), m_Value(NewElement),
1508-
m_Value(Idx))))
1509-
return false;
1510-
1511-
if (auto *Load = dyn_cast<LoadInst>(Source)) {
1512-
auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
1513-
Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
1514-
// Don't optimize for atomic/volatile load or store. Ensure memory is not
1515-
// modified between, vector type matches store size, and index is inbounds.
1516-
if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
1517-
!DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
1518-
SrcAddr != SI->getPointerOperand()->stripPointerCasts())
1519-
return false;
1501+
Value *Source = SI->getValueOperand();
1502+
// Track back multiple inserts.
1503+
SmallVector<std::pair<Value *, Value *>, 4> InsertElements;
1504+
Value *Base = Source;
1505+
while (auto *Insert = dyn_cast<InsertElementInst>(Base)) {
1506+
if (!Insert->hasOneUse())
1507+
break;
1508+
Value *InsertVal = Insert->getOperand(1);
1509+
Value *Idx = Insert->getOperand(2);
1510+
InsertElements.push_back({InsertVal, Idx});
1511+
Base = Insert->getOperand(0);
1512+
}
15201513

1521-
auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
1522-
if (ScalarizableIdx.isUnsafe() ||
1523-
isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
1524-
MemoryLocation::get(SI), AA))
1525-
return false;
1514+
if (InsertElements.empty())
1515+
return false;
15261516

1527-
// Ensure we add the load back to the worklist BEFORE its users so they can
1528-
// erased in the correct order.
1529-
Worklist.push(Load);
1517+
auto *Load = dyn_cast<LoadInst>(Base);
1518+
if (!Load)
1519+
return false;
15301520

1521+
auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
1522+
Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
1523+
// Don't optimize for atomic/volatile load or store. Ensure memory is not
1524+
// modified between, vector type matches store size, and index is inbounds.
1525+
if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
1526+
!DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
1527+
SrcAddr != SI->getPointerOperand()->stripPointerCasts())
1528+
return false;
1529+
1530+
if (isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
1531+
MemoryLocation::get(SI), AA))
1532+
return false;
1533+
1534+
for (size_t i = 0; i < InsertElements.size(); i++) {
1535+
Value *Idx = InsertElements[i].second;
1536+
auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
1537+
if (ScalarizableIdx.isUnsafe())
1538+
return false;
15311539
if (ScalarizableIdx.isSafeWithFreeze())
15321540
ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
1541+
}
1542+
1543+
// Ensure we add the load back to the worklist BEFORE its users so they can
1544+
// be erased in the correct order.
1545+
Worklist.push(Load);
1546+
stable_sort(InsertElements, [](const std::pair<Value *, Value *> &A,
1547+
const std::pair<Value *, Value *> &B) {
1548+
bool AIsConst = isa<ConstantInt>(A.second);
1549+
bool BIsConst = isa<ConstantInt>(B.second);
1550+
if (AIsConst != BIsConst)
1551+
return AIsConst;
1552+
1553+
if (AIsConst && BIsConst)
1554+
return cast<ConstantInt>(A.second)->getZExtValue() <
1555+
cast<ConstantInt>(B.second)->getZExtValue();
1556+
return false;
1557+
});
1558+
1559+
StoreInst *NSI;
1560+
for (size_t i = 0; i < InsertElements.size(); i++) {
1561+
Value *InsertVal = InsertElements[i].first;
1562+
Value *Idx = InsertElements[i].second;
1563+
15331564
Value *GEP = Builder.CreateInBoundsGEP(
15341565
SI->getValueOperand()->getType(), SI->getPointerOperand(),
15351566
{ConstantInt::get(Idx->getType(), 0), Idx});
1536-
StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
1567+
NSI = Builder.CreateStore(InsertVal, GEP);
15371568
NSI->copyMetadata(*SI);
15381569
Align ScalarOpAlignment = computeAlignmentAfterScalarization(
1539-
std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
1570+
std::max(SI->getAlign(), Load->getAlign()), InsertVal->getType(), Idx,
15401571
*DL);
15411572
NSI->setAlignment(ScalarOpAlignment);
1542-
replaceValue(I, *NSI);
1543-
eraseInstruction(I);
1544-
return true;
15451573
}
15461574

1547-
return false;
1575+
replaceValue(I, *NSI);
1576+
eraseInstruction(I);
1577+
return true;
15481578
}
15491579

15501580
/// Try to scalarize vector loads feeding extractelement instructions.
@@ -3527,7 +3557,7 @@ bool VectorCombine::run() {
35273557
}
35283558

35293559
if (Opcode == Instruction::Store)
3530-
MadeChange |= foldSingleElementStore(I);
3560+
MadeChange |= foldInsertElementsStore(I);
35313561

35323562
// If this is an early pipeline invocation of this pass, we are done.
35333563
if (TryEarlyFoldsOnly)

llvm/test/Transforms/VectorCombine/load-insert-store.ll

Lines changed: 72 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ entry:
1919
define void @insert_store2(ptr %q, i16 zeroext %s) {
2020
; CHECK-LABEL: @insert_store2(
2121
; CHECK-NEXT: entry:
22-
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
23-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 6
24-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 7
25-
; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 1
22+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 6
23+
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 4
24+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 7
25+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 2
2626
; CHECK-NEXT: ret void
2727
;
2828
entry:
@@ -36,11 +36,12 @@ entry:
3636
define void @insert_store3(ptr %q, i16 zeroext %s) {
3737
; CHECK-LABEL: @insert_store3(
3838
; CHECK-NEXT: entry:
39-
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
40-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 5
41-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 6
42-
; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 7
43-
; CHECK-NEXT: store <8 x i16> [[VEC3]], ptr [[Q]], align 1
39+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 5
40+
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
41+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 6
42+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 4
43+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 7
44+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP2]], align 2
4445
; CHECK-NEXT: ret void
4546
;
4647
entry:
@@ -55,16 +56,22 @@ entry:
5556
define void @insert_store8(ptr %q, i16 zeroext %s) {
5657
; CHECK-LABEL: @insert_store8(
5758
; CHECK-NEXT: entry:
58-
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
59-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 0
60-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 1
61-
; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 2
62-
; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i16> [[VEC3]], i16 [[S]], i32 3
63-
; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i16> [[VEC4]], i16 [[S]], i32 4
64-
; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i16> [[VEC5]], i16 [[S]], i32 5
65-
; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i16> [[VEC6]], i16 [[S]], i32 6
66-
; CHECK-NEXT: [[VEC8:%.*]] = insertelement <8 x i16> [[VEC7]], i16 [[S]], i32 7
67-
; CHECK-NEXT: store <8 x i16> [[VEC8]], ptr [[Q]], align 1
59+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 0
60+
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 16
61+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 1
62+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 2
63+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 2
64+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP2]], align 4
65+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 3
66+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP3]], align 2
67+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 4
68+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP4]], align 8
69+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 5
70+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP5]], align 2
71+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 6
72+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP6]], align 4
73+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 7
74+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP7]], align 2
6875
; CHECK-NEXT: ret void
6976
;
7077
entry:
@@ -896,10 +903,10 @@ declare double @llvm.log2.f64(double)
896903
define void @insert_store_gap(ptr %q, i16 zeroext %s) {
897904
; CHECK-LABEL: @insert_store_gap(
898905
; CHECK-NEXT: entry:
899-
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
900-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 2
901-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 5
902-
; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
906+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 2
907+
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 4
908+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 5
909+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 2
903910
; CHECK-NEXT: ret void
904911
;
905912
entry:
@@ -913,11 +920,12 @@ entry:
913920
define void @insert_store_reverse(ptr %q, i16 zeroext %s) {
914921
; CHECK-LABEL: @insert_store_reverse(
915922
; CHECK-NEXT: entry:
916-
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
917-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 7
918-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 6
919-
; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 5
920-
; CHECK-NEXT: store <8 x i16> [[VEC3]], ptr [[Q]], align 16
923+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 5
924+
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
925+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 6
926+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 4
927+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 7
928+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP2]], align 2
921929
; CHECK-NEXT: ret void
922930
;
923931
entry:
@@ -932,10 +940,10 @@ entry:
932940
define void @insert_store_duplicate(ptr %q, i16 zeroext %s) {
933941
; CHECK-LABEL: @insert_store_duplicate(
934942
; CHECK-NEXT: entry:
935-
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
936-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
937-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 3
938-
; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
943+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 3
944+
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
945+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 3
946+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 2
939947
; CHECK-NEXT: ret void
940948
;
941949
entry:
@@ -949,10 +957,10 @@ entry:
949957
define void @insert_store_i32(ptr %q, i32 zeroext %s) {
950958
; CHECK-LABEL: @insert_store_i32(
951959
; CHECK-NEXT: entry:
952-
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[Q:%.*]], align 16
953-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[S:%.*]], i32 2
954-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[S]], i32 3
955-
; CHECK-NEXT: store <4 x i32> [[VEC2]], ptr [[Q]], align 16
960+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q:%.*]], i32 0, i32 2
961+
; CHECK-NEXT: store i32 [[S:%.*]], ptr [[TMP0]], align 8
962+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q]], i32 0, i32 3
963+
; CHECK-NEXT: store i32 [[S]], ptr [[TMP1]], align 4
956964
; CHECK-NEXT: ret void
957965
;
958966
entry:
@@ -966,10 +974,10 @@ entry:
966974
define void @insert_store_i8(ptr %q, i8 zeroext %s) {
967975
; CHECK-LABEL: @insert_store_i8(
968976
; CHECK-NEXT: entry:
969-
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
970-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 8
971-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <16 x i8> [[VEC1]], i8 [[S]], i32 9
972-
; CHECK-NEXT: store <16 x i8> [[VEC2]], ptr [[Q]], align 16
977+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 8
978+
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 8
979+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q]], i32 0, i32 9
980+
; CHECK-NEXT: store i8 [[S]], ptr [[TMP1]], align 1
973981
; CHECK-NEXT: ret void
974982
;
975983
entry:
@@ -983,10 +991,10 @@ entry:
983991
define void @insert_store_alignment(ptr %q, i16 zeroext %s) {
984992
; CHECK-LABEL: @insert_store_alignment(
985993
; CHECK-NEXT: entry:
986-
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
987-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 0
988-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 4
989-
; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
994+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 0
995+
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 16
996+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 4
997+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 8
990998
; CHECK-NEXT: ret void
991999
;
9921000
entry:
@@ -1000,10 +1008,10 @@ entry:
10001008
define void @insert_store_size(ptr %q, i16 zeroext %s) {
10011009
; CHECK-LABEL: @insert_store_size(
10021010
; CHECK-NEXT: entry:
1003-
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i16>, ptr [[Q:%.*]], align 32
1004-
; CHECK-NEXT: [[VEC1:%.*]] = insertelement <16 x i16> [[TMP0]], i16 [[S:%.*]], i32 8
1005-
; CHECK-NEXT: [[VEC2:%.*]] = insertelement <16 x i16> [[VEC1]], i16 [[S]], i32 12
1006-
; CHECK-NEXT: store <16 x i16> [[VEC2]], ptr [[Q]], align 32
1011+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i16>, ptr [[Q:%.*]], i32 0, i32 8
1012+
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 16
1013+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i16>, ptr [[Q]], i32 0, i32 12
1014+
; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 8
10071015
; CHECK-NEXT: ret void
10081016
;
10091017
entry:
@@ -1059,10 +1067,10 @@ define void @insert_store_nonconst_large_alignment2(ptr %q, i32 zeroext %s, i32
10591067
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 4
10601068
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
10611069
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
1062-
; CHECK-NEXT: [[I:%.*]] = load <4 x i32>, ptr [[Q:%.*]], align 128
1063-
; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <4 x i32> [[I]], i32 [[S:%.*]], i32 [[IDX1]]
1064-
; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <4 x i32> [[VECINS1]], i32 [[S]], i32 [[IDX2]]
1065-
; CHECK-NEXT: store <4 x i32> [[VECINS2]], ptr [[Q]], align 128
1070+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q:%.*]], i32 0, i32 [[IDX2]]
1071+
; CHECK-NEXT: store i32 [[S:%.*]], ptr [[TMP0]], align 4
1072+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q]], i32 0, i32 [[IDX1]]
1073+
; CHECK-NEXT: store i32 [[S]], ptr [[TMP1]], align 4
10661074
; CHECK-NEXT: ret void
10671075
;
10681076
entry:
@@ -1083,10 +1091,10 @@ define void @insert_store_nonconst_align_maximum_8_2(ptr %q, i64 %s, i32 %idx1,
10831091
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
10841092
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
10851093
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
1086-
; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 8
1087-
; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
1088-
; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
1089-
; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 8
1094+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX2]]
1095+
; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 8
1096+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q]], i32 0, i32 [[IDX1]]
1097+
; CHECK-NEXT: store i64 [[S]], ptr [[TMP2]], align 8
10901098
; CHECK-NEXT: ret void
10911099
;
10921100
%cmp1 = icmp ult i32 %idx1, 2
@@ -1106,10 +1114,10 @@ define void @insert_store_nonconst_align_maximum_4_2(ptr %q, i64 %s, i32 %idx1,
11061114
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
11071115
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
11081116
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
1109-
; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 4
1110-
; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
1111-
; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
1112-
; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 4
1117+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX2]]
1118+
; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 4
1119+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q]], i32 0, i32 [[IDX1]]
1120+
; CHECK-NEXT: store i64 [[S]], ptr [[TMP2]], align 4
11131121
; CHECK-NEXT: ret void
11141122
;
11151123
%cmp1 = icmp ult i32 %idx1, 2
@@ -1129,10 +1137,10 @@ define void @insert_store_nonconst_align_larger_2(ptr %q, i64 %s, i32 %idx1, i32
11291137
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
11301138
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
11311139
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
1132-
; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 4
1133-
; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
1134-
; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
1135-
; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 2
1140+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX2]]
1141+
; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 4
1142+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q]], i32 0, i32 [[IDX1]]
1143+
; CHECK-NEXT: store i64 [[S]], ptr [[TMP2]], align 4
11361144
; CHECK-NEXT: ret void
11371145
;
11381146
%cmp1 = icmp ult i32 %idx1, 2

0 commit comments

Comments
 (0)