Skip to content

Commit f78b059

Browse files
committed
Reuse allocas and place allocas in entry block
1 parent bfa93ea commit f78b059

File tree

2 files changed

+155
-75
lines changed

2 files changed

+155
-75
lines changed

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

Lines changed: 67 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,16 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
7979
friend bool findAndReplaceVectors(llvm::Module &M);
8080

8181
private:
82+
typedef std::pair<AllocaInst *, SmallVector<Value *, 4>> AllocaAndGEPs;
83+
typedef SmallDenseMap<Value *, AllocaAndGEPs>
84+
VectorToArrayMap; // A map from a vector-typed Value to the AllocaInst of its
85+
// array copy and the GEPs to each element of that array
86+
VectorToArrayMap VectorAllocaMap;
87+
AllocaAndGEPs createArrayFromVector(IRBuilder<> &Builder, Value *Vec,
88+
const Twine &Name);
89+
bool replaceDynamicInsertElementInst(InsertElementInst &IEI);
90+
bool replaceDynamicExtractElementInst(ExtractElementInst &EEI);
91+
8292
GlobalVariable *lookupReplacementGlobal(Value *CurrOperand);
8393
DenseMap<GlobalVariable *, GlobalVariable *> GlobalMap;
8494
};
@@ -90,6 +100,7 @@ bool DataScalarizerVisitor::visit(Function &F) {
90100
for (Instruction &I : make_early_inc_range(*BB))
91101
MadeChange |= InstVisitor::visit(I);
92102
}
103+
VectorAllocaMap.clear();
93104
return MadeChange;
94105
}
95106

@@ -172,38 +183,61 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
172183
return false;
173184
}
174185

175-
static bool replaceDynamicInsertElementInst(InsertElementInst &IEI) {
176-
IRBuilder<> Builder(&IEI);
186+
DataScalarizerVisitor::AllocaAndGEPs
187+
DataScalarizerVisitor::createArrayFromVector(IRBuilder<> &Builder, Value *Vec,
188+
const Twine &Name = "") {
189+
// If an alloca was already created for this vector, reuse it and its GEPs
190+
auto VA = VectorAllocaMap.find(Vec);
191+
if (VA != VectorAllocaMap.end())
192+
return VA->second;
177193

178-
Value *Vec = IEI.getOperand(0);
179-
Value *Val = IEI.getOperand(1);
180-
Value *Index = IEI.getOperand(2);
181-
Type *IndexTy = Index->getType();
194+
auto InsertPoint = Builder.GetInsertPoint();
195+
Builder.SetInsertPointPastAllocas(Builder.GetInsertBlock()->getParent());
182196

183197
Type *ArrTy = equivalentArrayTypeFromVector(Vec->getType());
184-
Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
198+
AllocaInst *ArrAlloca =
199+
Builder.CreateAlloca(ArrTy, nullptr, Name + ".alloca");
185200
const uint64_t ArrNumElems = ArrTy->getArrayNumElements();
186201

187202
SmallVector<Value *, 4> GEPs(ArrNumElems);
188203
for (unsigned I = 0; I < ArrNumElems; ++I) {
189-
Value *EE = Builder.CreateExtractElement(Vec, I);
190-
Value *GEP = Builder.CreateInBoundsGEP(
191-
ArrTy, ArrAlloca,
192-
{ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
193-
Builder.CreateStore(EE, GEP);
194-
GEPs[I] = GEP;
204+
Value *EE = Builder.CreateExtractElement(Vec, I, Name + ".extract");
205+
GEPs[I] = Builder.CreateInBoundsGEP(
206+
ArrTy, ArrAlloca, {Builder.getInt32(0), Builder.getInt32(I)},
207+
Name + ".index");
208+
Builder.CreateStore(EE, GEPs[I]);
195209
}
196210

197-
Value *GEPForStore = Builder.CreateInBoundsGEP(
198-
ArrTy, ArrAlloca, {ConstantInt::get(IndexTy, 0), Index});
211+
VectorAllocaMap.insert({Vec, {ArrAlloca, GEPs}});
212+
Builder.SetInsertPoint(InsertPoint);
213+
return {ArrAlloca, GEPs};
214+
}
215+
216+
bool DataScalarizerVisitor::replaceDynamicInsertElementInst(
217+
InsertElementInst &IEI) {
218+
IRBuilder<> Builder(&IEI);
219+
220+
Value *Vec = IEI.getOperand(0);
221+
Value *Val = IEI.getOperand(1);
222+
Value *Index = IEI.getOperand(2);
223+
224+
AllocaAndGEPs ArrAllocaAndGEPs =
225+
createArrayFromVector(Builder, Vec, IEI.getName());
226+
AllocaInst *ArrAlloca = ArrAllocaAndGEPs.first;
227+
SmallVector<Value *, 4> &ArrGEPs = ArrAllocaAndGEPs.second;
228+
229+
Type *ArrTy = ArrAlloca->getAllocatedType();
230+
Value *GEPForStore =
231+
Builder.CreateInBoundsGEP(ArrTy, ArrAlloca, {Builder.getInt32(0), Index},
232+
IEI.getName() + ".dynindex");
199233
Builder.CreateStore(Val, GEPForStore);
200234

201235
Value *NewIEI = PoisonValue::get(Vec->getType());
202-
for (unsigned I = 0; I < ArrNumElems; ++I) {
203-
Value *GEP = GEPs[I];
204-
Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP);
205-
NewIEI =
206-
Builder.CreateInsertElement(NewIEI, Load, ConstantInt::get(IndexTy, I));
236+
for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
237+
Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), ArrGEPs[I],
238+
IEI.getName() + ".load");
239+
NewIEI = Builder.CreateInsertElement(NewIEI, Load, Builder.getInt32(I),
240+
IEI.getName() + ".insert");
207241
}
208242

209243
IEI.replaceAllUsesWith(NewIEI);
@@ -219,25 +253,20 @@ bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
219253
return replaceDynamicInsertElementInst(IEI);
220254
}
221255

222-
static bool replaceDynamicExtractElementInst(ExtractElementInst &EEI) {
256+
bool DataScalarizerVisitor::replaceDynamicExtractElementInst(
257+
ExtractElementInst &EEI) {
223258
IRBuilder<> Builder(&EEI);
224259

225-
Value *Index = EEI.getIndexOperand();
226-
Type *IndexTy = Index->getType();
227-
228-
Type *ArrTy = equivalentArrayTypeFromVector(EEI.getVectorOperandType());
229-
Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
230-
for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
231-
Value *EE = Builder.CreateExtractElement(EEI.getVectorOperand(), I);
232-
Value *GEP = Builder.CreateInBoundsGEP(
233-
ArrTy, ArrAlloca,
234-
{ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
235-
Builder.CreateStore(EE, GEP);
236-
}
260+
AllocaAndGEPs ArrAllocaAndGEPs =
261+
createArrayFromVector(Builder, EEI.getVectorOperand(), EEI.getName());
262+
AllocaInst *ArrAlloca = ArrAllocaAndGEPs.first;
237263

238-
Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
239-
{ConstantInt::get(IndexTy, 0), Index});
240-
Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP);
264+
Type *ArrTy = ArrAlloca->getAllocatedType();
265+
Value *GEP = Builder.CreateInBoundsGEP(
266+
ArrTy, ArrAlloca, {Builder.getInt32(0), EEI.getIndexOperand()},
267+
EEI.getName() + ".index");
268+
Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP,
269+
EEI.getName() + ".load");
241270

242271
EEI.replaceAllUsesWith(Load);
243272
EEI.eraseFromParent();
@@ -276,8 +305,8 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
276305
return true;
277306
}
278307

279-
Constant *transformInitializer(Constant *Init, Type *OrigType, Type *NewType,
280-
LLVMContext &Ctx) {
308+
static Constant *transformInitializer(Constant *Init, Type *OrigType,
309+
Type *NewType, LLVMContext &Ctx) {
281310
// Handle ConstantAggregateZero (zero-initialized constants)
282311
if (isa<ConstantAggregateZero>(Init)) {
283312
return ConstantAggregateZero::get(NewType);

llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll

Lines changed: 88 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,76 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
33

4+
; Allocas should be placed in the entry block.
5+
; Allocas should also be reused across multiple insertelement and extractelement instructions that operate on the same vector.
6+
define void @alloca_placement_and_reuse(<3 x i32> %v1, <3 x i32> %v2, i32 %a, i32 %i, i32 %j) {
7+
; CHECK-LABEL: define void @alloca_placement_and_reuse(
8+
; CHECK-SAME: <3 x i32> [[V1:%.*]], <3 x i32> [[V2:%.*]], i32 [[A:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) {
9+
; CHECK-NEXT: [[AL:%.*]] = alloca [3 x i32], align 4
10+
; CHECK-NEXT: [[EE1_ALLOCA:%.*]] = alloca [3 x i32], align 4
11+
; CHECK-NEXT: [[EE2_ALLOCA:%.*]] = alloca [3 x i32], align 4
12+
; CHECK-NEXT: [[EE2_EXTRACT:%.*]] = extractelement <3 x i32> [[V2]], i64 0
13+
; CHECK-NEXT: [[EE2_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 0
14+
; CHECK-NEXT: store i32 [[EE2_EXTRACT]], ptr [[EE2_INDEX]], align 4
15+
; CHECK-NEXT: [[EE2_EXTRACT10:%.*]] = extractelement <3 x i32> [[V2]], i64 1
16+
; CHECK-NEXT: [[EE2_INDEX11:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 1
17+
; CHECK-NEXT: store i32 [[EE2_EXTRACT10]], ptr [[EE2_INDEX11]], align 4
18+
; CHECK-NEXT: [[EE2_EXTRACT12:%.*]] = extractelement <3 x i32> [[V2]], i64 2
19+
; CHECK-NEXT: [[EE2_INDEX13:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 2
20+
; CHECK-NEXT: store i32 [[EE2_EXTRACT12]], ptr [[EE2_INDEX13]], align 4
21+
; CHECK-NEXT: [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V1]], i64 0
22+
; CHECK-NEXT: [[EE1_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 0
23+
; CHECK-NEXT: store i32 [[EE1_EXTRACT]], ptr [[EE1_INDEX]], align 4
24+
; CHECK-NEXT: [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V1]], i64 1
25+
; CHECK-NEXT: [[EE1_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 1
26+
; CHECK-NEXT: store i32 [[EE1_EXTRACT1]], ptr [[EE1_INDEX2]], align 4
27+
; CHECK-NEXT: [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V1]], i64 2
28+
; CHECK-NEXT: [[EE1_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 2
29+
; CHECK-NEXT: store i32 [[EE1_EXTRACT3]], ptr [[EE1_INDEX4]], align 4
30+
; CHECK-NEXT: br label %[[BODY:.*]]
31+
; CHECK: [[BODY]]:
32+
; CHECK-NEXT: [[EE1_INDEX5:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
33+
; CHECK-NEXT: [[EE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX5]], align 4
34+
; CHECK-NEXT: [[IE1_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
35+
; CHECK-NEXT: store i32 [[A]], ptr [[IE1_DYNINDEX]], align 4
36+
; CHECK-NEXT: [[IE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX]], align 4
37+
; CHECK-NEXT: [[IE1_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE1_LOAD]], i32 0
38+
; CHECK-NEXT: [[IE1_LOAD6:%.*]] = load i32, ptr [[EE1_INDEX2]], align 4
39+
; CHECK-NEXT: [[IE1_INSERT7:%.*]] = insertelement <3 x i32> [[IE1_INSERT]], i32 [[IE1_LOAD6]], i32 1
40+
; CHECK-NEXT: [[IE1_LOAD8:%.*]] = load i32, ptr [[EE1_INDEX4]], align 4
41+
; CHECK-NEXT: [[IE1_INSERT9:%.*]] = insertelement <3 x i32> [[IE1_INSERT7]], i32 [[IE1_LOAD8]], i32 2
42+
; CHECK-NEXT: [[EE2_INDEX14:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 [[J]]
43+
; CHECK-NEXT: [[EE2_LOAD:%.*]] = load i32, ptr [[EE2_INDEX14]], align 4
44+
; CHECK-NEXT: ret void
45+
;
46+
%al = alloca [3 x i32], align 4
47+
br label %body
48+
body:
49+
%ee1 = extractelement <3 x i32> %v1, i32 %i
50+
%ie1 = insertelement <3 x i32> %v1, i32 %a, i32 %i
51+
%ee2 = extractelement <3 x i32> %v2, i32 %j
52+
ret void
53+
}
54+
455
define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
556
; CHECK-LABEL: define float @extract_float_vec_dynamic(
657
; CHECK-SAME: <4 x float> [[V:%.*]], i32 [[I:%.*]]) {
7-
; CHECK-NEXT: [[TMP1:%.*]] = alloca [4 x float], align 4
8-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[V]], i64 0
9-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 0
10-
; CHECK-NEXT: store float [[TMP2]], ptr [[TMP3]], align 4
11-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[V]], i64 1
12-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 1
13-
; CHECK-NEXT: store float [[TMP4]], ptr [[TMP5]], align 4
14-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[V]], i64 2
15-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 2
16-
; CHECK-NEXT: store float [[TMP6]], ptr [[TMP7]], align 4
17-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[V]], i64 3
18-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 3
19-
; CHECK-NEXT: store float [[TMP8]], ptr [[TMP9]], align 4
20-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 [[I]]
21-
; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4
22-
; CHECK-NEXT: ret float [[TMP11]]
58+
; CHECK-NEXT: [[EE_ALLOCA:%.*]] = alloca [4 x float], align 4
59+
; CHECK-NEXT: [[EE_EXTRACT:%.*]] = extractelement <4 x float> [[V]], i64 0
60+
; CHECK-NEXT: [[EE_INDEX:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 0
61+
; CHECK-NEXT: store float [[EE_EXTRACT]], ptr [[EE_INDEX]], align 4
62+
; CHECK-NEXT: [[EE_EXTRACT1:%.*]] = extractelement <4 x float> [[V]], i64 1
63+
; CHECK-NEXT: [[EE_INDEX2:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 1
64+
; CHECK-NEXT: store float [[EE_EXTRACT1]], ptr [[EE_INDEX2]], align 4
65+
; CHECK-NEXT: [[EE_EXTRACT3:%.*]] = extractelement <4 x float> [[V]], i64 2
66+
; CHECK-NEXT: [[EE_INDEX4:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 2
67+
; CHECK-NEXT: store float [[EE_EXTRACT3]], ptr [[EE_INDEX4]], align 4
68+
; CHECK-NEXT: [[EE_EXTRACT5:%.*]] = extractelement <4 x float> [[V]], i64 3
69+
; CHECK-NEXT: [[EE_INDEX6:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 3
70+
; CHECK-NEXT: store float [[EE_EXTRACT5]], ptr [[EE_INDEX6]], align 4
71+
; CHECK-NEXT: [[EE_INDEX7:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 [[I]]
72+
; CHECK-NEXT: [[EE_LOAD:%.*]] = load float, ptr [[EE_INDEX7]], align 4
73+
; CHECK-NEXT: ret float [[EE_LOAD]]
2374
;
2475
%ee = extractelement <4 x float> %v, i32 %i
2576
ret float %ee
@@ -28,25 +79,25 @@ define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
2879
define <3 x i32> @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
2980
; CHECK-LABEL: define <3 x i32> @insert_i32_vec_dynamic(
3081
; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
31-
; CHECK-NEXT: [[TMP1:%.*]] = alloca [3 x i32], align 4
32-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[V]], i64 0
33-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 0
34-
; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
35-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x i32> [[V]], i64 1
36-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 1
37-
; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
38-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x i32> [[V]], i64 2
39-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 2
40-
; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
41-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 [[I]]
42-
; CHECK-NEXT: store i32 [[A]], ptr [[TMP8]], align 4
43-
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4
44-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i32> poison, i32 [[TMP9]], i32 0
45-
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP5]], align 4
46-
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x i32> [[TMP10]], i32 [[TMP11]], i32 1
47-
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
48-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <3 x i32> [[TMP12]], i32 [[TMP13]], i32 2
49-
; CHECK-NEXT: ret <3 x i32> [[TMP14]]
82+
; CHECK-NEXT: [[IE_ALLOCA:%.*]] = alloca [3 x i32], align 4
83+
; CHECK-NEXT: [[IE_EXTRACT:%.*]] = extractelement <3 x i32> [[V]], i64 0
84+
; CHECK-NEXT: [[IE_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 0
85+
; CHECK-NEXT: store i32 [[IE_EXTRACT]], ptr [[IE_INDEX]], align 4
86+
; CHECK-NEXT: [[IE_EXTRACT1:%.*]] = extractelement <3 x i32> [[V]], i64 1
87+
; CHECK-NEXT: [[IE_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 1
88+
; CHECK-NEXT: store i32 [[IE_EXTRACT1]], ptr [[IE_INDEX2]], align 4
89+
; CHECK-NEXT: [[IE_EXTRACT3:%.*]] = extractelement <3 x i32> [[V]], i64 2
90+
; CHECK-NEXT: [[IE_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 2
91+
; CHECK-NEXT: store i32 [[IE_EXTRACT3]], ptr [[IE_INDEX4]], align 4
92+
; CHECK-NEXT: [[IE_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 [[I]]
93+
; CHECK-NEXT: store i32 [[A]], ptr [[IE_DYNINDEX]], align 4
94+
; CHECK-NEXT: [[IE_LOAD:%.*]] = load i32, ptr [[IE_INDEX]], align 4
95+
; CHECK-NEXT: [[IE_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE_LOAD]], i32 0
96+
; CHECK-NEXT: [[IE_LOAD5:%.*]] = load i32, ptr [[IE_INDEX2]], align 4
97+
; CHECK-NEXT: [[IE_INSERT6:%.*]] = insertelement <3 x i32> [[IE_INSERT]], i32 [[IE_LOAD5]], i32 1
98+
; CHECK-NEXT: [[IE_LOAD7:%.*]] = load i32, ptr [[IE_INDEX4]], align 4
99+
; CHECK-NEXT: [[IE_INSERT8:%.*]] = insertelement <3 x i32> [[IE_INSERT6]], i32 [[IE_LOAD7]], i32 2
100+
; CHECK-NEXT: ret <3 x i32> [[IE_INSERT8]]
50101
;
51102
%ie = insertelement <3 x i32> %v, i32 %a, i32 %i
52103
ret <3 x i32> %ie
@@ -67,8 +118,8 @@ define i16 @extract_i16_vec_constant(<4 x i16> %v) {
67118
define <2 x half> @insert_half_vec_constant(<2 x half> %v, half %a) {
68119
; CHECK-LABEL: define <2 x half> @insert_half_vec_constant(
69120
; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
70-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
71-
; CHECK-NEXT: ret <2 x half> [[TMP1]]
121+
; CHECK-NEXT: [[IE:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
122+
; CHECK-NEXT: ret <2 x half> [[IE]]
72123
;
73124
%ie = insertelement <2 x half> %v, half %a, i32 1
74125
ret <2 x half> %ie

0 commit comments

Comments
 (0)