Skip to content

Commit bfa93ea

Browse files
committed
Complete scalarization of insertelement with dynamic index
1 parent: 989d82e; commit: bfa93ea

File tree

2 files changed

+50
-34
lines changed

2 files changed

+50
-34
lines changed

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

Lines changed: 34 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -172,36 +172,41 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
172172
return false;
173173
}
174174

175-
// Allocates and populates an array equivalent to the vector operand Vec.
176-
// Returns the array and the type of the array.
177-
static std::pair<Value *, Type *>
178-
allocaArrayFromVector(IRBuilder<> &Builder, Value *Vec, Type *IdxTy) {
175+
static bool replaceDynamicInsertElementInst(InsertElementInst &IEI) {
176+
IRBuilder<> Builder(&IEI);
177+
178+
Value *Vec = IEI.getOperand(0);
179+
Value *Val = IEI.getOperand(1);
180+
Value *Index = IEI.getOperand(2);
181+
Type *IndexTy = Index->getType();
182+
179183
Type *ArrTy = equivalentArrayTypeFromVector(Vec->getType());
180184
Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
181-
for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
185+
const uint64_t ArrNumElems = ArrTy->getArrayNumElements();
186+
187+
SmallVector<Value *, 4> GEPs(ArrNumElems);
188+
for (unsigned I = 0; I < ArrNumElems; ++I) {
182189
Value *EE = Builder.CreateExtractElement(Vec, I);
183190
Value *GEP = Builder.CreateInBoundsGEP(
184191
ArrTy, ArrAlloca,
185-
{ConstantInt::get(IdxTy, 0), ConstantInt::get(IdxTy, I)});
192+
{ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
186193
Builder.CreateStore(EE, GEP);
194+
GEPs[I] = GEP;
187195
}
188-
return std::make_pair(ArrAlloca, ArrTy);
189-
}
190196

191-
static bool replaceDynamicInsertElementInst(InsertElementInst &IEI) {
192-
IRBuilder<> Builder(&IEI);
197+
Value *GEPForStore = Builder.CreateInBoundsGEP(
198+
ArrTy, ArrAlloca, {ConstantInt::get(IndexTy, 0), Index});
199+
Builder.CreateStore(Val, GEPForStore);
193200

194-
Value *Vec = IEI.getOperand(0);
195-
Value *Val = IEI.getOperand(1);
196-
Value *Index = IEI.getOperand(2);
197-
Type *IndexTy = Index->getType();
201+
Value *NewIEI = PoisonValue::get(Vec->getType());
202+
for (unsigned I = 0; I < ArrNumElems; ++I) {
203+
Value *GEP = GEPs[I];
204+
Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP);
205+
NewIEI =
206+
Builder.CreateInsertElement(NewIEI, Load, ConstantInt::get(IndexTy, I));
207+
}
198208

199-
std::pair<Value *, Type *> Arr = allocaArrayFromVector(Builder, Vec, IndexTy);
200-
Value *ArrAlloca = Arr.first;
201-
Type *ArrTy = Arr.second;
202-
Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
203-
{ConstantInt::get(IndexTy, 0), Index});
204-
Builder.CreateStore(Val, GEP);
209+
IEI.replaceAllUsesWith(NewIEI);
205210
IEI.eraseFromParent();
206211
return true;
207212
}
@@ -220,10 +225,15 @@ static bool replaceDynamicExtractElementInst(ExtractElementInst &EEI) {
220225
Value *Index = EEI.getIndexOperand();
221226
Type *IndexTy = Index->getType();
222227

223-
std::pair<Value *, Type *> Arr =
224-
allocaArrayFromVector(Builder, EEI.getVectorOperand(), IndexTy);
225-
Value *ArrAlloca = Arr.first;
226-
Type *ArrTy = Arr.second;
228+
Type *ArrTy = equivalentArrayTypeFromVector(EEI.getVectorOperandType());
229+
Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
230+
for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
231+
Value *EE = Builder.CreateExtractElement(EEI.getVectorOperand(), I);
232+
Value *GEP = Builder.CreateInBoundsGEP(
233+
ArrTy, ArrAlloca,
234+
{ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
235+
Builder.CreateStore(EE, GEP);
236+
}
227237

228238
Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
229239
{ConstantInt::get(IndexTy, 0), Index});

llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,8 @@ define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
2525
ret float %ee
2626
}
2727

28-
define void @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
29-
; CHECK-LABEL: define void @insert_i32_vec_dynamic(
28+
define <3 x i32> @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
29+
; CHECK-LABEL: define <3 x i32> @insert_i32_vec_dynamic(
3030
; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
3131
; CHECK-NEXT: [[TMP1:%.*]] = alloca [3 x i32], align 4
3232
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[V]], i64 0
@@ -40,10 +40,16 @@ define void @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
4040
; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
4141
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 [[I]]
4242
; CHECK-NEXT: store i32 [[A]], ptr [[TMP8]], align 4
43-
; CHECK-NEXT: ret void
43+
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4
44+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i32> poison, i32 [[TMP9]], i32 0
45+
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP5]], align 4
46+
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x i32> [[TMP10]], i32 [[TMP11]], i32 1
47+
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
48+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <3 x i32> [[TMP12]], i32 [[TMP13]], i32 2
49+
; CHECK-NEXT: ret <3 x i32> [[TMP14]]
4450
;
45-
insertelement <3 x i32> %v, i32 %a, i32 %i
46-
ret void
51+
%ie = insertelement <3 x i32> %v, i32 %a, i32 %i
52+
ret <3 x i32> %ie
4753
}
4854

4955
; An extractelement with a constant index should not be converted to array form
@@ -58,13 +64,13 @@ define i16 @extract_i16_vec_constant(<4 x i16> %v) {
5864
}
5965

6066
; An insertelement with a constant index should not be converted to array form
61-
define void @insert_half_vec_constant(<2 x half> %v, half %a) {
62-
; CHECK-LABEL: define void @insert_half_vec_constant(
67+
define <2 x half> @insert_half_vec_constant(<2 x half> %v, half %a) {
68+
; CHECK-LABEL: define <2 x half> @insert_half_vec_constant(
6369
; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
6470
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
65-
; CHECK-NEXT: ret void
71+
; CHECK-NEXT: ret <2 x half> [[TMP1]]
6672
;
67-
insertelement <2 x half> %v, half %a, i32 1
68-
ret void
73+
%ie = insertelement <2 x half> %v, half %a, i32 1
74+
ret <2 x half> %ie
6975
}
7076

0 commit comments

Comments
 (0)