Skip to content

Commit 15cf98c

Browse files
committed
Scalarize dynamically-indexed insertelement
1 parent be5d425 commit 15cf98c

File tree

2 files changed

+88
-24
lines changed

2 files changed

+88
-24
lines changed

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
6868
bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
6969
bool visitCastInst(CastInst &CI) { return false; }
7070
bool visitBitCastInst(BitCastInst &BCI) { return false; }
71-
bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
71+
bool visitInsertElementInst(InsertElementInst &IEI);
7272
bool visitExtractElementInst(ExtractElementInst &EEI);
7373
bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
7474
bool visitPHINode(PHINode &PHI) { return false; }
@@ -172,6 +172,38 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
172172
return false;
173173
}
174174

175+
/// Scalarize an insertelement whose index is not a compile-time constant.
///
/// The source vector is spilled element-by-element into a stack array of the
/// equivalent array type, the new value is stored through a GEP that uses the
/// dynamic index, and - when the insertelement result is actually used - the
/// vector is rebuilt from the array with constant-index insertelements so that
/// every user observes the updated value.
///
/// \param IEI the insertelement instruction being visited.
/// \returns true if the instruction was rewritten (and erased), false if it
///          was left untouched because its index is a ConstantInt.
bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  Value *Vec = IEI.getOperand(0);
  Value *Val = IEI.getOperand(1);
  Value *Index = IEI.getOperand(2);

  // If the index is a constant then we don't need to scalarize it
  if (isa<ConstantInt>(Index))
    return false;

  IRBuilder<> Builder(&IEI);
  Type *VecTy = Vec->getType();
  Type *IndexTy = Index->getType();
  Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
  const unsigned NumElems = ArrTy->getArrayNumElements();
  Value *Zero = ConstantInt::get(IndexTy, 0);

  Value *ArrAlloca = Builder.CreateAlloca(ArrTy);

  // Spill every lane of the incoming vector into the scratch array.
  for (unsigned I = 0; I < NumElems; ++I) {
    Value *EE = Builder.CreateExtractElement(Vec, I);
    Value *GEP = Builder.CreateInBoundsGEP(
        ArrTy, ArrAlloca, {Zero, ConstantInt::get(IndexTy, I)});
    Builder.CreateStore(EE, GEP);
  }

  // Overwrite the dynamically selected slot with the inserted value.
  Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca, {Zero, Index});
  Builder.CreateStore(Val, GEP);

  // The insertelement produces a vector that later instructions may consume.
  // Erasing it while uses remain is invalid, so rebuild the vector from the
  // array (constant indices only) and redirect the users first. When there are
  // no users nothing extra is emitted.
  if (!IEI.use_empty()) {
    Type *ElemTy = ArrTy->getArrayElementType();
    Value *NewVec = PoisonValue::get(VecTy);
    for (unsigned I = 0; I < NumElems; ++I) {
      Value *ElemPtr = Builder.CreateInBoundsGEP(
          ArrTy, ArrAlloca, {Zero, ConstantInt::get(IndexTy, I)});
      Value *Elem = Builder.CreateLoad(ElemTy, ElemPtr);
      NewVec = Builder.CreateInsertElement(NewVec, Elem, I);
    }
    IEI.replaceAllUsesWith(NewVec);
  }

  IEI.eraseFromParent();
  return true;
}
206+
175207
bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
176208
// If the index is a constant then we don't need to scalarize it
177209
Value *Index = EEI.getIndexOperand();
Lines changed: 55 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,70 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
33

4-
define float @extract_float_vec_dynamic(<4 x float> %0, i32 %1) {
4+
define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
55
; CHECK-LABEL: define float @extract_float_vec_dynamic(
6-
; CHECK-SAME: <4 x float> [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
7-
; CHECK-NEXT: [[TMP3:%.*]] = alloca [4 x float], align 4
8-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
9-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 0
6+
; CHECK-SAME: <4 x float> [[V:%.*]], i32 [[I:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = alloca [4 x float], align 4
8+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[V]], i64 0
9+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 0
10+
; CHECK-NEXT: store float [[TMP2]], ptr [[TMP3]], align 4
11+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[V]], i64 1
12+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 1
1013
; CHECK-NEXT: store float [[TMP4]], ptr [[TMP5]], align 4
11-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
12-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 1
14+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[V]], i64 2
15+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 2
1316
; CHECK-NEXT: store float [[TMP6]], ptr [[TMP7]], align 4
14-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
15-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 2
17+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[V]], i64 3
18+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 3
1619
; CHECK-NEXT: store float [[TMP8]], ptr [[TMP9]], align 4
17-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
18-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 3
19-
; CHECK-NEXT: store float [[TMP10]], ptr [[TMP11]], align 4
20-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 [[TMP1]]
21-
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
22-
; CHECK-NEXT: ret float [[TMP13]]
20+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 [[I]]
21+
; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4
22+
; CHECK-NEXT: ret float [[TMP11]]
2323
;
24-
%e = extractelement <4 x float> %0, i32 %1
25-
ret float %e
24+
%ee = extractelement <4 x float> %v, i32 %i
25+
ret float %ee
26+
}
27+
28+
define void @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
29+
; CHECK-LABEL: define void @insert_i32_vec_dynamic(
30+
; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
31+
; CHECK-NEXT: [[TMP1:%.*]] = alloca [3 x i32], align 4
32+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[V]], i64 0
33+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 0
34+
; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
35+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x i32> [[V]], i64 1
36+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 1
37+
; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
38+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x i32> [[V]], i64 2
39+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 2
40+
; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
41+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 [[I]]
42+
; CHECK-NEXT: store i32 [[A]], ptr [[TMP8]], align 4
43+
; CHECK-NEXT: ret void
44+
;
45+
insertelement <3 x i32> %v, i32 %a, i32 %i
46+
ret void
2647
}
2748

2849
; An extractelement with a constant index should not be converted to array form
29-
define i16 @extract_i16_vec_constant(<4 x i16> %0) {
50+
define i16 @extract_i16_vec_constant(<4 x i16> %v) {
3051
; CHECK-LABEL: define i16 @extract_i16_vec_constant(
31-
; CHECK-SAME: <4 x i16> [[TMP0:%.*]]) {
32-
; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
33-
; CHECK-NEXT: ret i16 [[E]]
52+
; CHECK-SAME: <4 x i16> [[V:%.*]]) {
53+
; CHECK-NEXT: [[EE:%.*]] = extractelement <4 x i16> [[V]], i32 1
54+
; CHECK-NEXT: ret i16 [[EE]]
55+
;
56+
%ee = extractelement <4 x i16> %v, i32 1
57+
ret i16 %ee
58+
}
59+
60+
; An insertelement with a constant index should not be converted to array form
61+
define void @insert_half_vec_constant(<2 x half> %v, half %a) {
62+
; CHECK-LABEL: define void @insert_half_vec_constant(
63+
; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
64+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
65+
; CHECK-NEXT: ret void
3466
;
35-
%e = extractelement <4 x i16> %0, i32 1
36-
ret i16 %e
67+
insertelement <2 x half> %v, half %a, i32 1
68+
ret void
3769
}
3870

0 commit comments

Comments
 (0)