Skip to content

Commit 63afe6e

Browse files
committed
Ensure correctness in control flow and multiple insertelements
1 parent f78b059 commit 63afe6e

File tree

2 files changed

+117
-41
lines changed

2 files changed

+117
-41
lines changed

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -192,13 +192,19 @@ DataScalarizerVisitor::createArrayFromVector(IRBuilder<> &Builder, Value *Vec,
192192
return VA->second;
193193

194194
auto InsertPoint = Builder.GetInsertPoint();
195-
Builder.SetInsertPointPastAllocas(Builder.GetInsertBlock()->getParent());
196195

196+
// Allocate the array to hold the vector elements
197+
Builder.SetInsertPointPastAllocas(Builder.GetInsertBlock()->getParent());
197198
Type *ArrTy = equivalentArrayTypeFromVector(Vec->getType());
198199
AllocaInst *ArrAlloca =
199200
Builder.CreateAlloca(ArrTy, nullptr, Name + ".alloca");
200201
const uint64_t ArrNumElems = ArrTy->getArrayNumElements();
201202

203+
// Create extracts and stores to populate the array immediately after the
204+
// original vector's defining instruction if available, else immediately after
205+
// the alloca
206+
if (auto *Instr = dyn_cast<Instruction>(Vec))
207+
Builder.SetInsertPoint(Instr->getNextNonDebugInstruction());
202208
SmallVector<Value *, 4> GEPs(ArrNumElems);
203209
for (unsigned I = 0; I < ArrNumElems; ++I) {
204210
Value *EE = Builder.CreateExtractElement(Vec, I, Name + ".extract");
@@ -213,6 +219,19 @@ DataScalarizerVisitor::createArrayFromVector(IRBuilder<> &Builder, Value *Vec,
213219
return {ArrAlloca, GEPs};
214220
}
215221

222+
/// Returns a pair of Value* with the first being a GEP into ArrAlloca using
223+
/// indices {0, Index}, and the second Value* being a Load of the GEP
224+
static std::pair<Value *, Value *>
225+
dynamicallyLoadArray(IRBuilder<> &Builder, AllocaInst *ArrAlloca, Value *Index,
226+
const Twine &Name = "") {
227+
Type *ArrTy = ArrAlloca->getAllocatedType();
228+
Value *GEP = Builder.CreateInBoundsGEP(
229+
ArrTy, ArrAlloca, {Builder.getInt32(0), Index}, Name + ".index");
230+
Value *Load =
231+
Builder.CreateLoad(ArrTy->getArrayElementType(), GEP, Name + ".load");
232+
return std::make_pair(GEP, Load);
233+
}
234+
216235
bool DataScalarizerVisitor::replaceDynamicInsertElementInst(
217236
InsertElementInst &IEI) {
218237
IRBuilder<> Builder(&IEI);
@@ -224,14 +243,15 @@ bool DataScalarizerVisitor::replaceDynamicInsertElementInst(
224243
AllocaAndGEPs ArrAllocaAndGEPs =
225244
createArrayFromVector(Builder, Vec, IEI.getName());
226245
AllocaInst *ArrAlloca = ArrAllocaAndGEPs.first;
246+
Type *ArrTy = ArrAlloca->getAllocatedType();
227247
SmallVector<Value *, 4> &ArrGEPs = ArrAllocaAndGEPs.second;
228248

229-
Type *ArrTy = ArrAlloca->getAllocatedType();
230-
Value *GEPForStore =
231-
Builder.CreateInBoundsGEP(ArrTy, ArrAlloca, {Builder.getInt32(0), Index},
232-
IEI.getName() + ".dynindex");
233-
Builder.CreateStore(Val, GEPForStore);
249+
auto GEPAndLoad =
250+
dynamicallyLoadArray(Builder, ArrAlloca, Index, IEI.getName());
251+
Value *GEP = GEPAndLoad.first;
252+
Value *Load = GEPAndLoad.second;
234253

254+
Builder.CreateStore(Val, GEP);
235255
Value *NewIEI = PoisonValue::get(Vec->getType());
236256
for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
237257
Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), ArrGEPs[I],
@@ -240,6 +260,10 @@ bool DataScalarizerVisitor::replaceDynamicInsertElementInst(
240260
IEI.getName() + ".insert");
241261
}
242262

263+
// Store back the original value so the Alloca can be reused for subsequent
264+
// insertelement instructions on the same vector
265+
Builder.CreateStore(Load, GEP);
266+
243267
IEI.replaceAllUsesWith(NewIEI);
244268
IEI.eraseFromParent();
245269
return true;
@@ -261,12 +285,9 @@ bool DataScalarizerVisitor::replaceDynamicExtractElementInst(
261285
createArrayFromVector(Builder, EEI.getVectorOperand(), EEI.getName());
262286
AllocaInst *ArrAlloca = ArrAllocaAndGEPs.first;
263287

264-
Type *ArrTy = ArrAlloca->getAllocatedType();
265-
Value *GEP = Builder.CreateInBoundsGEP(
266-
ArrTy, ArrAlloca, {Builder.getInt32(0), EEI.getIndexOperand()},
267-
EEI.getName() + ".index");
268-
Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP,
269-
EEI.getName() + ".load");
288+
auto GEPAndLoad = dynamicallyLoadArray(Builder, ArrAlloca,
289+
EEI.getIndexOperand(), EEI.getName());
290+
Value *Load = GEPAndLoad.second;
270291

271292
EEI.replaceAllUsesWith(Load);
272293
EEI.eraseFromParent();

llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll

Lines changed: 84 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,107 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
33

4-
; Allocas should be placed in the entry block.
5-
; Allocas should also be reused across multiple insertelement and extractelement instructions for the same vector
6-
define void @alloca_placement_and_reuse(<3 x i32> %v1, <3 x i32> %v2, i32 %a, i32 %i, i32 %j) {
7-
; CHECK-LABEL: define void @alloca_placement_and_reuse(
8-
; CHECK-SAME: <3 x i32> [[V1:%.*]], <3 x i32> [[V2:%.*]], i32 [[A:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) {
9-
; CHECK-NEXT: [[AL:%.*]] = alloca [3 x i32], align 4
4+
; Ensure that insertelement instructions have no side effects on each other
5+
; even in the presence of control flow
6+
define void @test_multiple_insert(i32 %c, i32 %i, i32 %j) {
7+
; CHECK-LABEL: define void @test_multiple_insert(
8+
; CHECK-SAME: i32 [[C:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) {
9+
; CHECK-NEXT: [[V0_ALLOCA:%.*]] = alloca [2 x i32], align 4
10+
; CHECK-NEXT: [[V_ALLOCA:%.*]] = alloca [2 x i32], align 4
11+
; CHECK-NEXT: [[V0_0:%.*]] = insertelement <2 x i32> poison, i32 0, i32 0
12+
; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i32> [[V0_0]], i32 0, i32 1
13+
; CHECK-NEXT: [[V0_EXTRACT0:%.*]] = extractelement <2 x i32> [[V0]], i64 0
14+
; CHECK-NEXT: [[V0_INDEX0:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 0
15+
; CHECK-NEXT: store i32 [[V0_EXTRACT0]], ptr [[V0_INDEX0]], align 4
16+
; CHECK-NEXT: [[V0_EXTRACT1:%.*]] = extractelement <2 x i32> [[V0]], i64 1
17+
; CHECK-NEXT: [[V0_INDEX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 1
18+
; CHECK-NEXT: store i32 [[V0_EXTRACT1]], ptr [[V0_INDEX1]], align 4
19+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[C]], 1
20+
; CHECK-NEXT: br i1 [[COND]], label %[[IF:.*]], label %[[ELSE:.*]]
21+
; CHECK: [[IF]]:
22+
; CHECK-NEXT: [[V1_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 [[I]]
23+
; CHECK-NEXT: [[V1_LOAD:%.*]] = load i32, ptr [[V1_INDEX]], align 4
24+
; CHECK-NEXT: store i32 1, ptr [[V1_INDEX]], align 4
25+
; CHECK-NEXT: [[V1_LOAD0:%.*]] = load i32, ptr [[V0_INDEX0]], align 4
26+
; CHECK-NEXT: [[V1_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V1_LOAD0]], i32 0
27+
; CHECK-NEXT: [[V1_LOAD1:%.*]] = load i32, ptr [[V0_INDEX1]], align 4
28+
; CHECK-NEXT: [[V1_INSERT1:%.*]] = insertelement <2 x i32> [[V1_INSERT0]], i32 [[V1_LOAD1]], i32 1
29+
; CHECK-NEXT: store i32 [[V1_LOAD]], ptr [[V1_INDEX]], align 4
30+
; CHECK-NEXT: br label %[[EXIT:.*]]
31+
; CHECK: [[ELSE]]:
32+
; CHECK-NEXT: [[V2_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 [[I]]
33+
; CHECK-NEXT: [[V2_LOAD:%.*]] = load i32, ptr [[V2_INDEX]], align 4
34+
; CHECK-NEXT: store i32 2, ptr [[V2_INDEX]], align 4
35+
; CHECK-NEXT: [[V2_LOAD0:%.*]] = load i32, ptr [[V0_INDEX0]], align 4
36+
; CHECK-NEXT: [[V2_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V2_LOAD0]], i32 0
37+
; CHECK-NEXT: [[V2_LOAD1:%.*]] = load i32, ptr [[V0_INDEX1]], align 4
38+
; CHECK-NEXT: [[V2_INSERT1:%.*]] = insertelement <2 x i32> [[V2_INSERT0]], i32 [[V2_LOAD1]], i32 1
39+
; CHECK-NEXT: store i32 [[V2_LOAD]], ptr [[V2_INDEX]], align 4
40+
; CHECK-NEXT: br label %[[EXIT]]
41+
; CHECK: [[EXIT]]:
42+
; CHECK-NEXT: [[V:%.*]] = phi <2 x i32> [ [[V1_INSERT1]], %[[IF]] ], [ [[V2_INSERT1]], %[[ELSE]] ]
43+
; CHECK-NEXT: [[V_EXTRACT:%.*]] = extractelement <2 x i32> [[V]], i64 0
44+
; CHECK-NEXT: [[V_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 0
45+
; CHECK-NEXT: store i32 [[V_EXTRACT]], ptr [[V_INDEX]], align 4
46+
; CHECK-NEXT: [[V_EXTRACT10:%.*]] = extractelement <2 x i32> [[V]], i64 1
47+
; CHECK-NEXT: [[V_INDEX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 1
48+
; CHECK-NEXT: store i32 [[V_EXTRACT10]], ptr [[V_INDEX1]], align 4
49+
; CHECK-NEXT: [[V3_INDEXJ:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 [[J]]
50+
; CHECK-NEXT: [[V3_LOAD:%.*]] = load i32, ptr [[V3_INDEXJ]], align 4
51+
; CHECK-NEXT: store i32 3, ptr [[V3_INDEXJ]], align 4
52+
; CHECK-NEXT: [[V3_LOAD0:%.*]] = load i32, ptr [[V_INDEX]], align 4
53+
; CHECK-NEXT: [[V3_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V3_LOAD0]], i32 0
54+
; CHECK-NEXT: [[V3_LOAD1:%.*]] = load i32, ptr [[V_INDEX1]], align 4
55+
; CHECK-NEXT: [[V3_INSERT1:%.*]] = insertelement <2 x i32> [[V3_INSERT0]], i32 [[V3_LOAD1]], i32 1
56+
; CHECK-NEXT: store i32 [[V3_LOAD]], ptr [[V3_INDEXJ]], align 4
57+
; CHECK-NEXT: ret void
58+
;
59+
%v0_0 = insertelement <2 x i32> poison, i32 0, i32 0
60+
%v0 = insertelement <2 x i32> %v0_0, i32 0, i32 1
61+
%cond = icmp eq i32 %c, 1
62+
br i1 %cond, label %if, label %else
63+
if:
64+
%v1 = insertelement <2 x i32> %v0, i32 1, i32 %i
65+
br label %exit
66+
else:
67+
%v2 = insertelement <2 x i32> %v0, i32 2, i32 %i
68+
br label %exit
69+
exit:
70+
%v = phi <2 x i32> [ %v1, %if ], [ %v2, %else ]
71+
%v3 = insertelement <2 x i32> %v, i32 3, i32 %j
72+
ret void
73+
}
74+
75+
; Allocas can be reused across insert/extractelement instructions on the same vector
76+
define void @test_alloca_reuse(<3 x i32> %v, i32 %a, i32 %i) {
77+
; CHECK-LABEL: define void @test_alloca_reuse(
78+
; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
1079
; CHECK-NEXT: [[EE1_ALLOCA:%.*]] = alloca [3 x i32], align 4
11-
; CHECK-NEXT: [[EE2_ALLOCA:%.*]] = alloca [3 x i32], align 4
12-
; CHECK-NEXT: [[EE2_EXTRACT:%.*]] = extractelement <3 x i32> [[V2]], i64 0
13-
; CHECK-NEXT: [[EE2_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 0
14-
; CHECK-NEXT: store i32 [[EE2_EXTRACT]], ptr [[EE2_INDEX]], align 4
15-
; CHECK-NEXT: [[EE2_EXTRACT10:%.*]] = extractelement <3 x i32> [[V2]], i64 1
16-
; CHECK-NEXT: [[EE2_INDEX11:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 1
17-
; CHECK-NEXT: store i32 [[EE2_EXTRACT10]], ptr [[EE2_INDEX11]], align 4
18-
; CHECK-NEXT: [[EE2_EXTRACT12:%.*]] = extractelement <3 x i32> [[V2]], i64 2
19-
; CHECK-NEXT: [[EE2_INDEX13:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 2
20-
; CHECK-NEXT: store i32 [[EE2_EXTRACT12]], ptr [[EE2_INDEX13]], align 4
21-
; CHECK-NEXT: [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V1]], i64 0
80+
; CHECK-NEXT: [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V]], i64 0
2281
; CHECK-NEXT: [[EE1_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 0
2382
; CHECK-NEXT: store i32 [[EE1_EXTRACT]], ptr [[EE1_INDEX]], align 4
24-
; CHECK-NEXT: [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V1]], i64 1
83+
; CHECK-NEXT: [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V]], i64 1
2584
; CHECK-NEXT: [[EE1_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 1
2685
; CHECK-NEXT: store i32 [[EE1_EXTRACT1]], ptr [[EE1_INDEX2]], align 4
27-
; CHECK-NEXT: [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V1]], i64 2
86+
; CHECK-NEXT: [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V]], i64 2
2887
; CHECK-NEXT: [[EE1_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 2
2988
; CHECK-NEXT: store i32 [[EE1_EXTRACT3]], ptr [[EE1_INDEX4]], align 4
30-
; CHECK-NEXT: br label %[[BODY:.*]]
31-
; CHECK: [[BODY]]:
3289
; CHECK-NEXT: [[EE1_INDEX5:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
3390
; CHECK-NEXT: [[EE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX5]], align 4
3491
; CHECK-NEXT: [[IE1_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
92+
; CHECK-NEXT: [[IE1_LOAD1:%.*]] = load i32, ptr [[IE1_DYNINDEX]], align 4
3593
; CHECK-NEXT: store i32 [[A]], ptr [[IE1_DYNINDEX]], align 4
3694
; CHECK-NEXT: [[IE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX]], align 4
3795
; CHECK-NEXT: [[IE1_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE1_LOAD]], i32 0
3896
; CHECK-NEXT: [[IE1_LOAD6:%.*]] = load i32, ptr [[EE1_INDEX2]], align 4
3997
; CHECK-NEXT: [[IE1_INSERT7:%.*]] = insertelement <3 x i32> [[IE1_INSERT]], i32 [[IE1_LOAD6]], i32 1
4098
; CHECK-NEXT: [[IE1_LOAD8:%.*]] = load i32, ptr [[EE1_INDEX4]], align 4
4199
; CHECK-NEXT: [[IE1_INSERT9:%.*]] = insertelement <3 x i32> [[IE1_INSERT7]], i32 [[IE1_LOAD8]], i32 2
42-
; CHECK-NEXT: [[EE2_INDEX14:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 [[J]]
43-
; CHECK-NEXT: [[EE2_LOAD:%.*]] = load i32, ptr [[EE2_INDEX14]], align 4
100+
; CHECK-NEXT: store i32 [[IE1_LOAD1]], ptr [[IE1_DYNINDEX]], align 4
44101
; CHECK-NEXT: ret void
45102
;
46-
%al = alloca [3 x i32], align 4
47-
br label %body
48-
body:
49-
%ee1 = extractelement <3 x i32> %v1, i32 %i
50-
%ie1 = insertelement <3 x i32> %v1, i32 %a, i32 %i
51-
%ee2 = extractelement <3 x i32> %v2, i32 %j
103+
%ee1 = extractelement <3 x i32> %v, i32 %i
104+
%ie1 = insertelement <3 x i32> %v, i32 %a, i32 %i
52105
ret void
53106
}
54107

@@ -90,13 +143,15 @@ define <3 x i32> @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
90143
; CHECK-NEXT: [[IE_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 2
91144
; CHECK-NEXT: store i32 [[IE_EXTRACT3]], ptr [[IE_INDEX4]], align 4
92145
; CHECK-NEXT: [[IE_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 [[I]]
146+
; CHECK-NEXT: [[IE_LOAD1:%.*]] = load i32, ptr [[IE_DYNINDEX]], align 4
93147
; CHECK-NEXT: store i32 [[A]], ptr [[IE_DYNINDEX]], align 4
94148
; CHECK-NEXT: [[IE_LOAD:%.*]] = load i32, ptr [[IE_INDEX]], align 4
95149
; CHECK-NEXT: [[IE_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE_LOAD]], i32 0
96150
; CHECK-NEXT: [[IE_LOAD5:%.*]] = load i32, ptr [[IE_INDEX2]], align 4
97151
; CHECK-NEXT: [[IE_INSERT6:%.*]] = insertelement <3 x i32> [[IE_INSERT]], i32 [[IE_LOAD5]], i32 1
98152
; CHECK-NEXT: [[IE_LOAD7:%.*]] = load i32, ptr [[IE_INDEX4]], align 4
99153
; CHECK-NEXT: [[IE_INSERT8:%.*]] = insertelement <3 x i32> [[IE_INSERT6]], i32 [[IE_LOAD7]], i32 2
154+
; CHECK-NEXT: store i32 [[IE_LOAD1]], ptr [[IE_DYNINDEX]], align 4
100155
; CHECK-NEXT: ret <3 x i32> [[IE_INSERT8]]
101156
;
102157
%ie = insertelement <3 x i32> %v, i32 %a, i32 %i

0 commit comments

Comments
 (0)