Skip to content

Commit a1b945c

Browse files
authored
Loop exit restructurizer: don't iterate over uses while mutating them (microsoft#6644)
The SkipBlockWithBranch function does the following: - Splits the block into three blocks with an if-then-endif structure. - Moves most instructions from the original block into the "then" block - If any of those values are used outside the original block, they are propagated through newly-constructed phis in the 'endif' block. This algorithm had a bug where the uses of a value were being scanned while the uses were also being updated. In some cases a downstream out-of-block use could be skipped. That results in an invalid module because now the original definition is now in the 'then' block, which does not dominate the downstream out-of-block use. Add a test that demonstrates the problem.
1 parent b41d8a9 commit a1b945c

File tree

2 files changed

+271
-12
lines changed

2 files changed

+271
-12
lines changed

lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -322,24 +322,26 @@ static void SkipBlockWithBranch(BasicBlock *bb, Value *cond, Loop *L,
322322
BranchInst::Create(end, body, cond, bb);
323323

324324
for (Instruction &inst : *body) {
325-
PHINode *phi = nullptr;
326325

327326
// For each user that's outside of 'body', replace its use of 'inst' with a
328327
// phi created in 'end'
329-
for (auto it = inst.user_begin(); it != inst.user_end();) {
330-
Instruction *user_inst = cast<Instruction>(*(it++));
331-
if (user_inst == phi)
332-
continue;
328+
SmallPtrSet<Instruction *, 8> users_in_other_blocks;
329+
for (auto *user : inst.users()) {
330+
Instruction *user_inst = cast<Instruction>(user);
333331
if (user_inst->getParent() != body) {
334-
if (!phi) {
335-
phi = PHINode::Create(inst.getType(), 2, "", &*end->begin());
336-
phi->addIncoming(GetDefaultValue(inst.getType()), bb);
337-
phi->addIncoming(&inst, body);
338-
}
332+
users_in_other_blocks.insert(user_inst);
333+
}
334+
}
335+
if (users_in_other_blocks.size() > 0) {
336+
auto *phi = PHINode::Create(inst.getType(), 2, "", &*end->begin());
337+
phi->addIncoming(GetDefaultValue(inst.getType()), bb);
338+
phi->addIncoming(&inst, body);
339+
340+
for (auto *user_inst : users_in_other_blocks) {
339341
user_inst->replaceUsesOfWith(&inst, phi);
340342
}
341-
} // For each user of inst of body
342-
} // For each inst in body
343+
}
344+
} // For each inst in body
343345

344346
L->addBasicBlockToLoop(body, *LI);
345347
L->addBasicBlockToLoop(end, *LI);
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -loop-unroll,StructurizeLoopExits=1 -S | FileCheck %s
2+
3+
; The Loop exit structurizer will wrap the definition of %DerivFineX3 in a conditional block.
4+
; Its value will later be propagated into a phi, and that phi replaces all further uses
5+
; of %DerivFineX3.
6+
;
7+
; Tests that a bug is fixed where the code used to iterate through the uses of a value
8+
; while also updating those uses. The old code would fail to update the definition
9+
; of %g.i.2.i3 and the result would be an invalid module: %DerivFineX3 would not dominate
10+
; all its uses.
11+
12+
13+
; CHECK: define void @main
14+
; CHECK-NOT: %DerivFineX3
15+
; CHECK: "\01?f@@YAXXZ.exit.i":
16+
; CHECK-NEXT: br i1 true, label %dx.struct_exit.cond_end, label %dx.struct_exit.cond_body
17+
18+
; CHECK: dx.struct_exit.cond_body:
19+
; CHECK: %DerivFineX3 = call
20+
; CHECK: br label %dx.struct_exit.cond_end
21+
22+
; CHECK: dx.struct_exit.cond_end:
23+
; CHECK: = phi {{.*}} %DerivFineX3
24+
; CHECK: br
25+
; CHECK-NOT: %DerivFineX3
26+
; CHECK: ret void
27+
28+
29+
;
30+
;
31+
; void f() {
32+
; int l_1 = 10;
33+
; for (int l = 0, l_2 = 0; l < 5 && l_2 < 1; l++, l_2++) {
34+
; while (1 < l_1) { }
35+
; }
36+
; }
37+
;
38+
;
39+
; struct tint_symbol {
40+
; float4 value : SV_Target0;
41+
; };
42+
;
43+
; float4 main_inner() {
44+
; float4 g = float4(0.0f, 0.0f, 0.0f, 0.0f);
45+
; bool2 true2 = (true).xx;
46+
; uint2 _e8 = (0u).xx;
47+
; do {
48+
; if (_e8.x != 2u) {
49+
; f();
50+
; float4 _e15 = ddx_fine(g);
51+
; if (_e8[_e8.x] == 2u) {
52+
; g = _e15;
53+
; } else {
54+
; f();
55+
; }
56+
; switch(_e8.x) {
57+
; case 3u: {
58+
; break;
59+
; }
60+
; case 2u: {
61+
; g = _e15;
62+
; break;
63+
; }
64+
; default: {
65+
; g = _e15;
66+
; }
67+
; }
68+
; f();
69+
; }
70+
; } while(!all(true2));
71+
; return g;
72+
;}
73+
;
74+
;tint_symbol main() {
75+
; float4 inner_result = main_inner();
76+
; tint_symbol wrapper_result = (tint_symbol)0;
77+
; wrapper_result.value = inner_result;
78+
; return wrapper_result;
79+
;}
80+
81+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
82+
target triple = "dxil-ms-dx"
83+
84+
%struct.tint_symbol = type { <4 x float> }
85+
86+
; Function Attrs: nounwind
87+
define void @main(<4 x float>* noalias) #0 {
88+
entry:
89+
%1 = alloca [2 x i32], align 4
90+
%2 = getelementptr inbounds [2 x i32], [2 x i32]* %1, i32 0, i32 0, !dbg !20 ; line:17 col:9
91+
store i32 0, i32* %2, align 4, !dbg !20 ; line:17 col:9
92+
%3 = getelementptr inbounds [2 x i32], [2 x i32]* %1, i32 0, i32 1, !dbg !20 ; line:17 col:9
93+
store i32 0, i32* %3, align 4, !dbg !20 ; line:17 col:9
94+
br label %do.body.i, !dbg !26 ; line:18 col:3
95+
96+
do.body.i: ; preds = %do.cond.i, %entry
97+
%g.i.0.i0 = phi float [ 0.000000e+00, %entry ], [ %g.i.3.i0, %do.cond.i ]
98+
%g.i.0.i1 = phi float [ 0.000000e+00, %entry ], [ %g.i.3.i1, %do.cond.i ]
99+
%g.i.0.i2 = phi float [ 0.000000e+00, %entry ], [ %g.i.3.i2, %do.cond.i ]
100+
%g.i.0.i3 = phi float [ 0.000000e+00, %entry ], [ %g.i.3.i3, %do.cond.i ]
101+
%4 = getelementptr inbounds [2 x i32], [2 x i32]* %1, i32 0, i32 0, !dbg !27 ; line:19 col:9
102+
%5 = load i32, i32* %4, align 4, !dbg !27 ; line:19 col:9
103+
%cmp.i = icmp ne i32 %5, 2, !dbg !28 ; line:19 col:15
104+
br i1 %cmp.i, label %for.cond.i.i, label %do.cond.i, !dbg !27 ; line:19 col:9
105+
106+
for.cond.i.i: ; preds = %do.body.i
107+
br i1 true, label %while.cond.i.i.preheader, label %"\01?f@@YAXXZ.exit.i", !dbg !29 ; line:4 col:3
108+
109+
while.cond.i.i.preheader: ; preds = %for.cond.i.i
110+
br label %while.cond.i.i, !dbg !32 ; line:5 col:5
111+
112+
while.cond.i.i: ; preds = %while.cond.i.i.preheader, %while.cond.i.i
113+
br label %while.cond.i.i, !dbg !32 ; line:5 col:5
114+
115+
"\01?f@@YAXXZ.exit.i": ; preds = %for.cond.i.i
116+
%DerivFineX = call float @dx.op.unary.f32(i32 85, float %g.i.0.i0), !dbg !33 ; line:21 col:21 ; DerivFineX(value)
117+
%DerivFineX1 = call float @dx.op.unary.f32(i32 85, float %g.i.0.i1), !dbg !33 ; line:21 col:21 ; DerivFineX(value)
118+
%DerivFineX2 = call float @dx.op.unary.f32(i32 85, float %g.i.0.i2), !dbg !33 ; line:21 col:21 ; DerivFineX(value)
119+
%DerivFineX3 = call float @dx.op.unary.f32(i32 85, float %g.i.0.i3), !dbg !33 ; line:21 col:21 ; DerivFineX(value)
120+
%6 = getelementptr inbounds [2 x i32], [2 x i32]* %1, i32 0, i32 0, !dbg !34 ; line:22 col:15
121+
%7 = load i32, i32* %6, align 4, !dbg !34 ; line:22 col:15
122+
%8 = getelementptr [2 x i32], [2 x i32]* %1, i32 0, i32 %7, !dbg !35 ; line:22 col:11
123+
%9 = load i32, i32* %8, !dbg !35, !tbaa !36 ; line:22 col:11
124+
%cmp6.i = icmp eq i32 %9, 2, !dbg !40 ; line:22 col:22
125+
br i1 %cmp6.i, label %if.end.i, label %for.cond.i.19.i, !dbg !35 ; line:22 col:11
126+
127+
for.cond.i.19.i: ; preds = %"\01?f@@YAXXZ.exit.i"
128+
br i1 true, label %while.cond.i.24.i.preheader, label %if.end.i, !dbg !41 ; line:4 col:3
129+
130+
while.cond.i.24.i.preheader: ; preds = %for.cond.i.19.i
131+
br label %while.cond.i.24.i, !dbg !43 ; line:5 col:5
132+
133+
while.cond.i.24.i: ; preds = %while.cond.i.24.i.preheader, %while.cond.i.24.i
134+
br label %while.cond.i.24.i, !dbg !43 ; line:5 col:5
135+
136+
if.end.i: ; preds = %for.cond.i.19.i, %"\01?f@@YAXXZ.exit.i"
137+
%g.i.1.i0 = phi float [ %DerivFineX, %"\01?f@@YAXXZ.exit.i" ], [ %g.i.0.i0, %for.cond.i.19.i ]
138+
%g.i.1.i1 = phi float [ %DerivFineX1, %"\01?f@@YAXXZ.exit.i" ], [ %g.i.0.i1, %for.cond.i.19.i ]
139+
%g.i.1.i2 = phi float [ %DerivFineX2, %"\01?f@@YAXXZ.exit.i" ], [ %g.i.0.i2, %for.cond.i.19.i ]
140+
%g.i.1.i3 = phi float [ %DerivFineX3, %"\01?f@@YAXXZ.exit.i" ], [ %g.i.0.i3, %for.cond.i.19.i ]
141+
%10 = getelementptr inbounds [2 x i32], [2 x i32]* %1, i32 0, i32 0, !dbg !44 ; line:27 col:14
142+
%11 = load i32, i32* %10, align 4, !dbg !44 ; line:27 col:14
143+
switch i32 %11, label %sw.default.i [
144+
i32 3, label %for.cond.i.5.i
145+
i32 2, label %sw.bb.10.i
146+
], !dbg !45 ; line:27 col:7
147+
148+
sw.bb.10.i: ; preds = %if.end.i
149+
br label %for.cond.i.5.i, !dbg !46 ; line:33 col:11
150+
151+
sw.default.i: ; preds = %if.end.i
152+
br label %for.cond.i.5.i, !dbg !47 ; line:38 col:7
153+
154+
for.cond.i.5.i: ; preds = %if.end.i, %sw.bb.10.i, %sw.default.i
155+
%g.i.2.i0 = phi float [ %DerivFineX, %sw.default.i ], [ %DerivFineX, %sw.bb.10.i ], [ %g.i.1.i0, %if.end.i ]
156+
%g.i.2.i1 = phi float [ %DerivFineX1, %sw.default.i ], [ %DerivFineX1, %sw.bb.10.i ], [ %g.i.1.i1, %if.end.i ]
157+
%g.i.2.i2 = phi float [ %DerivFineX2, %sw.default.i ], [ %DerivFineX2, %sw.bb.10.i ], [ %g.i.1.i2, %if.end.i ]
158+
%g.i.2.i3 = phi float [ %DerivFineX3, %sw.default.i ], [ %DerivFineX3, %sw.bb.10.i ], [ %g.i.1.i3, %if.end.i ]
159+
br i1 true, label %while.cond.i.10.i.preheader, label %do.cond.i, !dbg !48 ; line:4 col:3
160+
161+
while.cond.i.10.i.preheader: ; preds = %for.cond.i.5.i
162+
br label %while.cond.i.10.i, !dbg !50 ; line:5 col:5
163+
164+
while.cond.i.10.i: ; preds = %while.cond.i.10.i.preheader, %while.cond.i.10.i
165+
br label %while.cond.i.10.i, !dbg !50 ; line:5 col:5
166+
167+
do.cond.i: ; preds = %for.cond.i.5.i, %do.body.i
168+
%g.i.3.i0 = phi float [ %g.i.0.i0, %do.body.i ], [ %g.i.2.i0, %for.cond.i.5.i ]
169+
%g.i.3.i1 = phi float [ %g.i.0.i1, %do.body.i ], [ %g.i.2.i1, %for.cond.i.5.i ]
170+
%g.i.3.i2 = phi float [ %g.i.0.i2, %do.body.i ], [ %g.i.2.i2, %for.cond.i.5.i ]
171+
%g.i.3.i3 = phi float [ %g.i.0.i3, %do.body.i ], [ %g.i.2.i3, %for.cond.i.5.i ]
172+
br i1 false, label %do.body.i, label %"\01?main_inner@@YA?AV?$vector@M$03@@XZ.exit", !dbg !51 ; line:41 col:3
173+
174+
"\01?main_inner@@YA?AV?$vector@M$03@@XZ.exit": ; preds = %do.cond.i
175+
%g.i.3.i3.lcssa = phi float [ %g.i.3.i3, %do.cond.i ]
176+
%g.i.3.i2.lcssa = phi float [ %g.i.3.i2, %do.cond.i ]
177+
%g.i.3.i1.lcssa = phi float [ %g.i.3.i1, %do.cond.i ]
178+
%g.i.3.i0.lcssa = phi float [ %g.i.3.i0, %do.cond.i ]
179+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %g.i.3.i0.lcssa), !dbg !52 ; line:49 col:10 ; StoreOutput(outputSigId,rowIndex,colIndex,value)
180+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %g.i.3.i1.lcssa), !dbg !52 ; line:49 col:10 ; StoreOutput(outputSigId,rowIndex,colIndex,value)
181+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %g.i.3.i2.lcssa), !dbg !52 ; line:49 col:10 ; StoreOutput(outputSigId,rowIndex,colIndex,value)
182+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %g.i.3.i3.lcssa), !dbg !52 ; line:49 col:10 ; StoreOutput(outputSigId,rowIndex,colIndex,value)
183+
ret void, !dbg !53 ; line:49 col:3
184+
}
185+
186+
; Function Attrs: nounwind
187+
declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
188+
189+
; Function Attrs: nounwind readnone
190+
declare float @dx.op.unary.f32(i32, float) #1
191+
192+
attributes #0 = { nounwind }
193+
attributes #1 = { nounwind readnone }
194+
195+
!llvm.module.flags = !{!0}
196+
!pauseresume = !{!1}
197+
!llvm.ident = !{!2}
198+
!dx.version = !{!3}
199+
!dx.valver = !{!4}
200+
!dx.shaderModel = !{!5}
201+
!dx.typeAnnotations = !{!6, !9}
202+
!dx.entryPoints = !{!16}
203+
204+
!0 = !{i32 2, !"Debug Info Version", i32 3}
205+
!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
206+
!2 = !{!"dxc(private) 1.8.0.14549 (main, 0781ded87-dirty)"}
207+
!3 = !{i32 1, i32 0}
208+
!4 = !{i32 1, i32 8}
209+
!5 = !{!"ps", i32 6, i32 0}
210+
!6 = !{i32 0, %struct.tint_symbol undef, !7}
211+
!7 = !{i32 16, !8}
212+
!8 = !{i32 6, !"value", i32 3, i32 0, i32 4, !"SV_Target0", i32 7, i32 9}
213+
!9 = !{i32 1, void (<4 x float>*)* @main, !10}
214+
!10 = !{!11, !13}
215+
!11 = !{i32 0, !12, !12}
216+
!12 = !{}
217+
!13 = !{i32 1, !14, !15}
218+
!14 = !{i32 4, !"SV_Target0", i32 7, i32 9}
219+
!15 = !{i32 0}
220+
!16 = !{void (<4 x float>*)* @main, !"main", !17, null, null}
221+
!17 = !{null, !18, null}
222+
!18 = !{!19}
223+
!19 = !{i32 0, !"SV_Target", i8 9, i8 16, !15, i8 0, i32 1, i8 4, i32 0, i8 0, null}
224+
!20 = !DILocation(line: 17, column: 9, scope: !21, inlinedAt: !24)
225+
!21 = !DISubprogram(name: "main_inner", scope: !22, file: !22, line: 14, type: !23, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false)
226+
!22 = !DIFile(filename: "s2.hlsl", directory: "")
227+
!23 = !DISubroutineType(types: !12)
228+
!24 = distinct !DILocation(line: 46, column: 25, scope: !25)
229+
!25 = !DISubprogram(name: "main", scope: !22, file: !22, line: 45, type: !23, isLocal: false, isDefinition: true, scopeLine: 45, flags: DIFlagPrototyped, isOptimized: false, function: void (<4 x float>*)* @main)
230+
!26 = !DILocation(line: 18, column: 3, scope: !21, inlinedAt: !24)
231+
!27 = !DILocation(line: 19, column: 9, scope: !21, inlinedAt: !24)
232+
!28 = !DILocation(line: 19, column: 15, scope: !21, inlinedAt: !24)
233+
!29 = !DILocation(line: 4, column: 3, scope: !30, inlinedAt: !31)
234+
!30 = !DISubprogram(name: "f", scope: !22, file: !22, line: 2, type: !23, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false)
235+
!31 = distinct !DILocation(line: 20, column: 7, scope: !21, inlinedAt: !24)
236+
!32 = !DILocation(line: 5, column: 5, scope: !30, inlinedAt: !31)
237+
!33 = !DILocation(line: 21, column: 21, scope: !21, inlinedAt: !24)
238+
!34 = !DILocation(line: 22, column: 15, scope: !21, inlinedAt: !24)
239+
!35 = !DILocation(line: 22, column: 11, scope: !21, inlinedAt: !24)
240+
!36 = !{!37, !37, i64 0}
241+
!37 = !{!"int", !38, i64 0}
242+
!38 = !{!"omnipotent char", !39, i64 0}
243+
!39 = !{!"Simple C/C++ TBAA"}
244+
!40 = !DILocation(line: 22, column: 22, scope: !21, inlinedAt: !24)
245+
!41 = !DILocation(line: 4, column: 3, scope: !30, inlinedAt: !42)
246+
!42 = distinct !DILocation(line: 25, column: 9, scope: !21, inlinedAt: !24)
247+
!43 = !DILocation(line: 5, column: 5, scope: !30, inlinedAt: !42)
248+
!44 = !DILocation(line: 27, column: 14, scope: !21, inlinedAt: !24)
249+
!45 = !DILocation(line: 27, column: 7, scope: !21, inlinedAt: !24)
250+
!46 = !DILocation(line: 33, column: 11, scope: !21, inlinedAt: !24)
251+
!47 = !DILocation(line: 38, column: 7, scope: !21, inlinedAt: !24)
252+
!48 = !DILocation(line: 4, column: 3, scope: !30, inlinedAt: !49)
253+
!49 = distinct !DILocation(line: 39, column: 7, scope: !21, inlinedAt: !24)
254+
!50 = !DILocation(line: 5, column: 5, scope: !30, inlinedAt: !49)
255+
!51 = !DILocation(line: 41, column: 3, scope: !21, inlinedAt: !24)
256+
!52 = !DILocation(line: 49, column: 10, scope: !25)
257+
!53 = !DILocation(line: 49, column: 3, scope: !25)

0 commit comments

Comments
 (0)