Skip to content

Commit b41d8a9

Browse files
authored
Fix LoopDeletion incorrectly updating PHI with multiple duplicate inputs (microsoft#6643)
LoopDeletion was incorrectly updating PHI nodes in the target block when it had duplicate input edges. This happens, for example, when deleting a loop that uses a switch with multiple cases that exit the same way. After determining that this was the bug, I found this fix in LLVM: https://reviews.llvm.org/D34516 and applied it here.
1 parent a6f4025 commit b41d8a9

File tree

2 files changed

+249
-6
lines changed

2 files changed

+249
-6
lines changed

lib/Transforms/Scalar/LoopDeletion.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,29 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
195195

196196
// Rewrite phis in the exit block to get their inputs from
197197
// the preheader instead of the exiting block.
198-
BasicBlock *exitingBlock = exitingBlocks[0];
199198
BasicBlock::iterator BI = exitBlock->begin();
200199
while (PHINode *P = dyn_cast<PHINode>(BI)) {
201-
int j = P->getBasicBlockIndex(exitingBlock);
202-
assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
203-
P->setIncomingBlock(j, preheader);
204-
for (unsigned i = 1; i < exitingBlocks.size(); ++i)
205-
P->removeIncomingValue(exitingBlocks[i]);
200+
// HLSL Change begin - apply https://reviews.llvm.org/D34516
201+
// Set the zero'th element of Phi to be from the preheader and remove all
202+
// other incoming values. Given the loop has dedicated exits, all other
203+
// incoming values must be from the exiting blocks.
204+
int PredIndex = 0;
205+
P->setIncomingBlock(PredIndex, preheader);
206+
// Removes all incoming values from all other exiting blocks (including
207+
// duplicate values from an exiting block).
208+
// Nuke all entries except the zero'th entry which is the preheader entry.
209+
// NOTE! We need to remove Incoming Values in the reverse order as done
210+
// below, to keep the indices valid for deletion (removeIncomingValues
211+
// updates getNumIncomingValues and shifts all values down into the operand
212+
// being deleted).
213+
for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i)
214+
P->removeIncomingValue(e - i, false);
215+
216+
assert((P->getNumIncomingValues() == 1 &&
217+
P->getIncomingBlock(PredIndex) == preheader) &&
218+
"Should have exactly one value and that's from the preheader!");
206219
++BI;
220+
// HLSL Change end
207221
}
208222

209223
// Update the dominator tree and remove the instructions and blocks that will
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -dxil-loop-deletion,NoSink=0 -S | FileCheck %s
2+
3+
; This test was generated from the following HLSL:
4+
;
5+
; cbuffer cbuffer_g : register(b0) {
6+
; uint4 gu4[1];
7+
; };
8+
;
9+
; float4 f() {
10+
; float4 r = float4(0.0f, 0.0f, 0.0f, 0.0f);
11+
; int i = 0;
12+
; int j = 0;
13+
; while (true) {
14+
; float a = asfloat(gu4[0].y);
15+
; int ai = int(a);
16+
; bool b = (j < ai);
17+
; if (j >= ai) {
18+
; break;
19+
; }
20+
; bool c = (i > 0);
21+
; if (c) {
22+
; break;
23+
; } else {
24+
; bool3 b3 = bool3(b.xxx);
25+
; if (b3[i]) {
26+
; switch(i) {
27+
; case 0: return r;
28+
; case -1: return r;
29+
; }
30+
; if (c) {
31+
; break;
32+
; }
33+
; } else {
34+
; r = float4(0.0f, 0.0f, 0.0f, a);
35+
; }
36+
; }
37+
; i = j;
38+
; j = (j + 1);
39+
; }
40+
; r = (0.0f).xxxx;
41+
; return r;
42+
; }
43+
;
44+
; struct return_val {
45+
; float4 value : SV_Target0;
46+
; };
47+
;
48+
; return_val main() {
49+
; float4 inner_result = f();
50+
; return_val wrapper_result = (return_val)0;
51+
; wrapper_result.value = inner_result;
52+
; return wrapper_result;
53+
; }
54+
;
55+
; When compiling the above with dxc, ASAN reported a use-after-free in simplifycfg,
56+
; which originated from a delete during the dxil-loop-deletion pass. This was due
57+
; to a bug in LoopDeletion::runOnLoop that did not properly handle updated PHIs
58+
; with duplicate input preds. After this test runs, the loop should be deleted,
59+
; and the program optimized to simply write out 0s to the cbuffer.
60+
61+
; CHECK: define void @main
62+
; CHECK-NEXT: entry:
63+
; CHECK-NEXT: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)
64+
; CHECK-NEXT: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
65+
; CHECK-NEXT: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 0.000000e+00)
66+
; CHECK-NEXT: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 0.000000e+00)
67+
; CHECK-NEXT: ret void
68+
69+
;
70+
; Output signature:
71+
;
72+
; Name Index InterpMode DynIdx
73+
; -------------------- ----- ---------------------- ------
74+
; SV_Target 0
75+
;
76+
; Buffer Definitions:
77+
;
78+
; cbuffer cbuffer_g
79+
; {
80+
;
81+
; struct cbuffer_g
82+
; {
83+
;
84+
; uint4 gu4[1]; ; Offset: 0
85+
;
86+
; } cbuffer_g; ; Offset: 0 Size: 16
87+
;
88+
; }
89+
;
90+
;
91+
; Resource Bindings:
92+
;
93+
; Name Type Format Dim ID HLSL Bind Count
94+
; ------------------------------ ---------- ------- ----------- ------- -------------- ------
95+
; cbuffer_g cbuffer NA NA CB0 cb0 1
96+
;
97+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
98+
target triple = "dxil-ms-dx"
99+
100+
%cbuffer_g = type { [1 x <4 x i32>] }
101+
%dx.types.Handle = type { i8* }
102+
%dx.types.ResourceProperties = type { i32, i32 }
103+
%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
104+
%struct.return_val = type { <4 x float> }
105+
106+
@cbuffer_g = external constant %cbuffer_g
107+
@.hca = internal unnamed_addr constant [3 x i32] [i32 1, i32 1, i32 1]
108+
@llvm.used = appending global [1 x i8*] [i8* bitcast (%cbuffer_g* @cbuffer_g to i8*)], section "llvm.metadata"
109+
110+
; Function Attrs: nounwind readnone
111+
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %cbuffer_g*, i32)"(i32, %cbuffer_g*, i32) #0
112+
113+
; Function Attrs: nounwind readnone
114+
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %cbuffer_g)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %cbuffer_g) #0
115+
116+
; Function Attrs: nounwind
117+
define void @main(<4 x float>* noalias nocapture readnone) #1 {
118+
entry:
119+
%1 = load %cbuffer_g, %cbuffer_g* @cbuffer_g, align 4, !dbg !25 ; line:45 col:25
120+
%cbuffer_g = call %dx.types.Handle @dx.op.createHandleForLib.cbuffer_g(i32 160, %cbuffer_g %1), !dbg !25 ; line:45 col:25 ; CreateHandleForLib(Resource)
121+
%2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %cbuffer_g, %dx.types.ResourceProperties { i32 13, i32 16 }), !dbg !25 ; line:45 col:25 ; AnnotateHandle(res,props) resource: CBuffer
122+
%3 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %2, i32 0), !dbg !29 ; line:10 col:23 ; CBufferLoadLegacy(handle,regIndex)
123+
%4 = extractvalue %dx.types.CBufRet.i32 %3, 1, !dbg !29 ; line:10 col:23
124+
%5 = bitcast i32 %4 to float, !dbg !32 ; line:10 col:15
125+
%conv.i.6 = fptosi float %5 to i32, !dbg !33 ; line:11 col:18
126+
%cmp1.i.8 = icmp sgt i32 %conv.i.6, 0, !dbg !34 ; line:13 col:11
127+
br i1 %cmp1.i.8, label %if.end.i, label %"\01?f@@YA?AV?$vector@M$03@@XZ.exit", !dbg !35 ; line:13 col:9
128+
129+
if.end.i: ; preds = %entry, %if.end.19.i
130+
%6 = phi float [ %9, %if.end.19.i ], [ %5, %entry ]
131+
%j.i.011 = phi i32 [ %add.i, %if.end.19.i ], [ 0, %entry ]
132+
%r.i.0.i310 = phi float [ %r.i.0.i310, %if.end.19.i ], [ 0.000000e+00, %entry ]
133+
%i.i.09 = phi i32 [ %j.i.011, %if.end.19.i ], [ 0, %entry ]
134+
%cmp4.i = icmp sgt i32 %i.i.09, 0, !dbg !36 ; line:16 col:17
135+
br i1 %cmp4.i, label %"\01?f@@YA?AV?$vector@M$03@@XZ.exit", label %if.then.12.i, !dbg !37 ; line:17 col:9
136+
137+
if.then.12.i: ; preds = %if.end.i
138+
switch i32 %i.i.09, label %if.end.19.i [
139+
i32 0, label %"\01?f@@YA?AV?$vector@M$03@@XZ.exit"
140+
i32 -1, label %"\01?f@@YA?AV?$vector@M$03@@XZ.exit"
141+
], !dbg !38 ; line:22 col:9
142+
143+
if.end.19.i: ; preds = %if.then.12.i
144+
%add.i = add nuw nsw i32 %j.i.011, 1, !dbg !39 ; line:34 col:12
145+
%7 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %2, i32 0), !dbg !29 ; line:10 col:23 ; CBufferLoadLegacy(handle,regIndex)
146+
%8 = extractvalue %dx.types.CBufRet.i32 %7, 1, !dbg !29 ; line:10 col:23
147+
%9 = bitcast i32 %8 to float, !dbg !32 ; line:10 col:15
148+
%conv.i = fptosi float %9 to i32, !dbg !33 ; line:11 col:18
149+
%cmp.i = icmp slt i32 %add.i, %conv.i, !dbg !40 ; line:12 col:17
150+
br i1 %cmp.i, label %if.end.i, label %"\01?f@@YA?AV?$vector@M$03@@XZ.exit", !dbg !35 ; line:13 col:9
151+
152+
"\01?f@@YA?AV?$vector@M$03@@XZ.exit": ; preds = %if.then.12.i, %if.then.12.i, %if.end.i, %if.end.19.i, %entry
153+
%retval.i.0.i3 = phi float [ 0.000000e+00, %entry ], [ %r.i.0.i310, %if.then.12.i ], [ %r.i.0.i310, %if.then.12.i ], [ 0.000000e+00, %if.end.i ], [ 0.000000e+00, %if.end.19.i ]
154+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00), !dbg !41 ; line:48 col:10 ; StoreOutput(outputSigId,rowIndex,colIndex,value)
155+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00), !dbg !41 ; line:48 col:10 ; StoreOutput(outputSigId,rowIndex,colIndex,value)
156+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 0.000000e+00), !dbg !41 ; line:48 col:10 ; StoreOutput(outputSigId,rowIndex,colIndex,value)
157+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %retval.i.0.i3), !dbg !41 ; line:48 col:10 ; StoreOutput(outputSigId,rowIndex,colIndex,value)
158+
ret void, !dbg !42 ; line:48 col:3
159+
}
160+
161+
; Function Attrs: nounwind
162+
declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
163+
164+
; Function Attrs: nounwind readonly
165+
declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #2
166+
167+
; Function Attrs: nounwind readonly
168+
declare %dx.types.Handle @dx.op.createHandleForLib.cbuffer_g(i32, %cbuffer_g) #2
169+
170+
; Function Attrs: nounwind readnone
171+
declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0
172+
173+
attributes #0 = { nounwind readnone }
174+
attributes #1 = { nounwind }
175+
attributes #2 = { nounwind readonly }
176+
177+
!llvm.module.flags = !{!0}
178+
!pauseresume = !{!1}
179+
!llvm.ident = !{!2}
180+
!dx.version = !{!3}
181+
!dx.valver = !{!4}
182+
!dx.shaderModel = !{!5}
183+
!dx.resources = !{!6}
184+
!dx.typeAnnotations = !{!9, !14}
185+
!dx.entryPoints = !{!21}
186+
187+
!0 = !{i32 2, !"Debug Info Version", i32 3}
188+
!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
189+
!2 = !{!"dxc(private) 1.8.0.4514 (d9bd2a706-dirty)"}
190+
!3 = !{i32 1, i32 5}
191+
!4 = !{i32 1, i32 8}
192+
!5 = !{!"ps", i32 6, i32 5}
193+
!6 = !{null, null, !7, null}
194+
!7 = !{!8}
195+
!8 = !{i32 0, %cbuffer_g* @cbuffer_g, !"cbuffer_g", i32 0, i32 0, i32 1, i32 16, null}
196+
!9 = !{i32 0, %struct.return_val undef, !10, %cbuffer_g undef, !12}
197+
!10 = !{i32 16, !11}
198+
!11 = !{i32 6, !"value", i32 3, i32 0, i32 4, !"SV_Target0", i32 7, i32 9}
199+
!12 = !{i32 16, !13}
200+
!13 = !{i32 6, !"gu4", i32 3, i32 0, i32 7, i32 5}
201+
!14 = !{i32 1, void (<4 x float>*)* @main, !15}
202+
!15 = !{!16, !18}
203+
!16 = !{i32 0, !17, !17}
204+
!17 = !{}
205+
!18 = !{i32 1, !19, !20}
206+
!19 = !{i32 4, !"SV_Target0", i32 7, i32 9}
207+
!20 = !{i32 0}
208+
!21 = !{void (<4 x float>*)* @main, !"main", !22, !6, null}
209+
!22 = !{null, !23, null}
210+
!23 = !{!24}
211+
!24 = !{i32 0, !"SV_Target", i8 9, i8 16, !20, i8 0, i32 1, i8 4, i32 0, i8 0, null}
212+
!25 = !DILocation(line: 45, column: 25, scope: !26)
213+
!26 = !DISubprogram(name: "main", scope: !27, file: !27, line: 44, type: !28, isLocal: false, isDefinition: true, scopeLine: 44, flags: DIFlagPrototyped, isOptimized: false, function: void (<4 x float>*)* @main)
214+
!27 = !DIFile(filename: "/mnt/c/Users/amaiorano/Downloads/340196361/standalone_reduced.hlsl", directory: "")
215+
!28 = !DISubroutineType(types: !17)
216+
!29 = !DILocation(line: 10, column: 23, scope: !30, inlinedAt: !31)
217+
!30 = !DISubprogram(name: "f", scope: !27, file: !27, line: 5, type: !28, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false)
218+
!31 = distinct !DILocation(line: 45, column: 25, scope: !26)
219+
!32 = !DILocation(line: 10, column: 15, scope: !30, inlinedAt: !31)
220+
!33 = !DILocation(line: 11, column: 18, scope: !30, inlinedAt: !31)
221+
!34 = !DILocation(line: 13, column: 11, scope: !30, inlinedAt: !31)
222+
!35 = !DILocation(line: 13, column: 9, scope: !30, inlinedAt: !31)
223+
!36 = !DILocation(line: 16, column: 17, scope: !30, inlinedAt: !31)
224+
!37 = !DILocation(line: 17, column: 9, scope: !30, inlinedAt: !31)
225+
!38 = !DILocation(line: 22, column: 9, scope: !30, inlinedAt: !31)
226+
!39 = !DILocation(line: 34, column: 12, scope: !30, inlinedAt: !31)
227+
!40 = !DILocation(line: 12, column: 17, scope: !30, inlinedAt: !31)
228+
!41 = !DILocation(line: 48, column: 10, scope: !26)
229+
!42 = !DILocation(line: 48, column: 3, scope: !26)

0 commit comments

Comments
 (0)