Skip to content

Commit f810e92

Browse files
authored
scalarrepl-param-hlsl: fix zero replacement in entry (microsoft#6855)
This is the same fix as was done in microsoft#6516, except that this one replaces uses of zero-init for instructions in the entry block.
1 parent 06da989 commit f810e92

File tree

3 files changed

+250
-1
lines changed

3 files changed

+250
-1
lines changed

lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3728,7 +3728,9 @@ static bool ReplaceUseOfZeroInitEntry(Instruction *I, Value *V) {
37283728
// I is the last inst in the block after split.
37293729
// Any inst in current block is before I.
37303730
if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
3731-
LI->replaceAllUsesWith(ConstantAggregateZero::get(LI->getType()));
3731+
// Replace uses of the load with a constant zero.
3732+
Constant *replacement = Constant::getNullValue(LI->getType());
3733+
LI->replaceAllUsesWith(replacement);
37323734
LI->eraseFromParent();
37333735
continue;
37343736
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s
2+
3+
; The pass replaces a memcpy from a zero-initialized global that does not have an
4+
; intervening store in the entry block.
5+
; When tracing through geps and bitcasts of uses of that global, the algorithm might
6+
; bottom out at replacing a load of a scalar float. Verify this works.
7+
8+
; In the following code, %1 should be replaced by float 0.000000e+00
9+
; %1 = load float, float* %arrayidx,...
10+
; It only has one use: being stored to %f32_var
11+
12+
; CHECK-LABEL: entry:
13+
; CHECK: [[DEST:%[a-z0-9\.]+]] = getelementptr inbounds [4 x float], [4 x float]* %zero_arr, i32 0, i32 0
14+
; CHECK-NEXT: store float 0.000000e+00, float* [[DEST]]
15+
16+
; Generated from compiling the following HLSL:
17+
; static float arr_var[4] = (float[4])0;
18+
;
19+
; [numthreads(1, 1, 1)]
20+
; void main() {
21+
; int i32_var = 0;
22+
; float f32_var = arr_var[i32_var];
23+
; float zero_arr[4] = (float[4])0;
24+
; arr_var = zero_arr;
25+
; }
26+
27+
;
28+
; Buffer Definitions:
29+
;
30+
; cbuffer $Globals
31+
; {
32+
;
33+
; [0 x i8] (type annotation not present)
34+
;
35+
; }
36+
;
37+
;
38+
; Resource Bindings:
39+
;
40+
; Name Type Format Dim ID HLSL Bind Count
41+
; ------------------------------ ---------- ------- ----------- ------- -------------- ------
42+
; $Globals cbuffer NA NA CB0 cb4294967295 1
43+
;
44+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
45+
target triple = "dxil-ms-dx"
46+
47+
%ConstantBuffer = type opaque
48+
49+
@arr_var = internal global [4 x float] zeroinitializer, align 4
50+
@"$Globals" = external constant %ConstantBuffer
51+
52+
; Function Attrs: nounwind
53+
define void @main() #0 {
54+
entry:
55+
%i32_var = alloca i32, align 4
56+
%f32_var = alloca float, align 4
57+
%zero_arr = alloca [4 x float], align 4
58+
store i32 0, i32* %i32_var, align 4, !dbg !17, !tbaa !21 ; line:5 col:7
59+
%0 = load i32, i32* %i32_var, align 4, !dbg !25, !tbaa !21 ; line:6 col:27
60+
%arrayidx = getelementptr inbounds [4 x float], [4 x float]* @arr_var, i32 0, i32 %0, !dbg !26 ; line:6 col:19
61+
%1 = load float, float* %arrayidx, align 4, !dbg !26, !tbaa !27 ; line:6 col:19
62+
store float %1, float* %f32_var, align 4, !dbg !29, !tbaa !27 ; line:6 col:9
63+
%2 = getelementptr inbounds [4 x float], [4 x float]* %zero_arr, i32 0, i32 0, !dbg !30 ; line:7 col:33
64+
store float 0.000000e+00, float* %2, !dbg !30 ; line:7 col:33
65+
%3 = getelementptr inbounds [4 x float], [4 x float]* %zero_arr, i32 0, i32 1, !dbg !30 ; line:7 col:33
66+
store float 0.000000e+00, float* %3, !dbg !30 ; line:7 col:33
67+
%4 = getelementptr inbounds [4 x float], [4 x float]* %zero_arr, i32 0, i32 2, !dbg !30 ; line:7 col:33
68+
store float 0.000000e+00, float* %4, !dbg !30 ; line:7 col:33
69+
%5 = getelementptr inbounds [4 x float], [4 x float]* %zero_arr, i32 0, i32 3, !dbg !30 ; line:7 col:33
70+
store float 0.000000e+00, float* %5, !dbg !30 ; line:7 col:33
71+
%6 = bitcast [4 x float]* @arr_var to i8*, !dbg !31 ; line:8 col:13
72+
%7 = bitcast [4 x float]* %zero_arr to i8*, !dbg !31 ; line:8 col:13
73+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 16, i32 1, i1 false), !dbg !31 ; line:8 col:13
74+
ret void, !dbg !32 ; line:9 col:1
75+
}
76+
77+
; Function Attrs: nounwind
78+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
79+
80+
attributes #0 = { nounwind }
81+
82+
!llvm.module.flags = !{!0}
83+
!pauseresume = !{!1}
84+
!llvm.ident = !{!2}
85+
!dx.version = !{!3}
86+
!dx.valver = !{!4}
87+
!dx.shaderModel = !{!5}
88+
!dx.typeAnnotations = !{!6}
89+
!dx.entryPoints = !{!10}
90+
!dx.fnprops = !{!14}
91+
!dx.options = !{!15, !16}
92+
93+
!0 = !{i32 2, !"Debug Info Version", i32 3}
94+
!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
95+
!2 = !{!"dxc(private) 1.8.0.4666 (759e9e1da-dirty)"}
96+
!3 = !{i32 1, i32 0}
97+
!4 = !{i32 1, i32 8}
98+
!5 = !{!"cs", i32 6, i32 0}
99+
!6 = !{i32 1, void ()* @main, !7}
100+
!7 = !{!8}
101+
!8 = !{i32 1, !9, !9}
102+
!9 = !{}
103+
!10 = !{void ()* @main, !"main", null, !11, null}
104+
!11 = !{null, null, !12, null}
105+
!12 = !{!13}
106+
!13 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null}
107+
!14 = !{void ()* @main, i32 5, i32 1, i32 1, i32 1}
108+
!15 = !{i32 64}
109+
!16 = !{i32 -1}
110+
!17 = !DILocation(line: 5, column: 7, scope: !18)
111+
!18 = !DISubprogram(name: "main", scope: !19, file: !19, line: 4, type: !20, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @main)
112+
!19 = !DIFile(filename: "/mnt/c/Users/amaiorano/Downloads/356423093/356423093_reduced.hlsl", directory: "")
113+
!20 = !DISubroutineType(types: !9)
114+
!21 = !{!22, !22, i64 0}
115+
!22 = !{!"int", !23, i64 0}
116+
!23 = !{!"omnipotent char", !24, i64 0}
117+
!24 = !{!"Simple C/C++ TBAA"}
118+
!25 = !DILocation(line: 6, column: 27, scope: !18)
119+
!26 = !DILocation(line: 6, column: 19, scope: !18)
120+
!27 = !{!28, !28, i64 0}
121+
!28 = !{!"float", !23, i64 0}
122+
!29 = !DILocation(line: 6, column: 9, scope: !18)
123+
!30 = !DILocation(line: 7, column: 33, scope: !18)
124+
!31 = !DILocation(line: 8, column: 13, scope: !18)
125+
!32 = !DILocation(line: 9, column: 1, scope: !18)
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s
2+
3+
; The pass replaces a memcpy from a zero-initialized global that does not have an intervening store.
4+
; When tracing through geps and bitcasts of uses of that global, the algorithm might
5+
; bottom out at replacing a load of a scalar int. Verify this works.
6+
7+
; In the following code, %1 should be replaced by int 0
8+
; %1 = load i32, i32* %arrayidx,...
9+
; It only has one use: being stored to %f32_var
10+
11+
; CHECK-LABEL: entry:
12+
; CHECK: [[DEST:%[a-z0-9\.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* %zero_arr, i32 0, i32 0
13+
; CHECK-NEXT: store i32 0, i32* [[DEST]]
14+
15+
; Generated from compiling the following HLSL:
16+
; static int arr_var[4] = (int[4])0;
17+
;
18+
; [numthreads(1, 1, 1)]
19+
; void main() {
20+
; int i32_var = 0;
21+
; int f32_var = arr_var[i32_var];
22+
; int zero_arr[4] = (int[4])0;
23+
; arr_var = zero_arr;
24+
; }
25+
26+
;
27+
; Buffer Definitions:
28+
;
29+
; cbuffer $Globals
30+
; {
31+
;
32+
; [0 x i8] (type annotation not present)
33+
;
34+
; }
35+
;
36+
;
37+
; Resource Bindings:
38+
;
39+
; Name Type Format Dim ID HLSL Bind Count
40+
; ------------------------------ ---------- ------- ----------- ------- -------------- ------
41+
; $Globals cbuffer NA NA CB0 cb4294967295 1
42+
;
43+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
44+
target triple = "dxil-ms-dx"
45+
46+
%ConstantBuffer = type opaque
47+
48+
@arr_var = internal global [4 x i32] zeroinitializer, align 4
49+
@"$Globals" = external constant %ConstantBuffer
50+
51+
; Function Attrs: nounwind
52+
define void @main() #0 {
53+
entry:
54+
%i32_var = alloca i32, align 4
55+
%f32_var = alloca i32, align 4
56+
%zero_arr = alloca [4 x i32], align 4
57+
store i32 0, i32* %i32_var, align 4, !dbg !17, !tbaa !21 ; line:16 col:7
58+
%0 = load i32, i32* %i32_var, align 4, !dbg !25, !tbaa !21 ; line:17 col:25
59+
%arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* @arr_var, i32 0, i32 %0, !dbg !26 ; line:17 col:17
60+
%1 = load i32, i32* %arrayidx, align 4, !dbg !26, !tbaa !21 ; line:17 col:17
61+
store i32 %1, i32* %f32_var, align 4, !dbg !27, !tbaa !21 ; line:17 col:7
62+
%2 = getelementptr inbounds [4 x i32], [4 x i32]* %zero_arr, i32 0, i32 0, !dbg !28 ; line:18 col:29
63+
store i32 0, i32* %2, !dbg !28 ; line:18 col:29
64+
%3 = getelementptr inbounds [4 x i32], [4 x i32]* %zero_arr, i32 0, i32 1, !dbg !28 ; line:18 col:29
65+
store i32 0, i32* %3, !dbg !28 ; line:18 col:29
66+
%4 = getelementptr inbounds [4 x i32], [4 x i32]* %zero_arr, i32 0, i32 2, !dbg !28 ; line:18 col:29
67+
store i32 0, i32* %4, !dbg !28 ; line:18 col:29
68+
%5 = getelementptr inbounds [4 x i32], [4 x i32]* %zero_arr, i32 0, i32 3, !dbg !28 ; line:18 col:29
69+
store i32 0, i32* %5, !dbg !28 ; line:18 col:29
70+
%6 = bitcast [4 x i32]* @arr_var to i8*, !dbg !29 ; line:19 col:13
71+
%7 = bitcast [4 x i32]* %zero_arr to i8*, !dbg !29 ; line:19 col:13
72+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 16, i32 1, i1 false), !dbg !29 ; line:19 col:13
73+
ret void, !dbg !30 ; line:20 col:1
74+
}
75+
76+
; Function Attrs: nounwind
77+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
78+
79+
attributes #0 = { nounwind }
80+
81+
!llvm.module.flags = !{!0}
82+
!pauseresume = !{!1}
83+
!llvm.ident = !{!2}
84+
!dx.version = !{!3}
85+
!dx.valver = !{!4}
86+
!dx.shaderModel = !{!5}
87+
!dx.typeAnnotations = !{!6}
88+
!dx.entryPoints = !{!10}
89+
!dx.fnprops = !{!14}
90+
!dx.options = !{!15, !16}
91+
92+
!0 = !{i32 2, !"Debug Info Version", i32 3}
93+
!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
94+
!2 = !{!"dxc(private) 1.8.0.4666 (759e9e1da-dirty)"}
95+
!3 = !{i32 1, i32 0}
96+
!4 = !{i32 1, i32 8}
97+
!5 = !{!"cs", i32 6, i32 0}
98+
!6 = !{i32 1, void ()* @main, !7}
99+
!7 = !{!8}
100+
!8 = !{i32 1, !9, !9}
101+
!9 = !{}
102+
!10 = !{void ()* @main, !"main", null, !11, null}
103+
!11 = !{null, null, !12, null}
104+
!12 = !{!13}
105+
!13 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null}
106+
!14 = !{void ()* @main, i32 5, i32 1, i32 1, i32 1}
107+
!15 = !{i32 64}
108+
!16 = !{i32 -1}
109+
!17 = !DILocation(line: 16, column: 7, scope: !18)
110+
!18 = !DISubprogram(name: "main", scope: !19, file: !19, line: 15, type: !20, isLocal: false, isDefinition: true, scopeLine: 15, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @main)
111+
!19 = !DIFile(filename: "/mnt/c/Users/amaiorano/Downloads/356423093/356423093_reduced.hlsl", directory: "")
112+
!20 = !DISubroutineType(types: !9)
113+
!21 = !{!22, !22, i64 0}
114+
!22 = !{!"int", !23, i64 0}
115+
!23 = !{!"omnipotent char", !24, i64 0}
116+
!24 = !{!"Simple C/C++ TBAA"}
117+
!25 = !DILocation(line: 17, column: 25, scope: !18)
118+
!26 = !DILocation(line: 17, column: 17, scope: !18)
119+
!27 = !DILocation(line: 17, column: 7, scope: !18)
120+
!28 = !DILocation(line: 18, column: 29, scope: !18)
121+
!29 = !DILocation(line: 19, column: 13, scope: !18)
122+
!30 = !DILocation(line: 20, column: 1, scope: !18)

0 commit comments

Comments
 (0)