|
| 1 | +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s |
| 2 | + |
| 3 | +; The pass replaces a memcpy whose source is a zero-initialized global, provided that global has no intervening store. |
| 4 | +; When tracing through geps and bitcasts of uses of that global, the algorithm might |
| 5 | +; bottom out at replacing a load of a scalar float. Verify this works. |
| 6 | + |
| 7 | +; In the following code, %2 should be replaced by float 0.0: |
| 8 | +; %2 = load float, float* %src_in_g,... |
| 9 | +; %2 has only one use: it is stored to one of the elements of @g_1. |
| 10 | + |
| 11 | +; CHECK: for.body.i: |
| 12 | +; CHECK: [[DEST:%[a-z0-9\.]+]] = getelementptr inbounds [10 x float], [10 x float]* @g_1, i32 0 |
| 13 | +; CHECK: store float 0.000000e+00, float* [[DEST]] |
| 14 | +; CHECK: end.block: |
| 15 | + |
| 16 | + |
| 17 | +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" |
| 18 | +target triple = "dxil-ms-dx" |
| 19 | + |
| 20 | +%struct.ByteAddressBuffer = type { i32 } |
| 21 | +%ConstantBuffer = type opaque |
| 22 | +%struct.PSOut = type { <4 x float> } |
| 23 | + |
| 24 | +@"\01?g_2@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 |
| 25 | +@g = internal global [10 x float] zeroinitializer, align 4 |
| 26 | +@g_1 = internal global [10 x float] zeroinitializer, align 4 |
| 27 | +@"$Globals" = external constant %ConstantBuffer |
| 28 | + |
| 29 | +; Function Attrs: nounwind. Test input: a 10-iteration loop copies @g into @g_1 element by element, then @g is round-tripped through the local %copy.i with two memcpys, and a zero-filled PSOut is written to the sret result. |
| 30 | +define void @frag_main(%struct.PSOut* noalias sret %agg.result) #0 { |
| 31 | +entry: |
| 32 | + %i.i = alloca i32, align 4 |
| 33 | + %copy.i = alloca [10 x float], align 4 |
| 34 | + %wrapper_result = alloca %struct.PSOut, align 4 |
| 35 | + store i32 0, i32* %i.i, align 4, !dbg !23, !tbaa !29 ; line:10 col:12 -- initialize the loop counter %i.i to 0 |
| 36 | + br label %for.cond.i, !dbg !33 ; line:10 col:8 |
| 37 | + |
| 38 | +for.cond.i: ; preds = %for.body.i, %entry -- loop header: test the counter |
| 39 | + %0 = load i32, i32* %i.i, align 4, !dbg !34, !tbaa !29 ; line:10 col:19 -- read the current counter value |
| 40 | + %cmp.i = icmp slt i32 %0, 10, !dbg !35 ; line:10 col:21 -- continue while counter < 10 (the element count of @g and @g_1) |
| 41 | + br i1 %cmp.i, label %for.body.i, label %end.block, !dbg !36 ; line:10 col:3 |
| 42 | + |
| 43 | +for.body.i: ; preds = %for.cond.i -- loop body: the block matched by the first CHECK line |
| 44 | + %1 = load i32, i32* %i.i, align 4, !dbg !37, !tbaa !29 ; line:11 col:16 -- index used for the source element |
| 45 | + %src_in_g = getelementptr inbounds [10 x float], [10 x float]* @g, i32 0, i32 %1, !dbg !38 ; line:11 col:14 -- address of @g[%1] |
| 46 | + %2 = load float, float* %src_in_g, align 4, !dbg !38, !tbaa !39 ; line:11 col:14 -- scalar load from zero-initialized @g; the test expects the pass to replace this value with float 0.0 |
| 47 | + %3 = load i32, i32* %i.i, align 4, !dbg !41, !tbaa !29 ; line:11 col:9 -- index used for the destination element |
| 48 | + %dest = getelementptr inbounds [10 x float], [10 x float]* @g_1, i32 0, i32 %3, !dbg !42 ; line:11 col:5 -- address of @g_1[%3] |
| 49 | + store float %2, float* %dest, align 4, !dbg !43, !tbaa !39 ; line:11 col:12 -- the only use of %2; CHECK expects "store float 0.000000e+00" here after the pass |
| 50 | + %4 = load i32, i32* %i.i, align 4, !dbg !44, !tbaa !29 ; line:10 col:28 -- reload the counter for the increment |
| 51 | + %inc.i = add nsw i32 %4, 1, !dbg !44 ; line:10 col:28 |
| 52 | + store i32 %inc.i, i32* %i.i, align 4, !dbg !44, !tbaa !29 ; line:10 col:28 -- write back counter + 1 |
| 53 | + br label %for.cond.i, !dbg !36 ; line:10 col:3 |
| 54 | + |
| 55 | +end.block: ; preds = %for.cond.i -- loop exit: the block matched by the last CHECK line |
| 56 | + %5 = bitcast [10 x float]* %copy.i to i8*, !dbg !45 ; line:13 col:20 |
| 57 | + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast ([10 x float]* @g to i8*), i64 40, i32 1, i1 false) #0, !dbg !45 ; line:13 col:20 -- copy all 40 bytes of @g into the local %copy.i |
| 58 | + %6 = bitcast [10 x float]* %copy.i to i8*, !dbg !46 ; line:14 col:7 |
| 59 | + call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast ([10 x float]* @g to i8*), i8* %6, i64 40, i32 1, i1 false) #0, !dbg !46 ; line:14 col:7 -- copy the 40 bytes of %copy.i back into @g |
| 60 | + %value = getelementptr inbounds %struct.PSOut, %struct.PSOut* %wrapper_result, i32 0, i32 0, !dbg !47 ; line:20 col:18 -- address of the <4 x float> field of %wrapper_result |
| 61 | + store <4 x float> zeroinitializer, <4 x float>* %value, align 4, !dbg !48, !tbaa !49 ; line:20 col:24 -- set the result vector to all zeros |
| 62 | + %7 = bitcast %struct.PSOut* %agg.result to i8*, !dbg !50 ; line:21 col:10 |
| 63 | + %8 = bitcast %struct.PSOut* %wrapper_result to i8*, !dbg !50 ; line:21 col:10 |
| 64 | + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 16, i32 1, i1 false), !dbg !50 ; line:21 col:10 -- copy the 16-byte PSOut from %wrapper_result into the sret argument %agg.result |
| 65 | + ret void, !dbg !51 ; line:21 col:3 |
| 66 | +} |
| 67 | + |
| 68 | +; Function Attrs: nounwind |
| 69 | +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 |
| 70 | + |
| 71 | +attributes #0 = { nounwind } |
| 72 | + |
| 73 | +!llvm.module.flags = !{!0} |
| 74 | +!pauseresume = !{!1} |
| 75 | +!llvm.ident = !{!2} |
| 76 | +!dx.version = !{!3} |
| 77 | +!dx.valver = !{!4} |
| 78 | +!dx.shaderModel = !{!5} |
| 79 | +!dx.typeAnnotations = !{!6, !9} |
| 80 | +!dx.entryPoints = !{!14} |
| 81 | +!dx.fnprops = !{!20} |
| 82 | +!dx.options = !{!21, !22} |
| 83 | + |
| 84 | +!0 = !{i32 2, !"Debug Info Version", i32 3} |
| 85 | +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} |
| 86 | +!2 = !{!"dxc(private) 1.8.0.14549 (main, 0781ded87-dirty)"} |
| 87 | +!3 = !{i32 1, i32 0} |
| 88 | +!4 = !{i32 1, i32 8} |
| 89 | +!5 = !{!"ps", i32 6, i32 0} |
| 90 | +!6 = !{i32 0, %struct.PSOut undef, !7} |
| 91 | +!7 = !{i32 16, !8} |
| 92 | +!8 = !{i32 6, !"value", i32 3, i32 0, i32 4, !"SV_Target0", i32 7, i32 9} |
| 93 | +!9 = !{i32 1, void (%struct.PSOut*)* @frag_main, !10} |
| 94 | +!10 = !{!11, !13} |
| 95 | +!11 = !{i32 0, !12, !12} |
| 96 | +!12 = !{} |
| 97 | +!13 = !{i32 1, !12, !12} |
| 98 | +!14 = !{void (%struct.PSOut*)* @frag_main, !"frag_main", null, !15, null} |
| 99 | +!15 = !{!16, null, !18, null} |
| 100 | +!16 = !{!17} |
| 101 | +!17 = !{i32 0, %struct.ByteAddressBuffer* @"\01?g_2@@3UByteAddressBuffer@@A", !"g_2", i32 0, i32 0, i32 1, i32 11, i32 0, null} |
| 102 | +!18 = !{!19} |
| 103 | +!19 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} |
| 104 | +!20 = !{void (%struct.PSOut*)* @frag_main, i32 0, i1 false} |
| 105 | +!21 = !{i32 144} |
| 106 | +!22 = !{i32 -1} |
| 107 | +!23 = !DILocation(line: 10, column: 12, scope: !24, inlinedAt: !27) |
| 108 | +!24 = !DISubprogram(name: "inner", scope: !25, file: !25, line: 9, type: !26, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false) |
| 109 | +!25 = !DIFile(filename: "float.hlsl", directory: "") |
| 110 | +!26 = !DISubroutineType(types: !12) |
| 111 | +!27 = distinct !DILocation(line: 20, column: 26, scope: !28) |
| 112 | +!28 = !DISubprogram(name: "frag_main", scope: !25, file: !25, line: 18, type: !26, isLocal: false, isDefinition: true, scopeLine: 18, flags: DIFlagPrototyped, isOptimized: false, function: void (%struct.PSOut*)* @frag_main) |
| 113 | +!29 = !{!30, !30, i64 0} |
| 114 | +!30 = !{!"int", !31, i64 0} |
| 115 | +!31 = !{!"omnipotent char", !32, i64 0} |
| 116 | +!32 = !{!"Simple C/C++ TBAA"} |
| 117 | +!33 = !DILocation(line: 10, column: 8, scope: !24, inlinedAt: !27) |
| 118 | +!34 = !DILocation(line: 10, column: 19, scope: !24, inlinedAt: !27) |
| 119 | +!35 = !DILocation(line: 10, column: 21, scope: !24, inlinedAt: !27) |
| 120 | +!36 = !DILocation(line: 10, column: 3, scope: !24, inlinedAt: !27) |
| 121 | +!37 = !DILocation(line: 11, column: 16, scope: !24, inlinedAt: !27) |
| 122 | +!38 = !DILocation(line: 11, column: 14, scope: !24, inlinedAt: !27) |
| 123 | +!39 = !{!40, !40, i64 0} |
| 124 | +!40 = !{!"float", !31, i64 0} |
| 125 | +!41 = !DILocation(line: 11, column: 9, scope: !24, inlinedAt: !27) |
| 126 | +!42 = !DILocation(line: 11, column: 5, scope: !24, inlinedAt: !27) |
| 127 | +!43 = !DILocation(line: 11, column: 12, scope: !24, inlinedAt: !27) |
| 128 | +!44 = !DILocation(line: 10, column: 28, scope: !24, inlinedAt: !27) |
| 129 | +!45 = !DILocation(line: 13, column: 20, scope: !24, inlinedAt: !27) |
| 130 | +!46 = !DILocation(line: 14, column: 7, scope: !24, inlinedAt: !27) |
| 131 | +!47 = !DILocation(line: 20, column: 18, scope: !28) |
| 132 | +!48 = !DILocation(line: 20, column: 24, scope: !28) |
| 133 | +!49 = !{!31, !31, i64 0} |
| 134 | +!50 = !DILocation(line: 21, column: 10, scope: !28) |
| 135 | +!51 = !DILocation(line: 21, column: 3, scope: !28) |
0 commit comments