; RUN: opt -S -passes='early-cse<memssa>' %s | FileCheck %s

; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals
; across typedBufferLoad.
;
; The typedBufferLoad and typedBufferStore declarations below are written with
; an attribute group that carries no memory(...) attribute. The checks near the
; end of the file require the printed declarations to reference attribute
; groups ending in memory(read) and memory(write) respectively.

target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxilv1.6-unknown-shadermodel6.6-compute"

%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }

@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4

; Function Attrs: convergent noinline norecurse
; CHECK-LABEL: define void @main()
define void @main() local_unnamed_addr #0 {
entry:
  %tmp = alloca target("dx.TypedBuffer", <4 x float>, 1, 0, 0), align 4
  ; Create the two resource handles and store them into the @In / @Out globals.
  %In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
  %Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
  ; CHECK: call i32 @llvm.dx.flattened.thread.id.in.group()
  %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
  ; The handle was stored to @In above, so no load of @In may remain before the
  ; buffer load.
  ; CHECK-NOT: load {{.*}} ptr @In
  %1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
  ; CHECK: call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
  %2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
  ; The reload of @In that follows the buffer load must be eliminated as well.
  ; CHECK-NOT: load {{.*}} ptr @In
  %3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
  %4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
  %add.i = fadd <4 x float> %2, %4
  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr %tmp, align 4
  call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
  ret void
}

; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
declare i32 @llvm.dx.flattened.thread.id.in.group() #1

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
; CHECK: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) #2

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
; CHECK: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) #2

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32, i32, i32, i32, i1) #3

; The attribute groups captured on the declarations above must end in the
; expected memory effects.
; CHECK: attributes [[ROAttr]] = { {{.*}} memory(read) }
; CHECK: attributes [[WOAttr]] = { {{.*}} memory(write) }

attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(none) }
attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }

!llvm.module.flags = !{!0, !1}
!dx.valver = !{!2}
!llvm.ident = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!2 = !{i32 1, i32 8}
!3 = !{!"clang version 20.0.0git (git@github.com:llvm/llvm-project.git 54dc966bd3d375d7c1604fac5fdac20989c1072a)"}
!4 = !{!5}
!5 = distinct !{!5, !6, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
!6 = distinct !{!6, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
!7 = !{!8, !9, i64 0}
!8 = !{!"_ZTSN4hlsl8RWBufferIDv4_fEE", !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C++ TBAA"}
!11 = !{!12}
!12 = distinct !{!12, !13, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
!13 = distinct !{!13, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
!14 = !{!15}
!15 = distinct !{!15, !16, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
!16 = distinct !{!16, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
!17 = !{!18, !9, i64 0}
!18 = !{!"_ZTSN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EE", !9, i64 0, !19, i64 4}
!19 = !{!"int", !9, i64 0}
!20 = !{!21}
!21 = distinct !{!21, !22, !"_ZN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EaSES2_: %agg.result"}
!22 = distinct !{!22, !"_ZN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EaSES2_"}