Skip to content

Commit 3775e77

Browse files
author
Greg Roth
committed
[DirectX] Mark buffer load/store as mem read/write
By giving these intrinsics their appropriate attributes, loads of allocas that are stored on the other side of these calls can be eliminated. Adds a test that verifies that the unneeded loads can be eliminated and also that the attributes are set properly. Fixes #104271. This may be the first part of a broader audit of …
1 parent af44976 commit 3775e77

File tree

2 files changed

+88
-3
lines changed

2 files changed

+88
-3
lines changed

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ def int_dx_handle_fromBinding
2828
[IntrNoMem]>;
2929

3030
def int_dx_typedBufferLoad
31-
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty]>;
31+
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
3232
def int_dx_typedBufferLoad_checkbit
3333
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
34-
[llvm_any_ty, llvm_i32_ty]>;
34+
[llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
3535
def int_dx_typedBufferStore
36-
: DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
36+
: DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty], [IntrWriteMem]>;
3737

3838
// Cast between target extension handle types and dxil-style opaque handles
3939
def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; RUN: opt -S -passes='early-cse<memssa>' %s | FileCheck %s
2+
3+
; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad.
4+
5+
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
6+
target triple = "dxilv1.6-unknown-shadermodel6.6-compute"
7+
8+
%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }
9+
10+
@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
11+
@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
12+
13+
; Function Attrs: convergent noinline norecurse
14+
; CHECK-LABEL: define void @main()
15+
define void @main() local_unnamed_addr #0 {
16+
entry:
17+
%tmp = alloca target("dx.TypedBuffer", <4 x float>, 1, 0, 0), align 4
18+
%In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
19+
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
20+
%Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
21+
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
22+
; CHECK: call i32 @llvm.dx.flattened.thread.id.in.group()
23+
%0 = call i32 @llvm.dx.flattened.thread.id.in.group()
24+
; CHECK-NOT: load {{.*}} ptr @In
25+
%1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
26+
; CHECK: call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
27+
%2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
28+
; CHECK-NOT: load {{.*}} ptr @In
29+
%3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
30+
%4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
31+
%add.i = fadd <4 x float> %2, %4
32+
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr %tmp, align 4
33+
call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
34+
ret void
35+
}
36+
37+
; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
38+
declare i32 @llvm.dx.flattened.thread.id.in.group() #1
39+
40+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
41+
; CHECK: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
42+
declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) #2
43+
44+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
45+
; CHECK: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
46+
declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) #2
47+
48+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
49+
declare target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32, i32, i32, i32, i1) #3
50+
51+
; CHECK: attributes [[ROAttr]] = { {{.*}} memory(read) }
52+
; CHECK: attributes [[WOAttr]] = { {{.*}} memory(write) }
53+
54+
attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
55+
attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(none) }
56+
attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
57+
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
58+
59+
!llvm.module.flags = !{!0, !1}
60+
!dx.valver = !{!2}
61+
!llvm.ident = !{!3}
62+
63+
!0 = !{i32 1, !"wchar_size", i32 4}
64+
!1 = !{i32 7, !"frame-pointer", i32 2}
65+
!2 = !{i32 1, i32 8}
66+
!3 = !{!"clang version 20.0.0git ([email protected]:llvm/llvm-project.git 54dc966bd3d375d7c1604fac5fdac20989c1072a)"}
67+
!4 = !{!5}
68+
!5 = distinct !{!5, !6, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
69+
!6 = distinct !{!6, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
70+
!7 = !{!8, !9, i64 0}
71+
!8 = !{!"_ZTSN4hlsl8RWBufferIDv4_fEE", !9, i64 0}
72+
!9 = !{!"omnipotent char", !10, i64 0}
73+
!10 = !{!"Simple C++ TBAA"}
74+
!11 = !{!12}
75+
!12 = distinct !{!12, !13, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
76+
!13 = distinct !{!13, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
77+
!14 = !{!15}
78+
!15 = distinct !{!15, !16, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
79+
!16 = distinct !{!16, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
80+
!17 = !{!18, !9, i64 0}
81+
!18 = !{!"_ZTSN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EE", !9, i64 0, !19, i64 4}
82+
!19 = !{!"int", !9, i64 0}
83+
!20 = !{!21}
84+
!21 = distinct !{!21, !22, !"_ZN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EaSES2_: %agg.result"}
85+
!22 = distinct !{!22, !"_ZN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EaSES2_"}

0 commit comments

Comments
 (0)