Skip to content

Commit d013656

Browse files
author
Greg Roth
committed
[DirectX] Eliminate resource global variables from module
By giving these intrinsics their appropriate attributes, loads of globals that are stored on the other side of these calls can be eliminated by the EarlyCSE pass. Stores to the same globals and the globals themselves require more direct intervention as part of the handleFromBinding lowering. Adds a test that verifies that the unneeded globals and their uses can be eliminated and also that the attributes are set properly. Fixes #104271
1 parent 3775e77 commit d013656

File tree

2 files changed

+37
-9
lines changed

2 files changed

+37
-9
lines changed

llvm/lib/Target/DirectX/DXILOpLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,22 @@ class OpLowerer {
204204
CleanupCasts.clear();
205205
}
206206

207+
// Remove the resource global associated with the handleFromBinding call instruction
208+
// and their uses as they aren't needed anymore.
209+
void removeResourceGlobals(CallInst *CI) {
210+
for (User *User : make_early_inc_range(CI->users())) {
211+
if(StoreInst *Store = dyn_cast<StoreInst>(User)) {
212+
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Store->getOperand(1))) {
213+
Store->eraseFromParent();
214+
assert(GV->use_empty() && "Buffer global still has users");
215+
GV->removeDeadConstantUsers();
216+
GV->eraseFromParent();
217+
}
218+
}
219+
}
220+
}
221+
222+
207223
[[nodiscard]] bool lowerToCreateHandle(Function &F) {
208224
IRBuilder<> &IRB = OpBuilder.getIRB();
209225
Type *Int8Ty = IRB.getInt8Ty();
@@ -228,6 +244,8 @@ class OpLowerer {
228244

229245
Value *Cast = createTmpHandleCast(*OpCall, CI->getType());
230246

247+
removeResourceGlobals(CI);
248+
231249
CI->replaceAllUsesWith(Cast);
232250
CI->eraseFromParent();
233251
return Error::success();
@@ -272,6 +290,8 @@ class OpLowerer {
272290

273291
Value *Cast = createTmpHandleCast(*OpAnnotate, CI->getType());
274292

293+
removeResourceGlobals(CI);
294+
275295
CI->replaceAllUsesWith(Cast);
276296
CI->eraseFromParent();
277297

llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,63 @@
1-
; RUN: opt -S -passes='early-cse<memssa>' %s | FileCheck %s
1+
; RUN: opt -S -passes='early-cse<memssa>' %s -o %t
2+
; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t
3+
; finish compiling to verify that dxil-op-lower removes the globals entirely
4+
; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s
5+
; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s
26

37
; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad.
8+
; Also that DXILOpLowering eliminates the globals entirely.
49

510
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
611
target triple = "dxilv1.6-unknown-shadermodel6.6-compute"
712

813
%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }
914

15+
; LLC-NOT: @In = global
16+
; LLC-NOT: @Out = global
1017
@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
1118
@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
1219

1320
; Function Attrs: convergent noinline norecurse
1421
; CHECK-LABEL: define void @main()
1522
define void @main() local_unnamed_addr #0 {
1623
entry:
17-
%tmp = alloca target("dx.TypedBuffer", <4 x float>, 1, 0, 0), align 4
24+
; LLC: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle
25+
; LLC: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle
1826
%In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
1927
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
2028
%Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
2129
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
22-
; CHECK: call i32 @llvm.dx.flattened.thread.id.in.group()
30+
; CSE: call i32 @llvm.dx.flattened.thread.id.in.group()
2331
%0 = call i32 @llvm.dx.flattened.thread.id.in.group()
2432
; CHECK-NOT: load {{.*}} ptr @In
2533
%1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
26-
; CHECK call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
34+
; CSE: call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
2735
%2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
2836
; CHECK-NOT: load {{.*}} ptr @In
2937
%3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
3038
%4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
3139
%add.i = fadd <4 x float> %2, %4
32-
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr %tmp, align 4
3340
call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
41+
; CHECK: ret void
3442
ret void
3543
}
3644

3745
; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
3846
declare i32 @llvm.dx.flattened.thread.id.in.group() #1
3947

4048
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
41-
; CHECK: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
49+
; CSE: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
4250
declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) #2
4351

4452
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
45-
; CHECK: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
53+
; CSE: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
4654
declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) #2
4755

4856
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
4957
declare target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32, i32, i32, i32, i1) #3
5058

51-
; CHECK: attributes [[ROAttr]] = { {{.*}} memory(read) }
52-
; CHECK: attributes [[WOAttr]] = { {{.*}} memory(write) }
59+
; CSE: attributes [[ROAttr]] = { {{.*}} memory(read) }
60+
; CSE: attributes [[WOAttr]] = { {{.*}} memory(write) }
5361

5462
attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
5563
attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(none) }

0 commit comments

Comments
 (0)