Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ def int_dx_handle_fromBinding
[IntrNoMem]>;

def int_dx_typedBufferLoad
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty]>;
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty],
[IntrReadMem]>;
def int_dx_typedBufferLoad_checkbit
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
[llvm_any_ty, llvm_i32_ty]>;
[llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
def int_dx_typedBufferStore
: DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
: DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty],
[IntrWriteMem]>;

// Cast between target extension handle types and dxil-style opaque handles
def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/DirectX/DXILOpLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,25 @@ class OpLowerer {
CleanupCasts.clear();
}

// Remove the resource globals associated with the handleFromBinding call
// instruction, along with their uses, as they aren't needed anymore.
// TODO: We should verify that all the globals get removed.
// It's expected we'll need a custom pass in the future that will eliminate
// the need for this here.
void removeResourceGlobals(CallInst *CI) {
for (User *User : make_early_inc_range(CI->users())) {
if (StoreInst *Store = dyn_cast<StoreInst>(User)) {
Value *V = Store->getOperand(1);
Store->eraseFromParent();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
if (GV->use_empty()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worth leaving a TODO comment around here somewhere that says we should really validate that all of the globals do eventually get removed, since otherwise we'll generate a broken module. Actually implementing that validation can probably be left for later for now, since it would be quite difficult to do locally here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

GV->removeDeadConstantUsers();
GV->eraseFromParent();
}
}
}
}

[[nodiscard]] bool lowerToCreateHandle(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int8Ty = IRB.getInt8Ty();
Expand All @@ -228,6 +247,8 @@ class OpLowerer {

Value *Cast = createTmpHandleCast(*OpCall, CI->getType());

removeResourceGlobals(CI);

CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
return Error::success();
Expand Down Expand Up @@ -272,6 +293,8 @@ class OpLowerer {

Value *Cast = createTmpHandleCast(*OpAnnotate, CI->getType());

removeResourceGlobals(CI);

CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();

Expand Down
44 changes: 44 additions & 0 deletions llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
; RUN: opt -S -passes='early-cse<memssa>' %s -o %t
; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t
; Finish compiling to verify that dxil-op-lower removes the globals entirely.
; RUN: opt -mtriple=dxil-pc-shadermodel6.0-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: opt -mtriple=dxil-pc-shadermodel6.6-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s

; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad.
; Also ensure that DXILOpLowering eliminates the globals entirely.

%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }

; DXOP-NOT: @In = global
; DXOP-NOT: @Out = global
@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4

; CHECK-LABEL: define void @main()
define void @main() local_unnamed_addr #0 {
entry:
; DXOP: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle
; DXOP: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle
%In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
%Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
; CSE: call i32 @llvm.dx.flattened.thread.id.in.group()
%0 = call i32 @llvm.dx.flattened.thread.id.in.group()
; CHECK-NOT: load {{.*}} ptr @In
%1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
; CSE: call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
%2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
; CHECK-NOT: load {{.*}} ptr @In
%3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
%4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
%add.i = fadd <4 x float> %2, %4
call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
; CHECK: ret void
ret void
}

attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }

Loading