diff --git a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
index 73abfe7c48584..306db6a558779 100644
--- a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
+++ b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
@@ -87,17 +87,50 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
 
   for (LoadInst *LI : LoadsToProcess) {
     Value *V = LI->getPointerOperand();
-    auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+    auto *GV = dyn_cast<GlobalVariable>(V);
 
     // If we didn't find the global, we may need to walk through a level of
     // indirection. This generally happens at -O0.
-    if (!GV)
+    if (!GV) {
       if (auto *NestedLI = dyn_cast<LoadInst>(V)) {
         BasicBlock::iterator BBI(NestedLI);
         Value *Loaded = FindAvailableLoadedValue(
             NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr);
         GV = dyn_cast_or_null<GlobalVariable>(Loaded);
+      } else if (auto *NestedAlloca = dyn_cast<AllocaInst>(V)) {
+        for (auto &Use : NestedAlloca->uses()) {
+          auto *Store = dyn_cast<StoreInst>(Use.getUser());
+          if (!Store)
+            continue;
+
+          Value *StoredVal = Store->getValueOperand();
+          if (!StoredVal)
+            continue;
+
+          // Try direct global match
+          GV = dyn_cast<GlobalVariable>(StoredVal);
+          if (GV)
+            break;
+
+          // If it's a load, check its source
+          if (auto *Load = dyn_cast<LoadInst>(StoredVal)) {
+            GV = dyn_cast<GlobalVariable>(Load->getPointerOperand());
+            if (GV)
+              break;
+
+            // If loading from an unmodified stack copy of the global, reuse the
+            // global's value, mirroring the nested-load case above. The lookup
+            // returns null when nothing is available, so use dyn_cast_or_null.
+            BasicBlock::iterator BBI(Load);
+            Value *Loaded = FindAvailableLoadedValue(Load, Load->getParent(),
+                                                     BBI, 0, nullptr, nullptr);
+            GV = dyn_cast_or_null<GlobalVariable>(Loaded);
+            if (GV)
+              break;
+          }
+        }
       }
+    }
 
     auto It = HandleMap.find(GV);
     if (It == HandleMap.end()) {
diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
new file mode 100644
index 0000000000000..7c0813b0b4e36
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s
+
+%"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
+@global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
+@name = private unnamed_addr constant [5 x i8] c"dest\00", align 1
+
+
+; NOTE: intent of this test is to confirm the load of target("dx.RawBuffer", i32, 1, 0)
+; from the alloca is forwarded so @llvm.dx.resource.getpointer uses the handle directly
+define void @CSMain() local_unnamed_addr {
+; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
+; CHECK-NEXT:    store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @global, align 4
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0)
+; CHECK-NEXT:    store i32 0, ptr [[TMP3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %alloca = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+  %handle = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
+  store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4
+  %val = load i32, ptr @global, align 4
+  store i32 %val , ptr %alloca, align 8
+  %indirect = load target("dx.RawBuffer", i32, 1, 0), ptr %alloca, align 8
+  %buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0)
+  store i32 0, ptr %buff, align 4
+  ret void
+}