diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl
index 29093c09c39d0..4d701733310ed 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl
@@ -76,7 +76,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global voi
 // CHECK-LABEL: @test_amdgcn_make_buffer_p0_nullptr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call align 4294967296 ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num, int flags) {
   return __builtin_amdgcn_make_buffer_rsrc((void *)0, stride, num, flags);
@@ -85,7 +85,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num,
 // CHECK-LABEL: @test_amdgcn_make_buffer_p1_nullptr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call align 4294967296 ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p1_nullptr(short stride, int num, int flags) {
   return __builtin_amdgcn_make_buffer_rsrc((global void *)0, stride, num, flags);
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 1c589fb944141..297dd296aa6c8 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1356,8 +1356,15 @@ struct InformationCache {
   /// Return the flat address space if the associated target has.
   LLVM_ABI std::optional<unsigned> getFlatAddressSpace() const;
 
+  /// Return true if \p QueryingAA should track the use \p U (target hook).
+  virtual bool shouldTrackUse(const AbstractAttribute *QueryingAA,
+                              Value &AssociatedValue, const Use *U,
+                              const Instruction *I) const {
+    return false;
+  }
+
   virtual unsigned getMaxAddrSpace() const { return ~0U; }
 
 private:
   struct FunctionInfo {
     LLVM_ABI ~FunctionInfo();
@@ -2045,6 +2052,21 @@ struct Attributor {
     SimplificationCallbacks[IRP].emplace_back(CB);
   }
 
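+  /// Callback type for alignment deduction: for the given IR position,
+  /// collect the values whose AAAlign state should be merged into it.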
+  using AlignmentCallbackTy =
+      std::function<void(const IRPosition &, const AbstractAttribute *,
+                         SmallVectorImpl<AA::ValueAndContext> &)>;
+  void registerAlignmentCallback(const IRPosition &IRP,
+                                 const AlignmentCallbackTy &CB) {
+    AlignmentCallbacks[IRP].emplace_back(CB);
+  }
+
+  SmallVector<AlignmentCallbackTy, 1>
+  getAlignmentCallback(const IRPosition &IRP) {
+    return AlignmentCallbacks.lookup(IRP);
+  }
+
   /// Return true if there is a simplification callback for \p IRP.
   bool hasSimplificationCallback(const IRPosition &IRP) {
     return SimplificationCallbacks.count(IRP);
@@ -2096,6 +2118,9 @@ struct Attributor {
   DenseMap<IRPosition, SmallVector<ValueSimplifyCallbackTy, 1>>
       SimplificationCallbacks;
 
+  /// The vector with AAAlign callbacks registered by outside AAs.
+  DenseMap<IRPosition, SmallVector<AlignmentCallbackTy, 1>> AlignmentCallbacks;
+
   /// The vector with all simplification callbacks for global variables
   /// registered by outside AAs.
   DenseMap<const GlobalVariable *,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ … @@ class AMDGPUInformationCache : public InformationCache {
+  bool shouldTrackUse(const AbstractAttribute *QueryingAA,
+                      Value &AssociatedValue, const Use *U,
+                      const Instruction *I) const override {
+    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+      if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+        return true;
+    }
+    return false;
+  }
+
   unsigned getMaxAddrSpace() const override {
     return AMDGPUAS::MAX_AMDGPU_ADDRESS;
   }
@@ -1385,7 +1395,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
       &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
       &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
-      &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});
+      &AAIndirectCallInfo::ID, &AAInstanceInfo::ID, &AAAlign::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1433,10 +1443,32 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
         Ptr = RMW->getPointerOperand();
       else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
         Ptr = CmpX->getPointerOperand();
+      else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc) {
+          IRPosition IRP = IRPosition::value(*II);
+
+          Attributor::AlignmentCallbackTy ACB =
+              [](const IRPosition &IRP, const AbstractAttribute *AA,
+                 SmallVectorImpl<AA::ValueAndContext> &Values) {
+                Instruction *I = IRP.getCtxI();
+                if (!I)
+                  return;
+                if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+                  if (II->getIntrinsicID() ==
+                      Intrinsic::amdgcn_make_buffer_rsrc)
+                    Values.push_back(
+                        AA::ValueAndContext{*I->getOperand(0), nullptr});
+                }
+              };
+          A.registerAlignmentCallback(IRP, ACB);
+
+          A.getOrCreateAAFor<AAAlign>(IRP);
+        }
+      }
 
       if (Ptr) {
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
         A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
       }
     }
   }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index ed2ac4dbfeecd..07666ab0a27bd 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5204,6 +5204,10 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
     TrackUse = true;
     return 0;
   }
+  if (A.getInfoCache().shouldTrackUse(&QueryingAA, AssociatedValue, U, I)) {
+    TrackUse = true;
+    return 0;
+  }
 
   MaybeAlign MA;
   if (const auto *CB = dyn_cast<CallBase>(I)) {
@@ -5502,7 +5506,32 @@ struct AAAlignCallSiteReturned final
   using Base = AACalleeToCallSite<AAAlign, AAAlignImpl>;
   AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
       : Base(IRP, A) {}
+  ChangeStatus updateImpl(Attributor &A) override {
+    SmallVector<AA::ValueAndContext> Values;
+    SmallVector<Attributor::AlignmentCallbackTy, 1> AlignmentCBs =
+        A.getAlignmentCallback(getIRPosition());
+
+    for (const Attributor::AlignmentCallbackTy &CB : AlignmentCBs)
+      CB(getIRPosition(), this, Values);
+
+    if (!Values.empty()) {
+      StateType T;
+      for (AA::ValueAndContext &VAC : Values) {
+        const AAAlign *AA = A.getAAFor<AAAlign>(
+            *this, IRPosition::value(*VAC.getValue()), DepClassTy::REQUIRED);
+        if (AA && this != AA) {
+          const AAAlign::StateType &DS = AA->getState();
+          T ^= DS;
+        }
+        if (!T.isValidState())
+          return indicatePessimisticFixpoint();
+      }
+
+      return clampStateAndIndicateChange(getState(), T);
+    }
+    return Base::updateImpl(A);
+  }
 
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
 };
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
new file mode 100644
index 0000000000000..446aed9f2819a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
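+; Alignment is expected to propagate through llvm.amdgcn.make.buffer.rsrc in
+; both directions: from an aligned load on the buffer fat pointer back to the
+; pointer argument, and from a known-aligned pointer argument forward to
+; loads on the fat pointer.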
+define float @align_back_prop(ptr addrspace(1) align 4 %x) {
+; CHECK-LABEL: define float @align_back_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    ret float [[Y]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 8
+  ret float %y
+}
+
+define float @align_forward_prop(ptr addrspace(1) align 8 %x) {
+; CHECK-LABEL: define float @align_forward_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    ret float [[Y]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 4
+  ret float %y
+}
+
+define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
+; CHECK-LABEL: define float @align_mix_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
+; CHECK-NEXT:    ret float [[Z]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 2
+  %z = load float, ptr addrspace(1) %x, align 8
+  ret float %z
+}