llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (4 additions & 1 deletion)

@@ -1603,7 +1603,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
        &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
        &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-       &AAAMDGPUClusterDims::ID});
+       &AAAMDGPUClusterDims::ID, &AAAlign::ID});

   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;

@@ -1657,6 +1657,9 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
         Ptr = RMW->getPointerOperand();
       else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
         Ptr = CmpX->getPointerOperand();
+      else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+          A.getOrCreateAAFor<AAAlign>(IRPosition::value(*II));

       if (Ptr) {
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
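In short, the AMDGPUAttributor change above seeds an AAAlign attribute on every call to llvm.amdgcn.make.buffer.rsrc it finds, in addition to the AAAddressSpace and related attributes it already requests for the pointer operands of loads, stores, and atomics. A minimal sketch of the detection step follows; it is not part of the patch, and the helper name isMakeBufferRsrcCall is ours:

// Sketch only: the predicate the seeding loop above relies on, pulled out as a
// standalone helper for clarity. Assumes the usual LLVM headers.
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

static bool isMakeBufferRsrcCall(const llvm::Instruction &I) {
  // llvm.amdgcn.make.buffer.rsrc turns a base pointer into a buffer fat
  // pointer; its result is the value AAAlign gets seeded on.
  if (const auto *II = llvm::dyn_cast<llvm::IntrinsicInst>(&I))
    return II->getIntrinsicID() == llvm::Intrinsic::amdgcn_make_buffer_rsrc;
  return false;
}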

llvm/lib/Transforms/IPO/AttributorAttributes.cpp (34 additions & 4 deletions)

@@ -5279,6 +5279,12 @@ struct AAAlignImpl : AAAlign {

   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
+    // For make.buffer.rsrc, the alignment is exactly the base pointer's
+    // alignment.
+    if (Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()))
+      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+          return;
     SmallVector<Attribute, 4> Attrs;
     A.getAttrs(getIRPosition(), {Attribute::Alignment}, Attrs);
     for (const Attribute &Attr : Attrs)

@@ -5300,25 +5306,38 @@ struct AAAlignImpl : AAAlign {
     if (isa<ConstantData>(AssociatedValue))
       return ChangeStatus::UNCHANGED;

+    // For users of amdgcn.make.buffer.rsrc, the access alignment is
+    // min(base alignment, load/store alignment).
+    bool IsMakeBufferRsrc = false;
+    if (Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()))
+      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+          IsMakeBufferRsrc = true;
     for (const Use &U : AssociatedValue.uses()) {
       if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
         if (SI->getPointerOperand() == &AssociatedValue)
-          if (SI->getAlign() < getAssumedAlign()) {
+          if (IsMakeBufferRsrc) {
+            SI->setAlignment(std::min(SI->getAlign(), getAssumedAlign()));
+          } else if (SI->getAlign() < getAssumedAlign()) {
             STATS_DECLTRACK(AAAlign, Store,
                             "Number of times alignment added to a store");
             SI->setAlignment(getAssumedAlign());
             InstrChanged = ChangeStatus::CHANGED;
           }
       } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
         if (LI->getPointerOperand() == &AssociatedValue)
-          if (LI->getAlign() < getAssumedAlign()) {
+          if (IsMakeBufferRsrc) {
+            LI->setAlignment(std::min(LI->getAlign(), getAssumedAlign()));
+          } else if (LI->getAlign() < getAssumedAlign()) {
             LI->setAlignment(getAssumedAlign());
             STATS_DECLTRACK(AAAlign, Load,
                             "Number of times alignment added to a load");
             InstrChanged = ChangeStatus::CHANGED;
           }
       } else if (auto *RMW = dyn_cast<AtomicRMWInst>(U.getUser())) {
-        if (RMW->getPointerOperand() == &AssociatedValue) {
+        if (IsMakeBufferRsrc) {
+          RMW->setAlignment(std::min(RMW->getAlign(), getAssumedAlign()));
+        } else if (RMW->getPointerOperand() == &AssociatedValue) {
           if (RMW->getAlign() < getAssumedAlign()) {
             STATS_DECLTRACK(AAAlign, AtomicRMW,
                             "Number of times alignment added to atomicrmw");

@@ -5328,7 +5347,9 @@ struct AAAlignImpl : AAAlign {
           }
         }
       } else if (auto *CAS = dyn_cast<AtomicCmpXchgInst>(U.getUser())) {
-        if (CAS->getPointerOperand() == &AssociatedValue) {
+        if (IsMakeBufferRsrc) {
+          CAS->setAlignment(std::min(CAS->getAlign(), getAssumedAlign()));
+        } else if (CAS->getPointerOperand() == &AssociatedValue) {
           if (CAS->getAlign() < getAssumedAlign()) {
             STATS_DECLTRACK(AAAlign, AtomicCmpXchg,
                             "Number of times alignment added to cmpxchg");

@@ -5554,6 +5575,15 @@ struct AAAlignCallSiteReturned final
               std::min(this->getAssumedAlign(), Alignment).value());
         break;
       }
+      case Intrinsic::amdgcn_make_buffer_rsrc: {
+        const auto *AlignAA =
+            A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
+                                DepClassTy::REQUIRED);
+        if (AlignAA && AlignAA->isValidState())

[Inline review comment on the line above]
arsenm (Contributor): Does TargetTransformInfo have some kind of alignment propagation already? I thought it did.

Author: Hi @arsenm, I had a look but can't be sure. There are lots of alignment-related functions, but they are mainly used for legality checks and cost computation. I'm not really familiar with the struct; could you be more specific, so that I have some direction to search? Thanks.

+          return clampStateAndIndicateChange<StateType>(
+              this->getState(), AlignAA->getAssumedAlign().value());
+        break;
+      }
       default:
         break;
       }
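Taken together, the AttributorAttributes.cpp changes encode two rules for llvm.amdgcn.make.buffer.rsrc: the alignment of the returned fat pointer is taken from the base-pointer operand (operand 0), and any load, store, or atomic that goes through that pointer is clamped to min(access alignment, deduced resource alignment) rather than widened. A hedged sketch of that arithmetic with llvm::Align; the helper names are ours, not the patch's:

// Sketch, not the patch: the two alignment rules applied for make.buffer.rsrc.
#include <algorithm>
#include "llvm/Support/Alignment.h"

// Rule 1: the resource's alignment is simply the base pointer's alignment.
static llvm::Align rsrcAlignFromBase(llvm::Align BaseAlign) {
  return BaseAlign;
}

// Rule 2: an access through the resource is clamped, never widened:
// min(existing access alignment, deduced resource alignment).
static llvm::Align clampAccessAlign(llvm::Align AccessAlign,
                                    llvm::Align RsrcAlign) {
  return std::min(AccessAlign, RsrcAlign);
}

The clamping, rather than the usual "raise to the assumed alignment" behavior, is what distinguishes the IsMakeBufferRsrc paths above from the pre-existing store/load/atomic cases.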

llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll (new file, 26 additions)
@@ -0,0 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s

define float @load_gt_base(ptr align 4 %p) {
; CHECK-LABEL: define float @load_gt_base(
; CHECK-SAME: ptr align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr align 4 [[P]], i16 0, i64 0, i32 0)
; CHECK-NEXT: [[LOADED:%.*]] = load float, ptr addrspace(7) [[PTR]], align 4
; CHECK-NEXT: ret float [[LOADED]]
;
%ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
%loaded = load float, ptr addrspace(7) %ptr, align 8
ret float %loaded
}

define float @load_lt_base(ptr align 8 %p) {
; CHECK-LABEL: define float @load_lt_base(
; CHECK-SAME: ptr align 8 [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr align 8 [[P]], i16 0, i64 0, i32 0)
; CHECK-NEXT: [[LOADED:%.*]] = load float, ptr addrspace(7) [[PTR]], align 4
; CHECK-NEXT: ret float [[LOADED]]
;
%ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
%loaded = load float, ptr addrspace(7) %ptr, align 4
ret float %loaded
}
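
As a worked check of the two tests above under the min() rule: in @load_gt_base the base is align 4 and the load is written align 8, so the load is rewritten to align 4; in @load_lt_base the base is align 8, so the make.buffer.rsrc call is annotated align 8 while the load stays at its own align 4. A purely illustrative snippet, with numbers mirroring the tests; nothing here is part of the patch:

// Illustrative only: reproduces the expected access alignments from the tests.
#include <algorithm>
#include <cassert>
#include "llvm/Support/Alignment.h"

int main() {
  using llvm::Align;
  // @load_gt_base: base align 4, load written as align 8 -> load becomes align 4.
  assert(std::min(Align(8), Align(4)).value() == 4);
  // @load_lt_base: base align 8, load written as align 4 -> load stays align 4.
  assert(std::min(Align(4), Align(8)).value() == 4);
  return 0;
}

Conversely, when the base pointer carries no align attribute at all, as in the tag-invariant-loads.ll checks below, the resource alignment now comes only from that unannotated base, so no alignment is emitted on the call; that is why the align 4 annotation disappears from those CHECK lines.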

llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll (2 additions & 2 deletions)

@@ -306,7 +306,7 @@ define amdgpu_kernel void @test_call_untouched_ptr() {
 define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
 ; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
-; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
+; AMDGCN-NEXT: [[RSRC:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
 ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
 ; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
 ; AMDGCN-NEXT: ret void

@@ -321,7 +321,7 @@ define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
 define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
 ; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
-; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11]]
+; AMDGCN-NEXT: [[RSRC:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11]]
 ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
 ; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
 ; AMDGCN-NEXT: ret void