Skip to content

Commit e442904

Browse files
Shoreshen and shiltian authored
[AMDGPU] Apply alignment attr for make.buffer.rsrc (#166914)
Calculate the alignment for the `make.buffer.rsrc` intrinsic. The original idea was that the alignment on uses of the return value of `make.buffer.rsrc` should be capped by the alignment of the intrinsic's base operand.

For example:

```ll
define float @foo(ptr addrspace(1) align X %ptr) {
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 C, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align Y
  ret float %y
}
```

We hoped that `Y = min(X, Y)`.

---

After discussion, letting `Y = min(X, Y)` seems improper, since it contradicts the semantics of `align` on a load. So we apply the original behavior of `align` instead, which is to let `X` and `Y` both equal `max(X, Y)`.

---------

Co-authored-by: Shilei Tian <[email protected]>
1 parent 6ec8c43 commit e442904

File tree

4 files changed

+67
-4
lines changed

4 files changed

+67
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1603,7 +1603,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
16031603
&AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
16041604
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
16051605
&AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1606-
&AAAMDGPUClusterDims::ID});
1606+
&AAAMDGPUClusterDims::ID, &AAAlign::ID});
16071607

16081608
AttributorConfig AC(CGUpdater);
16091609
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1661,6 +1661,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
16611661
if (Ptr) {
16621662
A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
16631663
A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1664+
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
1665+
if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1666+
A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
1667+
}
16641668
}
16651669
}
16661670
}

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5220,6 +5220,13 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
52205220
return AlignAA->getKnownAlign().value();
52215221
break;
52225222
}
5223+
case Intrinsic::amdgcn_make_buffer_rsrc: {
5224+
const auto *AlignAA = A.getAAFor<AAAlign>(
5225+
QueryingAA, IRPosition::value(*II), DepClassTy::NONE);
5226+
if (AlignAA)
5227+
return AlignAA->getKnownAlign().value();
5228+
break;
5229+
}
52235230
default:
52245231
break;
52255232
}
@@ -5543,7 +5550,7 @@ struct AAAlignCallSiteReturned final
55435550
const auto *AlignAA =
55445551
A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
55455552
DepClassTy::REQUIRED);
5546-
if (AlignAA && AlignAA->isValidState()) {
5553+
if (AlignAA) {
55475554
Alignment = std::max(AlignAA->getAssumedAlign(), Alignment);
55485555
Valid = true;
55495556
}
@@ -5554,6 +5561,18 @@ struct AAAlignCallSiteReturned final
55545561
std::min(this->getAssumedAlign(), Alignment).value());
55555562
break;
55565563
}
5564+
// FIXME: Should introduce target specific sub-attributes and letting
5565+
// getAAfor<AAAlign> lead to create sub-attribute to handle target
5566+
// specific intrinsics.
5567+
case Intrinsic::amdgcn_make_buffer_rsrc: {
5568+
const auto *AlignAA =
5569+
A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
5570+
DepClassTy::REQUIRED);
5571+
if (AlignAA)
5572+
return clampStateAndIndicateChange<StateType>(
5573+
this->getState(), AlignAA->getAssumedAlign().value());
5574+
break;
5575+
}
55575576
default:
55585577
break;
55595578
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
3+
4+
define float @align_back_prop(ptr addrspace(1) align 4 %x) {
5+
; CHECK-LABEL: define float @align_back_prop(
6+
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
8+
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
9+
; CHECK-NEXT: ret float [[Y]]
10+
;
11+
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
12+
%y = load float, ptr addrspace(7) %fat.ptr, align 8
13+
ret float %y
14+
}
15+
16+
define float @align_foward_prop(ptr addrspace(1) align 8 %x) {
17+
; CHECK-LABEL: define float @align_foward_prop(
18+
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
19+
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
20+
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
21+
; CHECK-NEXT: ret float [[Y]]
22+
;
23+
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
24+
%y = load float, ptr addrspace(7) %fat.ptr, align 4
25+
ret float %y
26+
}
27+
28+
define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
29+
; CHECK-LABEL: define float @align_mix_prop(
30+
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
31+
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
32+
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
33+
; CHECK-NEXT: [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
34+
; CHECK-NEXT: ret float [[Z]]
35+
;
36+
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
37+
%y = load float, ptr addrspace(7) %fat.ptr, align 2
38+
%z = load float, ptr addrspace(1) %x, align 8
39+
ret float %z
40+
}

llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ define amdgpu_kernel void @test_call_untouched_ptr() {
305305

306306
define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
307307
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
308-
; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
308+
; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
309309
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
310310
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
311311
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
@@ -320,7 +320,7 @@ define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
320320

321321
define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
322322
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
323-
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
323+
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
324324
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11]]
325325
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
326326
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]

0 commit comments

Comments
 (0)