Skip to content

Commit f17ce96

Browse files
committed
[WIP][AMDGPU][Attributor] Infer inreg attribute in AMDGPUAttributor
1 parent 357bd61 commit f17ce96

File tree

4 files changed

+186
-21
lines changed

4 files changed

+186
-21
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "GCNSubtarget.h"
1515
#include "Utils/AMDGPUBaseInfo.h"
1616
#include "llvm/Analysis/CycleAnalysis.h"
17+
#include "llvm/Analysis/UniformityAnalysis.h"
1718
#include "llvm/CodeGen/TargetPassConfig.h"
1819
#include "llvm/IR/IntrinsicsAMDGPU.h"
1920
#include "llvm/IR/IntrinsicsR600.h"
@@ -1014,6 +1015,97 @@ struct AAAMDGPUNoAGPR
10141015

10151016
const char AAAMDGPUNoAGPR::ID = 0;
10161017

1018+
struct AAAMDGPUInreg
1019+
: public IRAttribute<Attribute::InReg,
1020+
StateWrapper<BooleanState, AbstractAttribute>,
1021+
AAAMDGPUInreg> {
1022+
AAAMDGPUInreg(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
1023+
1024+
/// Create an abstract attribute view for the position \p IRP.
1025+
static AAAMDGPUInreg &createForPosition(const IRPosition &IRP, Attributor &A);
1026+
1027+
/// See AbstractAttribute::getName()
1028+
const std::string getName() const override { return "AAAMDGPUInreg"; }
1029+
1030+
const std::string getAsStr(Attributor *A) const override {
1031+
return getAssumed() ? "inreg" : "non-inreg";
1032+
}
1033+
1034+
void trackStatistics() const override {}
1035+
1036+
/// See AbstractAttribute::getIdAddr()
1037+
const char *getIdAddr() const override { return &ID; }
1038+
1039+
/// This function should return true if the type of the \p AA is AAAMDGPUInreg
1040+
static bool classof(const AbstractAttribute *AA) {
1041+
return (AA->getIdAddr() == &ID);
1042+
}
1043+
1044+
/// Unique ID (due to the unique address)
1045+
static const char ID;
1046+
};
1047+
1048+
const char AAAMDGPUInreg::ID = 0;
1049+
1050+
namespace {
1051+
1052+
struct AAAMDGPUInregArgument : public AAAMDGPUInreg {
1053+
AAAMDGPUInregArgument(const IRPosition &IRP, Attributor &A)
1054+
: AAAMDGPUInreg(IRP, A) {}
1055+
1056+
void initialize(Attributor &A) override {
1057+
if (getAssociatedArgument()->hasAttribute(Attribute::InReg))
1058+
indicateOptimisticFixpoint();
1059+
}
1060+
1061+
ChangeStatus updateImpl(Attributor &A) override {
1062+
unsigned ArgNo = getAssociatedArgument()->getArgNo();
1063+
1064+
auto Pred = [&](AbstractCallSite ACS) -> bool {
1065+
CallBase *CB = ACS.getInstruction();
1066+
Value *V = CB->getArgOperandUse(ArgNo);
1067+
if (auto *G = dyn_cast<GlobalValue>(V))
1068+
return true;
1069+
if (auto *I = dyn_cast<Instruction>(V)) {
1070+
auto AU = A.getInfoCache()
1071+
.getAnalysisResultForFunction<UniformityInfoAnalysis>(
1072+
*I->getFunction());
1073+
return AU && AU->isUniform(I);
1074+
}
1075+
if (auto *Arg = dyn_cast<Argument>(V)) {
1076+
auto *AA =
1077+
A.getOrCreateAAFor<AAAMDGPUInreg>(IRPosition::argument(*Arg));
1078+
return AA && AA->isValidState();
1079+
}
1080+
// For unforeseen cases, we need to assume it is not uniform thus not
1081+
// qualified for inreg.
1082+
return false;
1083+
};
1084+
1085+
bool UsedAssumedInformation = false;
1086+
if (!A.checkForAllCallSites(Pred, *this, /*RequireAllCallSites=*/true,
1087+
UsedAssumedInformation))
1088+
return indicatePessimisticFixpoint();
1089+
1090+
if (!UsedAssumedInformation)
1091+
return indicateOptimisticFixpoint();
1092+
1093+
return ChangeStatus::UNCHANGED;
1094+
}
1095+
};
1096+
1097+
} // namespace
1098+
1099+
/// Allocate the AAAMDGPUInreg implementation matching \p IRP in the
/// Attributor's allocator. Only argument positions are supported; any other
/// position kind is unreachable.
AAAMDGPUInreg &AAAMDGPUInreg::createForPosition(const IRPosition &IRP,
                                                Attributor &A) {
  if (IRP.getPositionKind() != IRPosition::IRP_ARGUMENT)
    llvm_unreachable("not a valid position for AAAMDGPUInreg");

  return *new (A.Allocator) AAAMDGPUInregArgument(IRP, A);
}
1108+
10171109
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
10181110
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
10191111
for (unsigned I = 0;
@@ -1046,7 +1138,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10461138
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
10471139
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
10481140
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1049-
&AAInstanceInfo::ID});
1141+
&AAInstanceInfo::ID, &AAAMDGPUInreg::ID});
10501142

10511143
AttributorConfig AC(CGUpdater);
10521144
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1090,6 +1182,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10901182
IRPosition::value(*SI->getPointerOperand()));
10911183
}
10921184
}
1185+
1186+
if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) {
1187+
for (auto &Arg : F.args())
1188+
A.getOrCreateAAFor<AAAMDGPUInreg>(IRPosition::argument(Arg));
1189+
}
10931190
}
10941191

10951192
ChangeStatus Change = A.run();
@@ -1118,6 +1215,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
11181215

11191216
/// Declare the legacy-pass-manager analyses this pass requires: cycle info
/// and uniformity info.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<CycleInfoWrapperPass>()
      .addRequired<UniformityInfoWrapperPass>();
}
11221220

11231221
StringRef getPassName() const override { return "AMDGPU Attributor"; }

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
define internal fastcc void @foo(ptr %kg) {
1010
; CHECK-LABEL: define internal fastcc void @foo(
11-
; CHECK-SAME: ptr [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-SAME: ptr inreg [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[ENTRY:.*:]]
1313
; CHECK-NEXT: [[CLOSURE_I25_I:%.*]] = getelementptr i8, ptr [[KG]], i64 336
1414
; CHECK-NEXT: [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2+
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor %s -o - | FileCheck %s
3+
4+
@g1 = protected addrspace(1) externally_initialized global i32 0, align 4
5+
@g2 = protected addrspace(1) externally_initialized global i32 0, align 4
6+
7+
;.
8+
; CHECK: @g1 = protected addrspace(1) externally_initialized global i32 0, align 4
9+
; CHECK: @g2 = protected addrspace(1) externally_initialized global i32 0, align 4
10+
;.
11+
define internal fastcc void @f(ptr %x, ptr %y) {
12+
; CHECK-LABEL: define {{[^@]+}}@f
13+
; CHECK-SAME: (ptr inreg [[X:%.*]], ptr inreg [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
14+
; CHECK-NEXT: entry:
15+
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
16+
; CHECK-NEXT: store i32 [[X_VAL]], ptr addrspace(1) @g1, align 4
17+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[Y]], align 4
18+
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) @g2, align 4
19+
; CHECK-NEXT: ret void
20+
;
21+
entry:
22+
%x.val = load i32, ptr %x, align 4
23+
store i32 %x.val, ptr addrspace(1) @g1, align 4
24+
%load = load i32, ptr %y, align 4
25+
store i32 %load, ptr addrspace(1) @g2, align 4
26+
ret void
27+
}
28+
29+
define protected amdgpu_kernel void @kernel(ptr addrspace(1) %x2, i32 %z) {
30+
; CHECK-LABEL: define {{[^@]+}}@kernel
31+
; CHECK-SAME: (ptr addrspace(1) [[X2:%.*]], i32 [[Z:%.*]]) #[[ATTR1:[0-9]+]] {
32+
; CHECK-NEXT: entry:
33+
; CHECK-NEXT: [[X2_CAST:%.*]] = addrspacecast ptr addrspace(1) [[X2]] to ptr
34+
; CHECK-NEXT: [[QUEUE_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
35+
; CHECK-NEXT: [[QUEUE_PTR_CAST:%.*]] = addrspacecast ptr addrspace(4) [[QUEUE_PTR]] to ptr
36+
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
37+
; CHECK-NEXT: [[IMPLICITARG_PTR_CAST:%.*]] = addrspacecast ptr addrspace(4) [[IMPLICITARG_PTR]] to ptr
38+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[Z]], 0
39+
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], ptr [[QUEUE_PTR_CAST]], ptr [[X2_CAST]]
40+
; CHECK-NEXT: tail call fastcc void @f(ptr [[COND]], ptr noundef [[IMPLICITARG_PTR_CAST]])
41+
; CHECK-NEXT: [[DOTVAL:%.*]] = load i32, ptr addrspace(4) [[QUEUE_PTR]], align 4
42+
; CHECK-NEXT: tail call fastcc void @f(ptr [[COND]], ptr noundef [[IMPLICITARG_PTR_CAST]])
43+
; CHECK-NEXT: ret void
44+
;
45+
entry:
46+
%x2.cast = addrspacecast ptr addrspace(1) %x2 to ptr
47+
%queue.ptr = tail call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
48+
%queue.ptr.cast = addrspacecast ptr addrspace(4) %queue.ptr to ptr
49+
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
50+
%implicitarg.ptr.cast = addrspacecast ptr addrspace(4) %implicitarg.ptr to ptr
51+
%cmp = icmp sgt i32 %z, 0
52+
%cond = select i1 %cmp, ptr %queue.ptr.cast, ptr %x2.cast
53+
tail call fastcc void @f(ptr %cond, ptr noundef %implicitarg.ptr.cast)
54+
%.val = load i32, ptr addrspace(4) %queue.ptr, align 4
55+
tail call fastcc void @f(ptr %cond, ptr noundef %implicitarg.ptr.cast)
56+
ret void
57+
}
58+
59+
declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr()
60+
61+
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
62+
;.
63+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
64+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
65+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
66+
;.

llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
@recursive.kernel.lds = addrspace(3) global i16 poison
99

1010
;.
11-
; CHECK: @[[LLVM_AMDGCN_KERNEL_K0_F0_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_K0_F0_LDS_T:%.*]] poison, align 2, !absolute_symbol !0
12-
; CHECK: @[[LLVM_AMDGCN_KERNEL_K1_F0_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_K1_F0_LDS_T:%.*]] poison, align 2, !absolute_symbol !0
13-
; CHECK: @[[LLVM_AMDGCN_KERNEL_KERNEL_LDS_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_KERNEL_LDS_LDS_T:%.*]] poison, align 2, !absolute_symbol !0
14-
; CHECK: @[[LLVM_AMDGCN_KERNEL_KERNEL_LDS_RECURSION_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_KERNEL_LDS_RECURSION_LDS_T:%.*]] poison, align 2, !absolute_symbol !0
15-
; CHECK: @[[LLVM_AMDGCN_LDS_OFFSET_TABLE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(4) constant [3 x [2 x i32]]
11+
; CHECK: @llvm.amdgcn.kernel.k0_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0_f0.lds.t poison, align 2, !absolute_symbol [[META0:![0-9]+]]
12+
; CHECK: @llvm.amdgcn.kernel.k1_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1_f0.lds.t poison, align 2, !absolute_symbol [[META0]]
13+
; CHECK: @llvm.amdgcn.kernel.kernel_lds.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds.lds.t poison, align 2, !absolute_symbol [[META0]]
14+
; CHECK: @llvm.amdgcn.kernel.kernel_lds_recursion.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds_recursion.lds.t poison, align 2, !absolute_symbol [[META0]]
15+
; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]] [[2 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k0_f0.lds to i32), i32 poison], [2 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1_f0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds, i32 0, i32 1) to i32)], [2 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds to i32)]]
1616
;.
1717
define internal void @lds_use_through_indirect() {
1818
; CHECK-LABEL: define internal void @lds_use_through_indirect(
@@ -105,7 +105,7 @@ define internal void @f0_transitive() {
105105

106106
define amdgpu_kernel void @k0_f0() {
107107
; CHECK-LABEL: define amdgpu_kernel void @k0_f0(
108-
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] !llvm.amdgcn.lds.kernel.id !2 {
108+
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
109109
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_f0.lds) ]
110110
; CHECK-NEXT: call void @f0_transitive()
111111
; CHECK-NEXT: ret void
@@ -116,8 +116,8 @@ define amdgpu_kernel void @k0_f0() {
116116

117117
define amdgpu_kernel void @k1_f0() {
118118
; CHECK-LABEL: define amdgpu_kernel void @k1_f0(
119-
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] !llvm.amdgcn.lds.kernel.id !3 {
120-
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds) ], !alias.scope !4, !noalias !7
119+
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
120+
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds) ], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
121121
; CHECK-NEXT: call void @f0_transitive()
122122
; CHECK-NEXT: [[FPTR:%.*]] = load volatile ptr, ptr addrspace(1) null, align 8
123123
; CHECK-NEXT: call void [[FPTR]]()
@@ -168,7 +168,7 @@ define internal i16 @mutual_recursion_0(i16 %arg) {
168168

169169
define internal void @mutual_recursion_1(i16 %arg) {
170170
; CHECK-LABEL: define internal void @mutual_recursion_1(
171-
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
171+
; CHECK-SAME: i16 inreg [[ARG:%.*]]) #[[ATTR0]] {
172172
; CHECK-NEXT: call void @mutual_recursion_0(i16 [[ARG]])
173173
; CHECK-NEXT: ret void
174174
;
@@ -178,7 +178,7 @@ define internal void @mutual_recursion_1(i16 %arg) {
178178

179179
define amdgpu_kernel void @kernel_lds_recursion() {
180180
; CHECK-LABEL: define amdgpu_kernel void @kernel_lds_recursion(
181-
; CHECK-SAME: ) #[[ATTR2]] !llvm.amdgcn.lds.kernel.id !9 {
181+
; CHECK-SAME: ) #[[ATTR2]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] {
182182
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds) ]
183183
; CHECK-NEXT: call void @mutual_recursion_0(i16 0)
184184
; CHECK-NEXT: ret void
@@ -199,15 +199,16 @@ define amdgpu_kernel void @kernel_lds_recursion() {
199199
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
200200
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
201201
;.
202-
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1}
203-
; CHECK: [[META1:![0-9]+]] = !{i32 0}
204-
; CHECK: [[META2:![0-9]+]] = !{i32 1}
205-
; CHECK: [[META3:![0-9]+]] = !{!5}
206-
; CHECK: [[META4:![0-9]+]] = distinct !{!5, !6}
207-
; CHECK: [[META5:![0-9]+]] = distinct !{!6}
208-
; CHECK: [[META6:![0-9]+]] = !{!8}
209-
; CHECK: [[META7:![0-9]+]] = distinct !{!8, !6}
210-
; CHECK: [[META8:![0-9]+]] = !{i32 2}
202+
; CHECK: [[META0]] = !{i32 0, i32 1}
203+
; CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
204+
; CHECK: [[META2]] = !{i32 0}
205+
; CHECK: [[META3]] = !{i32 1}
206+
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
207+
; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
208+
; CHECK: [[META6]] = distinct !{[[META6]]}
209+
; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
210+
; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]}
211+
; CHECK: [[META9]] = !{i32 2}
211212
;.
212213
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
213214
; TABLE: {{.*}}

0 commit comments

Comments
 (0)