Skip to content

Commit 01ac00b

Browse files
authored
AMDGPU: Render non-0 values for amdgpu-agpr-alloc
(#162300) This now tries to compute a lower bound on the number of registers for individual inline asm uses. Also starts using AACallEdges to handle indirect calls.
1 parent 0d6c5e0 commit 01ac00b

File tree

2 files changed

+384
-93
lines changed

2 files changed

+384
-93
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 67 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,16 +1288,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12881288
return std::min(MaxVirtReg + MaxPhysReg, 256u);
12891289
}
12901290

1291-
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
1292-
struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
1293-
using Base = StateWrapper<BooleanState, AbstractAttribute>;
1294-
AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1291+
struct AAAMDGPUMinAGPRAlloc
1292+
: public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1293+
using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1294+
AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
12951295

1296-
static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
1297-
Attributor &A) {
1296+
static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1297+
Attributor &A) {
12981298
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1299-
return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
1300-
llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
1299+
return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1300+
llvm_unreachable(
1301+
"AAAMDGPUMinAGPRAlloc is only valid for function position");
13011302
}
13021303

13031304
void initialize(Attributor &A) override {
@@ -1310,25 +1311,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13101311
}
13111312

13121313
const std::string getAsStr(Attributor *A) const override {
1313-
return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
1314+
std::string Str = "amdgpu-agpr-alloc=";
1315+
raw_string_ostream OS(Str);
1316+
OS << getAssumed();
1317+
return OS.str();
13141318
}
13151319

13161320
void trackStatistics() const override {}
13171321

13181322
ChangeStatus updateImpl(Attributor &A) override {
1319-
// TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1323+
DecIntegerState<> Maximum;
13201324

1321-
auto CheckForNoAGPRs = [&](Instruction &I) {
1325+
// Check for cases which require allocation of AGPRs. The only cases where
1326+
// AGPRs are required are if there are direct references to AGPRs, so inline
1327+
// assembly and special intrinsics.
1328+
auto CheckForMinAGPRAllocs = [&](Instruction &I) {
13221329
const auto &CB = cast<CallBase>(I);
13231330
const Value *CalleeOp = CB.getCalledOperand();
1324-
const Function *Callee = dyn_cast<Function>(CalleeOp);
1325-
if (!Callee) {
1326-
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1327-
return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
1328-
return false;
1331+
1332+
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1333+
// Technically, the inline asm could be invoking a call to an unknown
1334+
// external function that requires AGPRs, but ignore that.
1335+
unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1336+
Maximum.takeAssumedMaximum(NumRegs);
1337+
return true;
13291338
}
13301339

1331-
switch (Callee->getIntrinsicID()) {
1340+
switch (CB.getIntrinsicID()) {
13321341
case Intrinsic::not_intrinsic:
13331342
break;
13341343
case Intrinsic::write_register:
@@ -1340,7 +1349,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13401349
->getOperand(0));
13411350
auto [Kind, RegIdx, NumRegs] =
13421351
AMDGPU::parseAsmPhysRegName(RegName->getString());
1343-
return Kind != 'a';
1352+
if (Kind == 'a')
1353+
Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1354+
1355+
return true;
13441356
}
13451357
default:
13461358
// Some intrinsics may use AGPRs, but if we have a choice, we are not
@@ -1349,40 +1361,58 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13491361
}
13501362

13511363
// TODO: Handle callsite attributes
1352-
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
1353-
*this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
1354-
return CalleeInfo && CalleeInfo->isValidState() &&
1355-
CalleeInfo->getAssumed();
1364+
auto *CBEdges = A.getAAFor<AACallEdges>(
1365+
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1366+
if (!CBEdges || CBEdges->hasUnknownCallee()) {
1367+
Maximum.indicatePessimisticFixpoint();
1368+
return false;
1369+
}
1370+
1371+
for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1372+
const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1373+
*this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1374+
if (!CalleeInfo || !CalleeInfo->isValidState()) {
1375+
Maximum.indicatePessimisticFixpoint();
1376+
return false;
1377+
}
1378+
1379+
Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1380+
}
1381+
1382+
return true;
13561383
};
13571384

13581385
bool UsedAssumedInformation = false;
1359-
if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
1386+
if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
13601387
UsedAssumedInformation))
13611388
return indicatePessimisticFixpoint();
1362-
return ChangeStatus::UNCHANGED;
1389+
1390+
return clampStateAndIndicateChange(getState(), Maximum);
13631391
}
13641392

13651393
ChangeStatus manifest(Attributor &A) override {
1366-
if (!getAssumed())
1367-
return ChangeStatus::UNCHANGED;
13681394
LLVMContext &Ctx = getAssociatedFunction()->getContext();
1369-
return A.manifestAttrs(getIRPosition(),
1370-
{Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")});
1395+
SmallString<4> Buffer;
1396+
raw_svector_ostream OS(Buffer);
1397+
OS << getAssumed();
1398+
1399+
return A.manifestAttrs(
1400+
getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
13711401
}
13721402

1373-
StringRef getName() const override { return "AAAMDGPUNoAGPR"; }
1403+
StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
13741404
const char *getIdAddr() const override { return &ID; }
13751405

13761406
/// This function should return true if the type of the \p AA is
1377-
/// AAAMDGPUNoAGPRs
1407+
/// AAAMDGPUMinAGPRAlloc
13781408
static bool classof(const AbstractAttribute *AA) {
13791409
return (AA->getIdAddr() == &ID);
13801410
}
13811411

13821412
static const char ID;
13831413
};
13841414

1385-
const char AAAMDGPUNoAGPR::ID = 0;
1415+
const char AAAMDGPUMinAGPRAlloc::ID = 0;
13861416

13871417
/// An abstract attribute to propagate the function attribute
13881418
/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
@@ -1550,10 +1580,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15501580
DenseSet<const char *> Allowed(
15511581
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
15521582
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1553-
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1554-
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1555-
&AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1556-
&AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
1583+
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1584+
&AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1585+
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1586+
&AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1587+
&AAAMDGPUClusterDims::ID});
15571588

15581589
AttributorConfig AC(CGUpdater);
15591590
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1595,7 +1626,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15951626
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
15961627

15971628
if (ST.hasGFX90AInsts())
1598-
A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
1629+
A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
15991630

16001631
for (auto &I : instructions(F)) {
16011632
Value *Ptr = nullptr;

0 commit comments

Comments
 (0)