Skip to content

Commit 2886cd6

Browse files
committed
AMDGPU: Render non-0 values for amdgpu-agpr-alloc
This now tries to compute a lower bound on the number of registers for individual inline asm uses. Also starts using AACallEdges to handle indirect calls.
1 parent 34b16dd commit 2886cd6

File tree

2 files changed

+380
-88
lines changed

2 files changed

+380
-88
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 67 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,16 +1273,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12731273
return std::min(std::max(MaxVirtReg, MaxPhysReg), 256u);
12741274
}
12751275

1276-
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
1277-
struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
1278-
using Base = StateWrapper<BooleanState, AbstractAttribute>;
1279-
AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1276+
struct AAAMDGPUMinAGPRAlloc
1277+
: public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1278+
using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1279+
AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
12801280

1281-
static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
1282-
Attributor &A) {
1281+
static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1282+
Attributor &A) {
12831283
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1284-
return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
1285-
llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
1284+
return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1285+
llvm_unreachable(
1286+
"AAAMDGPUMinAGPRAlloc is only valid for function position");
12861287
}
12871288

12881289
void initialize(Attributor &A) override {
@@ -1295,25 +1296,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
12951296
}
12961297

12971298
const std::string getAsStr(Attributor *A) const override {
1298-
return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
1299+
std::string Str = "amdgpu-agpr-alloc=";
1300+
raw_string_ostream OS(Str);
1301+
OS << getAssumed();
1302+
return OS.str();
12991303
}
13001304

13011305
void trackStatistics() const override {}
13021306

13031307
ChangeStatus updateImpl(Attributor &A) override {
1304-
// TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1308+
DecIntegerState<> Maximum;
13051309

1306-
auto CheckForNoAGPRs = [&](Instruction &I) {
1310+
// Check for cases which require allocation of AGPRs. The only cases where
1311+
// AGPRs are required are if there are direct references to AGPRs, so inline
1312+
// assembly and special intrinsics.
1313+
auto CheckForMinAGPRAllocs = [&](Instruction &I) {
13071314
const auto &CB = cast<CallBase>(I);
13081315
const Value *CalleeOp = CB.getCalledOperand();
1309-
const Function *Callee = dyn_cast<Function>(CalleeOp);
1310-
if (!Callee) {
1311-
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1312-
return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
1313-
return false;
1316+
1317+
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1318+
// Technically, the inline asm could be invoking a call to an unknown
1319+
// external function that requires AGPRs, but ignore that.
1320+
unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1321+
Maximum.takeAssumedMaximum(NumRegs);
1322+
return true;
13141323
}
13151324

1316-
switch (Callee->getIntrinsicID()) {
1325+
switch (CB.getIntrinsicID()) {
13171326
case Intrinsic::not_intrinsic:
13181327
break;
13191328
case Intrinsic::write_register:
@@ -1323,7 +1332,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13231332
cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
13241333
auto [Kind, RegIdx, NumRegs] =
13251334
AMDGPU::parseAsmPhysRegName(RegName->getString());
1326-
return Kind != 'a';
1335+
if (Kind == 'a')
1336+
Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1337+
1338+
return true;
13271339
}
13281340
default:
13291341
// Some intrinsics may use AGPRs, but if we have a choice, we are not
@@ -1332,40 +1344,58 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13321344
}
13331345

13341346
// TODO: Handle callsite attributes
1335-
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
1336-
*this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
1337-
return CalleeInfo && CalleeInfo->isValidState() &&
1338-
CalleeInfo->getAssumed();
1347+
auto *CBEdges = A.getAAFor<AACallEdges>(
1348+
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1349+
if (!CBEdges || CBEdges->hasUnknownCallee()) {
1350+
Maximum.indicatePessimisticFixpoint();
1351+
return false;
1352+
}
1353+
1354+
for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1355+
const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1356+
*this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1357+
if (!CalleeInfo || !CalleeInfo->isValidState()) {
1358+
Maximum.indicatePessimisticFixpoint();
1359+
return false;
1360+
}
1361+
1362+
Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1363+
}
1364+
1365+
return true;
13391366
};
13401367

13411368
bool UsedAssumedInformation = false;
1342-
if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
1369+
if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
13431370
UsedAssumedInformation))
13441371
return indicatePessimisticFixpoint();
1345-
return ChangeStatus::UNCHANGED;
1372+
1373+
return clampStateAndIndicateChange(getState(), Maximum);
13461374
}
13471375

13481376
ChangeStatus manifest(Attributor &A) override {
1349-
if (!getAssumed())
1350-
return ChangeStatus::UNCHANGED;
13511377
LLVMContext &Ctx = getAssociatedFunction()->getContext();
1352-
return A.manifestAttrs(getIRPosition(),
1353-
{Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")});
1378+
SmallString<4> Buffer;
1379+
raw_svector_ostream OS(Buffer);
1380+
OS << getAssumed();
1381+
1382+
return A.manifestAttrs(
1383+
getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
13541384
}
13551385

1356-
StringRef getName() const override { return "AAAMDGPUNoAGPR"; }
1386+
StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
13571387
const char *getIdAddr() const override { return &ID; }
13581388

13591389
/// This function should return true if the type of the \p AA is
1360-
/// AAAMDGPUNoAGPRs
1390+
/// AAAMDGPUMinAGPRAllocs
13611391
static bool classof(const AbstractAttribute *AA) {
13621392
return (AA->getIdAddr() == &ID);
13631393
}
13641394

13651395
static const char ID;
13661396
};
13671397

1368-
const char AAAMDGPUNoAGPR::ID = 0;
1398+
const char AAAMDGPUMinAGPRAlloc::ID = 0;
13691399

13701400
/// An abstract attribute to propagate the function attribute
13711401
/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
@@ -1533,10 +1563,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15331563
DenseSet<const char *> Allowed(
15341564
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
15351565
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1536-
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1537-
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1538-
&AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1539-
&AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
1566+
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1567+
&AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1568+
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1569+
&AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1570+
&AAAMDGPUClusterDims::ID});
15401571

15411572
AttributorConfig AC(CGUpdater);
15421573
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1578,7 +1609,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15781609
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
15791610

15801611
if (ST.hasGFX90AInsts())
1581-
A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
1612+
A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
15821613

15831614
for (auto &I : instructions(F)) {
15841615
Value *Ptr = nullptr;

0 commit comments

Comments
 (0)