@@ -1288,16 +1288,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1288
1288
return std::min (MaxVirtReg + MaxPhysReg, 256u );
1289
1289
}
1290
1290
1291
- // TODO: Migrate to range merge of amdgpu-agpr-alloc.
1292
- struct AAAMDGPUNoAGPR : public StateWrapper <BooleanState , AbstractAttribute> {
1293
- using Base = StateWrapper<BooleanState , AbstractAttribute>;
1294
- AAAMDGPUNoAGPR (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1291
+ struct AAAMDGPUMinAGPRAlloc
1292
+ : public StateWrapper<DecIntegerState<> , AbstractAttribute> {
1293
+ using Base = StateWrapper<DecIntegerState<> , AbstractAttribute>;
1294
+ AAAMDGPUMinAGPRAlloc (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1295
1295
1296
- static AAAMDGPUNoAGPR &createForPosition (const IRPosition &IRP,
1297
- Attributor &A) {
1296
// Factory: constructs the attribute with placement new in A.Allocator and
// returns a reference to it. Only IRPosition::IRP_FUNCTION positions are
// handled; any other position kind falls through to llvm_unreachable.
+ static AAAMDGPUMinAGPRAlloc &createForPosition (const IRPosition &IRP,
1297
+ Attributor &A) {
1298
1298
if (IRP.getPositionKind () == IRPosition::IRP_FUNCTION)
1299
- return *new (A.Allocator ) AAAMDGPUNoAGPR (IRP, A);
1300
- llvm_unreachable (" AAAMDGPUNoAGPR is only valid for function position" );
1299
+ return *new (A.Allocator ) AAAMDGPUMinAGPRAlloc (IRP, A);
1300
+ llvm_unreachable (
1301
+ " AAAMDGPUMinAGPRAlloc is only valid for function position" );
1301
1302
}
1302
1303
1303
1304
void initialize (Attributor &A) override {
@@ -1310,25 +1311,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
1310
1311
}
1311
1312
1312
1313
// Builds a printable description of the state: the attribute name followed by
// '=' and the currently assumed value. This replaces the removed two-way
// no-agpr / maybe-agpr string that the boolean state produced.
const std::string getAsStr (Attributor *A) const override {
1313
- return getAssumed () ? " amdgpu-no-agpr" : " amdgpu-maybe-agpr" ;
1314
+ std::string Str = " amdgpu-agpr-alloc=" ;
1315
+ raw_string_ostream OS (Str);
1316
+ OS << getAssumed ();
1317
+ return OS.str ();
1314
1318
}
1315
1319
1316
1320
// Empty override: this attribute records no statistics.
void trackStatistics () const override {}
1317
1321
1318
1322
ChangeStatus updateImpl (Attributor &A) override {
1319
- // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1323
+ DecIntegerState<> Maximum;
1320
1324
1321
- auto CheckForNoAGPRs = [&](Instruction &I) {
1325
+ // Check for cases which require allocation of AGPRs. The only cases where
1326
+ // AGPRs are required are if there are direct references to AGPRs, so inline
1327
+ // assembly and special intrinsics.
1328
+ auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1322
1329
const auto &CB = cast<CallBase>(I);
1323
1330
const Value *CalleeOp = CB.getCalledOperand ();
1324
- const Function *Callee = dyn_cast<Function>(CalleeOp);
1325
- if (!Callee) {
1326
- if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1327
- return inlineAsmGetNumRequiredAGPRs (IA, CB) == 0 ;
1328
- return false ;
1331
+
1332
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1333
+ // Technically, the inline asm could be invoking a call to an unknown
1334
+ // external function that requires AGPRs, but ignore that.
1335
+ unsigned NumRegs = inlineAsmGetNumRequiredAGPRs (IA, CB);
1336
+ Maximum.takeAssumedMaximum (NumRegs);
1337
+ return true ;
1329
1338
}
1330
1339
1331
- switch (Callee-> getIntrinsicID ()) {
1340
+ switch (CB. getIntrinsicID ()) {
1332
1341
case Intrinsic::not_intrinsic:
1333
1342
break ;
1334
1343
case Intrinsic::write_register:
@@ -1340,7 +1349,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
1340
1349
->getOperand (0 ));
1341
1350
auto [Kind, RegIdx, NumRegs] =
1342
1351
AMDGPU::parseAsmPhysRegName (RegName->getString ());
1343
- return Kind != ' a' ;
1352
+ if (Kind == ' a' )
1353
+ Maximum.takeAssumedMaximum (std::min (RegIdx + NumRegs, 256u ));
1354
+
1355
+ return true ;
1344
1356
}
1345
1357
default :
1346
1358
// Some intrinsics may use AGPRs, but if we have a choice, we are not
@@ -1349,40 +1361,58 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
1349
1361
}
1350
1362
1351
1363
// TODO: Handle callsite attributes
1352
- const auto *CalleeInfo = A.getAAFor <AAAMDGPUNoAGPR>(
1353
- *this , IRPosition::function (*Callee), DepClassTy::REQUIRED);
1354
- return CalleeInfo && CalleeInfo->isValidState () &&
1355
- CalleeInfo->getAssumed ();
1364
+ auto *CBEdges = A.getAAFor <AACallEdges>(
1365
+ *this , IRPosition::callsite_function (CB), DepClassTy::REQUIRED);
1366
+ if (!CBEdges || CBEdges->hasUnknownCallee ()) {
1367
+ Maximum.indicatePessimisticFixpoint ();
1368
+ return false ;
1369
+ }
1370
+
1371
+ for (const Function *PossibleCallee : CBEdges->getOptimisticEdges ()) {
1372
+ const auto *CalleeInfo = A.getAAFor <AAAMDGPUMinAGPRAlloc>(
1373
+ *this , IRPosition::function (*PossibleCallee), DepClassTy::REQUIRED);
1374
+ if (!CalleeInfo || !CalleeInfo->isValidState ()) {
1375
+ Maximum.indicatePessimisticFixpoint ();
1376
+ return false ;
1377
+ }
1378
+
1379
+ Maximum.takeAssumedMaximum (CalleeInfo->getAssumed ());
1380
+ }
1381
+
1382
+ return true ;
1356
1383
};
1357
1384
1358
1385
bool UsedAssumedInformation = false ;
1359
- if (!A.checkForAllCallLikeInstructions (CheckForNoAGPRs , *this ,
1386
+ if (!A.checkForAllCallLikeInstructions (CheckForMinAGPRAllocs , *this ,
1360
1387
UsedAssumedInformation))
1361
1388
return indicatePessimisticFixpoint ();
1362
- return ChangeStatus::UNCHANGED;
1389
+
1390
+ return clampStateAndIndicateChange (getState (), Maximum);
1363
1391
}
1364
1392
1365
1393
// Writes the result into the IR: attaches the string attribute
// "amdgpu-agpr-alloc" to this attribute's IR position, with the assumed value
// rendered as text. Returns whatever A.manifestAttrs reports.
//
// Note the removed early-out: the previous boolean version only manifested
// (with the fixed value "0") when the assumed state was true; this version
// always manifests the computed value.
ChangeStatus manifest (Attributor &A) override {
1366
- if (!getAssumed ())
1367
- return ChangeStatus::UNCHANGED;
1368
1394
LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
1369
- return A.manifestAttrs (getIRPosition (),
1370
- {Attribute::get (Ctx, " amdgpu-agpr-alloc" , " 0" )});
1395
// Format the assumed value into Buffer so it can be used as the attribute's
// value string.
+ SmallString<4 > Buffer;
1396
+ raw_svector_ostream OS (Buffer);
1397
+ OS << getAssumed ();
1398
+
1399
+ return A.manifestAttrs (
1400
+ getIRPosition (), {Attribute::get (Ctx, " amdgpu-agpr-alloc" , OS.str ())});
1371
1401
}
1372
1402
1373
// Returns the name of this abstract attribute; the string was updated to
// match the renamed class.
- StringRef getName () const override { return " AAAMDGPUNoAGPR " ; }
1403
+ StringRef getName () const override { return " AAAMDGPUMinAGPRAlloc " ; }
1374
1404
// Returns the address of the static ID member; classof compares against this
// same address to recognize instances of this attribute.
const char *getIdAddr () const override { return &ID; }
1375
1405
1376
1406
// / This function should return true if the type of the \p AA is
1377
- // / AAAMDGPUNoAGPRs
1407
+ // / AAAMDGPUMinAGPRAlloc
// The check is by identity: \p AA matches when its getIdAddr() equals the
// address of this class's static ID.
1378
1408
static bool classof (const AbstractAttribute *AA) {
1379
1409
return (AA->getIdAddr () == &ID);
1380
1410
}
1381
1411
1382
1412
static const char ID;
1383
1413
};
1384
1414
1385
// Out-of-class definition of the static ID member. Its address (not its
// value) is what getIdAddr() returns and what classof() compares against.
- const char AAAMDGPUNoAGPR ::ID = 0 ;
1415
+ const char AAAMDGPUMinAGPRAlloc ::ID = 0 ;
1386
1416
1387
1417
// / An abstract attribute to propagate the function attribute
1388
1418
// / "amdgpu-cluster-dims" from kernel entry functions to device functions.
@@ -1550,10 +1580,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1550
1580
DenseSet<const char *> Allowed (
1551
1581
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1552
1582
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1553
- &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1554
- &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1555
- &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1556
- &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
1583
+ &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1584
+ &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1585
+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1586
+ &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1587
+ &AAAMDGPUClusterDims::ID});
1557
1588
1558
1589
AttributorConfig AC (CGUpdater);
1559
1590
AC.IsClosedWorldModule = Options.IsClosedWorld ;
@@ -1595,7 +1626,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1595
1626
A.getOrCreateAAFor <AAAMDGPUClusterDims>(IRPosition::function (*F));
1596
1627
1597
1628
if (ST.hasGFX90AInsts ())
1598
- A.getOrCreateAAFor <AAAMDGPUNoAGPR >(IRPosition::function (*F));
1629
+ A.getOrCreateAAFor <AAAMDGPUMinAGPRAlloc >(IRPosition::function (*F));
1599
1630
1600
1631
for (auto &I : instructions (F)) {
1601
1632
Value *Ptr = nullptr ;
0 commit comments