@@ -1273,16 +1273,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12731273 return std::min (std::max (MaxVirtReg, MaxPhysReg), 256u );
12741274}
12751275
1276- // TODO: Migrate to range merge of amdgpu-agpr-alloc.
1277- struct AAAMDGPUNoAGPR : public StateWrapper <BooleanState , AbstractAttribute> {
1278- using Base = StateWrapper<BooleanState , AbstractAttribute>;
1279- AAAMDGPUNoAGPR (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1276+ struct AAAMDGPUMinAGPRAlloc
1277+ : public StateWrapper<DecIntegerState<> , AbstractAttribute> {
1278+ using Base = StateWrapper<DecIntegerState<> , AbstractAttribute>;
1279+ AAAMDGPUMinAGPRAlloc (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
12801280
1281- static AAAMDGPUNoAGPR &createForPosition (const IRPosition &IRP,
1282- Attributor &A) {
1281+ static AAAMDGPUMinAGPRAlloc &createForPosition (const IRPosition &IRP,
1282+ Attributor &A) {
12831283 if (IRP.getPositionKind () == IRPosition::IRP_FUNCTION)
1284- return *new (A.Allocator ) AAAMDGPUNoAGPR (IRP, A);
1285- llvm_unreachable (" AAAMDGPUNoAGPR is only valid for function position" );
1284+ return *new (A.Allocator ) AAAMDGPUMinAGPRAlloc (IRP, A);
1285+ llvm_unreachable (
1286+ " AAAMDGPUMinAGPRAlloc is only valid for function position" );
12861287 }
12871288
12881289 void initialize (Attributor &A) override {
@@ -1295,25 +1296,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
12951296 }
12961297
12971298 const std::string getAsStr (Attributor *A) const override {
1298- return getAssumed () ? " amdgpu-no-agpr" : " amdgpu-maybe-agpr" ;
1299+ std::string Str = " amdgpu-agpr-alloc=" ;
1300+ raw_string_ostream OS (Str);
1301+ OS << getAssumed ();
1302+ return OS.str ();
12991303 }
13001304
13011305 void trackStatistics () const override {}
13021306
13031307 ChangeStatus updateImpl (Attributor &A) override {
1304- // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1308+ DecIntegerState<> Maximum;
13051309
1306- auto CheckForNoAGPRs = [&](Instruction &I) {
1310+ // Check for cases which require allocation of AGPRs. The only cases where
1311+ // AGPRs are required are if there are direct references to AGPRs, so inline
1312+ // assembly and special intrinsics.
1313+ auto CheckForMinAGPRAllocs = [&](Instruction &I) {
13071314 const auto &CB = cast<CallBase>(I);
13081315 const Value *CalleeOp = CB.getCalledOperand ();
1309- const Function *Callee = dyn_cast<Function>(CalleeOp);
1310- if (!Callee) {
1311- if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1312- return inlineAsmGetNumRequiredAGPRs (IA, CB) == 0 ;
1313- return false ;
1316+
1317+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1318+ // Technically, the inline asm could be invoking a call to an unknown
1319+ // external function that requires AGPRs, but ignore that.
1320+ unsigned NumRegs = inlineAsmGetNumRequiredAGPRs (IA, CB);
1321+ Maximum.takeAssumedMaximum (NumRegs);
1322+ return true ;
13141323 }
13151324
1316- switch (Callee-> getIntrinsicID ()) {
1325+ switch (CB. getIntrinsicID ()) {
13171326 case Intrinsic::not_intrinsic:
13181327 break ;
13191328 case Intrinsic::write_register:
@@ -1323,7 +1332,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13231332 cast<MetadataAsValue>(CB.getArgOperand (0 ))->getMetadata ());
13241333 auto [Kind, RegIdx, NumRegs] =
13251334 AMDGPU::parseAsmPhysRegName (RegName->getString ());
1326- return Kind != ' a' ;
1335+ if (Kind == ' a' )
1336+ Maximum.takeAssumedMaximum (std::min (RegIdx + NumRegs, 256u ));
1337+
1338+ return true ;
13271339 }
13281340 default :
13291341 // Some intrinsics may use AGPRs, but if we have a choice, we are not
@@ -1332,40 +1344,58 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13321344 }
13331345
13341346 // TODO: Handle callsite attributes
1335- const auto *CalleeInfo = A.getAAFor <AAAMDGPUNoAGPR>(
1336- *this , IRPosition::function (*Callee), DepClassTy::REQUIRED);
1337- return CalleeInfo && CalleeInfo->isValidState () &&
1338- CalleeInfo->getAssumed ();
1347+ auto *CBEdges = A.getAAFor <AACallEdges>(
1348+ *this , IRPosition::callsite_function (CB), DepClassTy::REQUIRED);
1349+ if (!CBEdges || CBEdges->hasUnknownCallee ()) {
1350+ Maximum.indicatePessimisticFixpoint ();
1351+ return false ;
1352+ }
1353+
1354+ for (const Function *PossibleCallee : CBEdges->getOptimisticEdges ()) {
1355+ const auto *CalleeInfo = A.getAAFor <AAAMDGPUMinAGPRAlloc>(
1356+ *this , IRPosition::function (*PossibleCallee), DepClassTy::REQUIRED);
1357+ if (!CalleeInfo || !CalleeInfo->isValidState ()) {
1358+ Maximum.indicatePessimisticFixpoint ();
1359+ return false ;
1360+ }
1361+
1362+ Maximum.takeAssumedMaximum (CalleeInfo->getAssumed ());
1363+ }
1364+
1365+ return true ;
13391366 };
13401367
13411368 bool UsedAssumedInformation = false ;
1342- if (!A.checkForAllCallLikeInstructions (CheckForNoAGPRs , *this ,
1369+ if (!A.checkForAllCallLikeInstructions (CheckForMinAGPRAllocs , *this ,
13431370 UsedAssumedInformation))
13441371 return indicatePessimisticFixpoint ();
1345- return ChangeStatus::UNCHANGED;
1372+
1373+ return clampStateAndIndicateChange (getState (), Maximum);
13461374 }
13471375
13481376 ChangeStatus manifest (Attributor &A) override {
1349- if (!getAssumed ())
1350- return ChangeStatus::UNCHANGED;
13511377 LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
1352- return A.manifestAttrs (getIRPosition (),
1353- {Attribute::get (Ctx, " amdgpu-agpr-alloc" , " 0" )});
1378+ SmallString<4 > Buffer;
1379+ raw_svector_ostream OS (Buffer);
1380+ OS << getAssumed ();
1381+
1382+ return A.manifestAttrs (
1383+ getIRPosition (), {Attribute::get (Ctx, " amdgpu-agpr-alloc" , OS.str ())});
13541384 }
13551385
1356- StringRef getName () const override { return " AAAMDGPUNoAGPR " ; }
1386+ StringRef getName () const override { return " AAAMDGPUMinAGPRAlloc " ; }
13571387 const char *getIdAddr () const override { return &ID; }
13581388
13591389 // / This function should return true if the type of the \p AA is
1360- // / AAAMDGPUNoAGPRs
1390+ // / AAAMDGPUMinAGPRAllocs
13611391 static bool classof (const AbstractAttribute *AA) {
13621392 return (AA->getIdAddr () == &ID);
13631393 }
13641394
13651395 static const char ID;
13661396};
13671397
1368- const char AAAMDGPUNoAGPR ::ID = 0 ;
1398+ const char AAAMDGPUMinAGPRAlloc ::ID = 0 ;
13691399
13701400// / An abstract attribute to propagate the function attribute
13711401// / "amdgpu-cluster-dims" from kernel entry functions to device functions.
@@ -1533,10 +1563,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15331563 DenseSet<const char *> Allowed (
15341564 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
15351565 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1536- &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1537- &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1538- &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1539- &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
1566+ &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1567+ &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1568+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1569+ &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1570+ &AAAMDGPUClusterDims::ID});
15401571
15411572 AttributorConfig AC (CGUpdater);
15421573 AC.IsClosedWorldModule = Options.IsClosedWorld ;
@@ -1578,7 +1609,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15781609 A.getOrCreateAAFor <AAAMDGPUClusterDims>(IRPosition::function (*F));
15791610
15801611 if (ST.hasGFX90AInsts ())
1581- A.getOrCreateAAFor <AAAMDGPUNoAGPR >(IRPosition::function (*F));
1612+ A.getOrCreateAAFor <AAAMDGPUMinAGPRAlloc >(IRPosition::function (*F));
15821613
15831614 for (auto &I : instructions (F)) {
15841615 Value *Ptr = nullptr ;
0 commit comments