@@ -77,6 +77,13 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
   case Intrinsic::amdgcn_workgroup_id_z:
   case Intrinsic::r600_read_tgid_z:
     return WORKGROUP_ID_Z;
+  case Intrinsic::amdgcn_cluster_id_x:
+    NonKernelOnly = true;
+    return CLUSTER_ID_X;
+  case Intrinsic::amdgcn_cluster_id_y:
+    return CLUSTER_ID_Y;
+  case Intrinsic::amdgcn_cluster_id_z:
+    return CLUSTER_ID_Z;
   case Intrinsic::amdgcn_lds_kernel_id:
     return LDS_KERNEL_ID;
   case Intrinsic::amdgcn_dispatch_ptr:
@@ -1296,6 +1303,157 @@ struct AAAMDGPUNoAGPR
 
 const char AAAMDGPUNoAGPR::ID = 0;
 
+/// An abstract attribute to propagate the function attribute
+/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
+struct AAAMDGPUClusterDims
+    : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
+                                                Attributor &A);
+
+  /// See AbstractAttribute::getName().
+  StringRef getName() const override { return "AAAMDGPUClusterDims"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDGPUClusterDims.
+  static bool classof(const AbstractAttribute *AA) {
+    return AA->getIdAddr() == &ID;
+  }
+
+  virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
+const char AAAMDGPUClusterDims::ID = 0;
+
+struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
+  AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
+      : AAAMDGPUClusterDims(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    assert(F && "empty associated function");
+
+    Attr = AMDGPU::ClusterDimsAttr::get(*F);
+
+    // A kernel function's state is final, no matter what it is.
+    if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
+      if (Attr.isUnknown())
+        indicatePessimisticFixpoint();
+      else
+        indicateOptimisticFixpoint();
+    }
+  }
+
+  const std::string getAsStr(Attributor *A) const override {
+    if (!getAssumed() || Attr.isUnknown())
+      return "unknown";
+    if (Attr.isNoCluster())
+      return "no";
+    if (Attr.isVariableDims())
+      return "variable";
+    return Attr.to_string();
+  }
+
+  void trackStatistics() const override {}
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    auto OldState = Attr;
+
+    auto CheckCallSite = [&](AbstractCallSite CS) {
+      const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
+          *this, IRPosition::function(*CS.getInstruction()->getFunction()),
+          DepClassTy::REQUIRED);
+      if (!CallerAA || !CallerAA->isValidState())
+        return false;
+
+      return merge(CallerAA->getClusterDims());
+    };
+
+    bool UsedAssumedInformation = false;
+    if (!A.checkForAllCallSites(CheckCallSite, *this,
+                                /*RequireAllCallSites=*/true,
+                                UsedAssumedInformation))
+      return indicatePessimisticFixpoint();
+
+    return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    if (Attr.isUnknown())
+      return ChangeStatus::UNCHANGED;
+    return A.manifestAttrs(
+        getIRPosition(),
+        {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
+                        Attr.to_string())},
+        /*ForceReplace=*/true);
+  }
+
+  const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
+    return Attr;
+  }
+
+private:
+  bool merge(const AMDGPU::ClusterDimsAttr &Other) {
+    // Case 1: Both are still unknown; do nothing and keep waiting for
+    // propagation.
+    if (Attr.isUnknown() && Other.isUnknown())
+      return true;
+
+    // Case 2: The other is determined but we are still unknown; simply take
+    // the other's value.
+    if (Attr.isUnknown()) {
+      Attr = Other;
+      return true;
+    }
+
+    // Case 3: We are determined but the other is still unknown; keep
+    // everything unchanged.
+    if (Other.isUnknown())
+      return true;
+
+    // After this point, both are determined.
+
+    // Case 4: If they are the same, do nothing.
+    if (Attr == Other)
+      return true;
+
+    // From here on, they differ.
+
+    // Case 5: Exactly one of us uses clusters (if neither did, case 4 would
+    // hold), so whether clusters will be used is unknown, and unlike case 1
+    // this state is final.
+    if (Attr.isNoCluster() || Other.isNoCluster()) {
+      Attr.setUnknown();
+      return false;
+    }
+
+    // Case 6: Both of us use clusters, but with different dims, so clusters
+    // are used but the dims are not fixed.
+    Attr.setVariableDims();
+    return true;
+  }
+
+  AMDGPU::ClusterDimsAttr Attr;
+
+  static constexpr const char AttrName[] = "amdgpu-cluster-dims";
+};
+
+AAAMDGPUClusterDims &
+AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
+  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+    return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
+  llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
+}
+
 static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
                     AMDGPUAttributorOptions Options,
                     ThinOrFullLTOPhase LTOPhase) {
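
The six cases in merge() above implement a small meet operation over the states {unknown, no-cluster, fixed dims, variable dims}. A minimal, self-contained model of that lattice follows; the Dims enum and mergeDims helper are illustrative assumptions, not the real AMDGPU::ClusterDimsAttr API (which also carries the concrete dimensions):

// Standalone model of the merge lattice; compile with any C++11 compiler.
#include <cassert>

enum class Dims { Unknown, NoCluster, FixedA, FixedB, Variable };

// Returns the merged state; Final mirrors merge() returning false, i.e. the
// pessimistic fixpoint reached in case 5.
static Dims mergeDims(Dims A, Dims B, bool &Final) {
  Final = false;
  if (A == Dims::Unknown && B == Dims::Unknown)
    return Dims::Unknown;                               // case 1
  if (A == Dims::Unknown)
    return B;                                           // case 2
  if (B == Dims::Unknown)
    return A;                                           // case 3
  if (A == B)
    return A;                                           // case 4
  if (A == Dims::NoCluster || B == Dims::NoCluster) {
    Final = true;                                       // case 5: final unknown
    return Dims::Unknown;
  }
  return Dims::Variable;                                // case 6
}

int main() {
  bool Final = false;
  // Two callers with different fixed dims -> variable dims.
  assert(mergeDims(Dims::FixedA, Dims::FixedB, Final) == Dims::Variable);
  // A cluster caller and a no-cluster caller -> unknown, and final.
  assert(mergeDims(Dims::FixedA, Dims::NoCluster, Final) == Dims::Unknown);
  assert(Final);
  return 0;
}
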
@@ -1314,7 +1472,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
       &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
       &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
-      &AAIndirectCallInfo::ID});
+      &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1352,6 +1510,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
     }
 
+    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
+    if (!F->isDeclaration() && ST.hasClusters())
+      A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
+
     for (auto &I : instructions(F)) {
       Value *Ptr = nullptr;
       if (auto *LI = dyn_cast<LoadInst>(&I))
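
Once manifest() has run, functions whose state resolved carry the "amdgpu-cluster-dims" string attribute. Below is a hedged sketch of how a later pass might query it; the attribute name comes from the patch, but the value encoding produced by ClusterDimsAttr::to_string() is not shown in this diff, so the interpretation of the string is an assumption:

// Sketch: querying the manifested attribute from another pass.
#include "llvm/IR/Function.h"
using namespace llvm;

static bool hasClusterDimsInfo(const Function &F) {
  // If the state stayed unknown, manifest() never attached the attribute.
  if (!F.hasFnAttribute("amdgpu-cluster-dims"))
    return false;
  StringRef Val = F.getFnAttribute("amdgpu-cluster-dims").getValueAsString();
  // A non-empty value is present; decoding it (fixed dims vs. the special
  // no-cluster/variable encodings) depends on ClusterDimsAttr's string
  // format, which this diff does not show.
  return !Val.empty();
}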