|
13 | 13 | #include "AMDGPU.h"
|
14 | 14 | #include "GCNSubtarget.h"
|
15 | 15 | #include "Utils/AMDGPUBaseInfo.h"
|
| 16 | +#include "llvm/Analysis/TargetTransformInfo.h" |
16 | 17 | #include "llvm/IR/IntrinsicsAMDGPU.h"
|
17 | 18 | #include "llvm/IR/IntrinsicsR600.h"
|
18 | 19 | #include "llvm/Target/TargetMachine.h"
|
@@ -1296,6 +1297,116 @@ struct AAAMDGPUNoAGPR
|
1296 | 1297 |
|
1297 | 1298 | const char AAAMDGPUNoAGPR::ID = 0;
|
1298 | 1299 |
|
| 1300 | +struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> { |
| 1301 | + using Base = StateWrapper<BooleanState, AbstractAttribute>; |
| 1302 | + AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
| 1303 | + |
| 1304 | + /// Create an abstract attribute view for the position \p IRP. |
| 1305 | + static AAAMDGPUUniform &createForPosition(const IRPosition &IRP, |
| 1306 | + Attributor &A); |
| 1307 | + |
| 1308 | + /// See AbstractAttribute::getName() |
| 1309 | + StringRef getName() const override { return "AAAMDGPUUniform"; } |
| 1310 | + |
| 1311 | + const std::string getAsStr(Attributor *A) const override { |
| 1312 | + return getAssumed() ? "uniform" : "divergent"; |
| 1313 | + } |
| 1314 | + |
| 1315 | + void trackStatistics() const override {} |
| 1316 | + |
| 1317 | + /// See AbstractAttribute::getIdAddr() |
| 1318 | + const char *getIdAddr() const override { return &ID; } |
| 1319 | + |
| 1320 | + /// This function should return true if the type of the \p AA is |
| 1321 | + /// AAAMDGPUUniform |
| 1322 | + static bool classof(const AbstractAttribute *AA) { |
| 1323 | + return (AA->getIdAddr() == &ID); |
| 1324 | + } |
| 1325 | + |
| 1326 | + /// Unique ID (due to the unique address) |
| 1327 | + static const char ID; |
| 1328 | +}; |
| 1329 | + |
| 1330 | +const char AAAMDGPUUniform::ID = 0; |
| 1331 | + |
| 1332 | +/// This AA is to infer the inreg attribute for a function argument. |
| 1333 | +struct AAAMDGPUUniformArgument : public AAAMDGPUUniform { |
| 1334 | + AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A) |
| 1335 | + : AAAMDGPUUniform(IRP, A) {} |
| 1336 | + |
| 1337 | + void initialize(Attributor &A) override { |
| 1338 | + Argument *Arg = getAssociatedArgument(); |
| 1339 | + CallingConv::ID CC = Arg->getParent()->getCallingConv(); |
| 1340 | + if (Arg->hasAttribute(Attribute::InReg)) { |
| 1341 | + indicateOptimisticFixpoint(); |
| 1342 | + return; |
| 1343 | + } |
| 1344 | + |
| 1345 | + if (AMDGPU::isEntryFunctionCC(CC)) { |
| 1346 | + // We only use isArgPassedInSGPR on kernel entry function argument, so |
| 1347 | + // even if we will use SPGR for non-uniform i1 argument passing, it will |
| 1348 | + // not affect this. |
| 1349 | + if (AMDGPU::isArgPassedInSGPR(Arg)) |
| 1350 | + indicateOptimisticFixpoint(); |
| 1351 | + else |
| 1352 | + indicatePessimisticFixpoint(); |
| 1353 | + } |
| 1354 | + } |
| 1355 | + |
| 1356 | + ChangeStatus updateImpl(Attributor &A) override { |
| 1357 | + unsigned ArgNo = getAssociatedArgument()->getArgNo(); |
| 1358 | + |
| 1359 | + auto isUniform = [&](AbstractCallSite ACS) -> bool { |
| 1360 | + CallBase *CB = ACS.getInstruction(); |
| 1361 | + Value *V = CB->getArgOperand(ArgNo); |
| 1362 | + if (isa<Constant>(V)) |
| 1363 | + return true; |
| 1364 | + if (auto *Arg = dyn_cast<Argument>(V)) { |
| 1365 | + auto *AA = A.getOrCreateAAFor<AAAMDGPUUniform>( |
| 1366 | + IRPosition::argument(*Arg), this, DepClassTy::REQUIRED); |
| 1367 | + return AA && AA->isValidState(); |
| 1368 | + } |
| 1369 | + const TargetTransformInfo *TTI = |
| 1370 | + A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>( |
| 1371 | + *CB->getFunction()); |
| 1372 | + return TTI->isAlwaysUniform(V); |
| 1373 | + }; |
| 1374 | + |
| 1375 | + bool UsedAssumedInformation = true; |
| 1376 | + if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true, |
| 1377 | + UsedAssumedInformation)) |
| 1378 | + return indicatePessimisticFixpoint(); |
| 1379 | + |
| 1380 | + if (!UsedAssumedInformation) |
| 1381 | + return indicateOptimisticFixpoint(); |
| 1382 | + |
| 1383 | + return ChangeStatus::UNCHANGED; |
| 1384 | + } |
| 1385 | + |
| 1386 | + ChangeStatus manifest(Attributor &A) override { |
| 1387 | + Argument *Arg = getAssociatedArgument(); |
| 1388 | + // If the argument already has inreg attribute, we will not do anything |
| 1389 | + // about it. |
| 1390 | + if (Arg->hasAttribute(Attribute::InReg)) |
| 1391 | + return ChangeStatus::UNCHANGED; |
| 1392 | + if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv())) |
| 1393 | + return ChangeStatus::UNCHANGED; |
| 1394 | + LLVMContext &Ctx = Arg->getContext(); |
| 1395 | + return A.manifestAttrs(getIRPosition(), |
| 1396 | + {Attribute::get(Ctx, Attribute::InReg)}); |
| 1397 | + } |
| 1398 | +}; |
| 1399 | + |
| 1400 | +AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP, |
| 1401 | + Attributor &A) { |
| 1402 | + switch (IRP.getPositionKind()) { |
| 1403 | + case IRPosition::IRP_ARGUMENT: |
| 1404 | + return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A); |
| 1405 | + default: |
| 1406 | + llvm_unreachable("not a valid position for AAAMDGPUUniform"); |
| 1407 | + } |
| 1408 | +} |
| 1409 | + |
1299 | 1410 | /// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
|
1300 | 1411 | /// based on the finalized 'amdgpu-flat-work-group-size' attribute.
|
1301 | 1412 | /// Both attributes start with narrow ranges that expand during iteration.
|
@@ -1382,7 +1493,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
|
1382 | 1493 | &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
|
1383 | 1494 | &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
|
1384 | 1495 | &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
|
1385 |
| - &AAIndirectCallInfo::ID}); |
| 1496 | + &AAIndirectCallInfo::ID, &AAAMDGPUUniform::ID}); |
1386 | 1497 |
|
1387 | 1498 | AttributorConfig AC(CGUpdater);
|
1388 | 1499 | AC.IsClosedWorldModule = Options.IsClosedWorld;
|
@@ -1435,6 +1546,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
|
1435 | 1546 | A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
|
1436 | 1547 | A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
|
1437 | 1548 | }
|
| 1549 | + |
| 1550 | + if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { |
| 1551 | + for (auto &Arg : F->args()) |
| 1552 | + A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg)); |
| 1553 | + } |
1438 | 1554 | }
|
1439 | 1555 | }
|
1440 | 1556 |
|
|
0 commit comments