@@ -265,6 +265,18 @@ class AMDGPUInformationCache : public InformationCache {
265265 return !HasAperture && (Access & ADDR_SPACE_CAST);
266266 }
267267
268+ bool checkConstForAddrSpaceCastFromPrivate (const Constant *C) {
269+ SmallPtrSet<const Constant *, 8 > Visited;
270+ uint8_t Access = getConstantAccess (C, Visited);
271+
272+ if (Access & ADDR_SPACE_CAST)
273+ if (const auto *CE = dyn_cast<ConstantExpr>(C))
274+ if (CE->getOperand (0 )->getType ()->getPointerAddressSpace () ==
275+ AMDGPUAS::PRIVATE_ADDRESS)
276+ return true ;
277+ return false ;
278+ }
279+
268280private:
269281 /// Used to determine if the Constant needs the queue pointer.
270282 DenseMap<const Constant *, uint8_t > ConstantStatus;
@@ -529,6 +541,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
529541 if (isAssumed (COMPLETION_ACTION) && funcRetrievesCompletionAction (A, COV))
530542 removeAssumedBits (COMPLETION_ACTION);
531543
544+ if (isAssumed (FLAT_SCRATCH_INIT) && needFlatScratchInit (A))
545+ removeAssumedBits (FLAT_SCRATCH_INIT);
546+
532547 return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED
533548 : ChangeStatus::UNCHANGED;
534549 }
@@ -687,6 +702,65 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
687702 return !A.checkForAllCallLikeInstructions (DoesNotRetrieve, *this ,
688703 UsedAssumedInformation);
689704 }
705+
706+ // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
707+ // not to be set.
708+ bool needFlatScratchInit (Attributor &A) {
709+ assert (isAssumed (FLAT_SCRATCH_INIT)); // only called if the bit is still set
710+
711+ // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
712+ // there is a cast from PRIVATE_ADDRESS.
713+ auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
714+ return cast<AddrSpaceCastInst>(I).getSrcAddressSpace () !=
715+ AMDGPUAS::PRIVATE_ADDRESS;
716+ };
717+
718+ bool UsedAssumedInformation = false ;
719+ if (!A.checkForAllInstructions (AddrSpaceCastNotFromPrivate, *this ,
720+ {Instruction::AddrSpaceCast},
721+ UsedAssumedInformation))
722+ return true ;
723+
724+ // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
725+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
726+
727+ Function *F = getAssociatedFunction ();
728+ for (Instruction &I : instructions (F)) {
729+ for (const Use &U : I.operands ()) {
730+ if (const auto *C = dyn_cast<Constant>(U)) {
731+ if (InfoCache.checkConstForAddrSpaceCastFromPrivate (C))
732+ return true ;
733+ }
734+ }
735+ }
736+
737+ // Finally check callees.
738+
739+ // This is called on each callee; false means callee shouldn't have
740+ // no-flat-scratch-init.
741+ auto CheckForNoFlatScratchInit = [&](Instruction &I) {
742+ const auto &CB = cast<CallBase>(I);
743+ const Function *Callee = CB.getCalledFunction ();
744+
745+ // Callee == 0 for inline asm or indirect call with known callees.
746+ // In the latter case, updateImpl() already checked the callees and we
747+ // know their FLAT_SCRATCH_INIT bit is set.
748+ // If function has indirect call with unknown callees, the bit is
749+ // already removed in updateImpl() and execution won't reach here.
750+ if (!Callee)
751+ return true ;
752+
753+ return Callee->getIntrinsicID () !=
754+ Intrinsic::amdgcn_addrspacecast_nonnull;
755+ };
756+
757+ UsedAssumedInformation = false ;
758+ // If any callee is false (i.e. need FlatScratchInit),
759+ // checkForAllCallLikeInstructions returns false, in which case this
760+ // function returns true.
761+ return !A.checkForAllCallLikeInstructions (CheckForNoFlatScratchInit, *this ,
762+ UsedAssumedInformation);
763+ }
690764};
691765
692766AAAMDAttributes &AAAMDAttributes::createForPosition (const IRPosition &IRP,
0 commit comments