@@ -144,6 +144,20 @@ static bool funcRequiresHostcallPtr(const Function &F) {
144144}
145145
146146namespace {
147+
148+ std::optional<std::pair<unsigned , unsigned >> static parseRangeAttribute (
149+ StringRef Attr, bool OnlyFirstRequired = false ) {
150+ std::pair<unsigned , unsigned > Val;
151+ std::pair<StringRef, StringRef> Strs = Attr.split (' ,' );
152+ if (Strs.first .trim ().getAsInteger (0 , Val.first ))
153+ return std::nullopt ;
154+ if (Strs.second .trim ().getAsInteger (0 , Val.second )) {
155+ if (!OnlyFirstRequired || !Strs.second .trim ().empty ())
156+ return std::nullopt ;
157+ }
158+ return Val;
159+ }
160+
147161class AMDGPUInformationCache : public InformationCache {
148162public:
149163 AMDGPUInformationCache (const Module &M, AnalysisGetter &AG,
@@ -168,9 +182,18 @@ class AMDGPUInformationCache : public InformationCache {
168182 return ST.supportsGetDoorbellID ();
169183 }
170184
171- std::pair<unsigned , unsigned > getFlatWorkGroupSizes (const Function &F) {
185+ std::optional<std::pair<unsigned , unsigned >>
186+ getFlatWorkGroupSizeAttr (const Function &F) const {
187+ Attribute Attr = F.getFnAttribute (" amdgpu-flat-work-group-size" );
188+ if (!Attr.isStringAttribute ())
189+ return std::nullopt ;
190+ return parseRangeAttribute (Attr.getValueAsString ());
191+ }
192+
193+ std::pair<unsigned , unsigned >
194+ getDefaultFlatWorkGroupSize (const Function &F) const {
172195 const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
173- return ST.getFlatWorkGroupSizes (F );
196+ return ST.getDefaultFlatWorkGroupSize (F. getCallingConv () );
174197 }
175198
176199 std::pair<unsigned , unsigned >
@@ -733,6 +756,35 @@ struct AAAMDSizeRangeAttribute
733756 return Change;
734757 }
735758
759+ // / Clamp the assumed range to the default value ([Min, Max]) and emit the
760+ // / attribute if it is not same as default.
761+ ChangeStatus
762+ emitAttributeIfNotDefaultAfterClamp (Attributor &A,
763+ std::pair<unsigned , unsigned > Default) {
764+ auto [Min, Max] = Default;
765+ unsigned Lower = getAssumed ().getLower ().getZExtValue ();
766+ unsigned Upper = getAssumed ().getUpper ().getZExtValue ();
767+
768+ // Clamp the range to the default value.
769+ if (Lower < Min)
770+ Lower = Min;
771+ if (Upper > Max + 1 )
772+ Upper = Max + 1 ;
773+
774+ // No manifest if the value is invalid or same as default after clamp.
775+ if ((Lower == Min && Upper == Max + 1 ) || (Upper < Lower))
776+ return ChangeStatus::UNCHANGED;
777+
778+ Function *F = getAssociatedFunction ();
779+ LLVMContext &Ctx = F->getContext ();
780+ SmallString<10 > Buffer;
781+ raw_svector_ostream OS (Buffer);
782+ OS << Lower << ' ,' << Upper - 1 ;
783+ return A.manifestAttrs (getIRPosition (),
784+ {Attribute::get (Ctx, AttrName, OS.str ())},
785+ /* ForceReplace=*/ true );
786+ }
787+
736788 ChangeStatus emitAttributeIfNotDefault (Attributor &A, unsigned Min,
737789 unsigned Max) {
738790 // Don't add the attribute if it's the implied default.
@@ -767,13 +819,21 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
767819 void initialize (Attributor &A) override {
768820 Function *F = getAssociatedFunction ();
769821 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
770- unsigned MinGroupSize, MaxGroupSize;
771- std::tie (MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes (*F);
772- intersectKnown (
773- ConstantRange (APInt (32 , MinGroupSize), APInt (32 , MaxGroupSize + 1 )));
774822
775- if (AMDGPU::isEntryFunctionCC (F->getCallingConv ()))
776- indicatePessimisticFixpoint ();
823+ bool HasAttr = false ;
824+ auto [Min, Max] = InfoCache.getDefaultFlatWorkGroupSize (*F);
825+
826+ if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr (*F)) {
827+ std::tie (Min, Max) = *Attr;
828+ HasAttr = true ;
829+ }
830+
831+ ConstantRange Range (APInt (32 , Min), APInt (32 , Max + 1 ));
832+ IntegerRangeState RangeState (Range);
833+ clampStateAndIndicateChange (this ->getState (), RangeState);
834+
835+ if (HasAttr || AMDGPU::isEntryFunctionCC (F->getCallingConv ()))
836+ indicateOptimisticFixpoint ();
777837 }
778838
779839 ChangeStatus updateImpl (Attributor &A) override {
@@ -787,9 +847,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
787847 ChangeStatus manifest (Attributor &A) override {
788848 Function *F = getAssociatedFunction ();
789849 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
790- unsigned Min, Max;
791- std::tie (Min, Max) = InfoCache.getMaximumFlatWorkGroupRange (*F);
792- return emitAttributeIfNotDefault (A, Min, Max);
850+ return emitAttributeIfNotDefaultAfterClamp (
851+ A, InfoCache.getMaximumFlatWorkGroupRange (*F));
793852 }
794853
795854 // / See AbstractAttribute::getName()
0 commit comments