@@ -168,9 +168,15 @@ class AMDGPUInformationCache : public InformationCache {
168168 return ST.supportsGetDoorbellID ();
169169 }
170170
// Old accessor signature dropped by this hunk ('-' line). Its body was the
// GCNSubtarget lookup kept below plus the removed ST.getFlatWorkGroupSizes call.
171- std::pair<unsigned , unsigned > getFlatWorkGroupSizes (const Function &F) {
// Returns whatever AMDGPU::getIntegerPairAttribute yields for \p F and the
// attribute-name string literal passed below; no subtarget is consulted here.
// NOTE(review): presumably the optional is empty when the attribute is absent
// or malformed -- confirm against AMDGPU::getIntegerPairAttribute.
171+ std::optional<std::pair<unsigned , unsigned >>
172+ getFlatWorkGroupSizeAttr (const Function &F) const {
173+ return AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size" );
174+ }
175+
// Returns the subtarget's default flat work-group size pair for \p F. Only the
// function's calling convention is forwarded to the subtarget query, so any
// attribute on \p F is not read by this accessor.
176+ std::pair<unsigned , unsigned >
177+ getDefaultFlatWorkGroupSize (const Function &F) const {
172178 const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
173- return ST.getFlatWorkGroupSizes (F );
179+ return ST.getDefaultFlatWorkGroupSize (F. getCallingConv () );
174180 }
175181
176182 std::pair<unsigned , unsigned >
@@ -733,6 +739,35 @@ struct AAAMDSizeRangeAttribute
733739 return Change;
734740 }
735741
// Manifest helper shared by size-range attributes.
//
// \param A        Attributor used to manifest the attribute.
// \param Default  Inclusive [Min, Max] pair the assumed range is clamped to.
// \returns ChangeStatus::UNCHANGED when nothing is emitted; otherwise the
//          result of A.manifestAttrs for the "Lower,Upper-1" string attribute.
//
// The assumed range is handled as half-open [Lower, Upper): it is compared
// against Max + 1 and printed as Upper - 1.
742+ /// Clamp the assumed range to the default value ([Min, Max]) and emit the
743+ /// attribute if it is non-empty and not the same as the default.
744+ ChangeStatus
745+ emitAttributeIfNotDefaultAfterClamp (Attributor &A,
746+ std::pair<unsigned , unsigned > Default) {
747+ auto [Min, Max] = Default;
748+ unsigned Lower = getAssumed ().getLower ().getZExtValue ();
749+ unsigned Upper = getAssumed ().getUpper ().getZExtValue ();
750+
751+ // Clamp the range to the default value.
752+ if (Lower < Min)
753+ Lower = Min;
754+ if (Upper > Max + 1 )
755+ Upper = Max + 1 ;
756+
757+ // No manifest if the value is invalid or same as default after clamp.
// Upper is exclusive, so Upper == Lower is an empty range. Emitting it would
// print "Lower,Lower-1" (minimum above maximum), and Upper - 1 would wrap when
// Upper is 0. Reject it together with the Upper < Lower case.
758+ if ((Lower == Min && Upper == Max + 1 ) || (Upper <= Lower))
759+ return ChangeStatus::UNCHANGED;
760+
761+ Function *F = getAssociatedFunction ();
762+ LLVMContext &Ctx = F->getContext ();
763+ SmallString<10 > Buffer;
764+ raw_svector_ostream OS (Buffer);
// Convert the exclusive upper bound back to the inclusive maximum.
765+ OS << Lower << ' ,' << Upper - 1 ;
766+ return A.manifestAttrs (getIRPosition (),
767+ {Attribute::get (Ctx, AttrName, OS.str ())},
768+ /* ForceReplace=*/ true );
769+ }
770+
736771 ChangeStatus emitAttributeIfNotDefault (Attributor &A, unsigned Min,
737772 unsigned Max) {
738773 // Don't add the attribute if it's the implied default.
@@ -767,13 +802,32 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
// Seeds the initial state of the flat work-group size attribute for the
// associated function. The '-' lines are the previous implementation, which
// intersected the known range with InfoCache.getFlatWorkGroupSizes and marked
// entry functions with a pessimistic fixpoint.
767802 void initialize (Attributor &A) override {
768803 Function *F = getAssociatedFunction ();
769804 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
770- unsigned MinGroupSize, MaxGroupSize;
771- std::tie (MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes (*F);
772- intersectKnown (
773- ConstantRange (APInt (32 , MinGroupSize), APInt (32 , MaxGroupSize + 1 )));
774805
775- if (AMDGPU::isEntryFunctionCC (F->getCallingConv ()))
776- indicatePessimisticFixpoint ();
// Start from the default pair; an explicit attribute may override it below.
806+ bool HasAttr = false ;
807+ auto [Min, Max] = InfoCache.getDefaultFlatWorkGroupSize (*F);
808+ auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange (*F);
809+
810+ if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr (*F)) {
811+ // We only consider an attribute that differs from the max range because
812+ // the front end always emits the attribute, unfortunately, and sometimes
813+ // it emits the max range.
814+ if (*Attr != MaxRange) {
815+ std::tie (Min, Max) = *Attr;
816+ HasAttr = true ;
817+ }
818+ }
819+
820+ // We don't want to directly clamp the state if it is the max range because
821+ // it is basically the worst state.
// This return also bypasses the fixpoint indication at the end of the function.
822+ if (MaxRange == std::make_pair (Min, Max))
823+ return ;
824+
// [Min, Max] is inclusive; the ConstantRange upper bound is Max + 1.
825+ ConstantRange Range (APInt (32 , Min), APInt (32 , Max + 1 ));
826+ IntegerRangeState RangeState (Range);
827+ clampStateAndIndicateChange (this ->getState (), RangeState);
828+
// An explicit (non-max) attribute or an entry-function calling convention
// fixes the state as seeded above.
829+ if (HasAttr || AMDGPU::isEntryFunctionCC (F->getCallingConv ()))
830+ indicateOptimisticFixpoint ();
777831 }
778832
779833 ChangeStatus updateImpl (Attributor &A) override {
@@ -787,9 +841,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
// Manifests the attribute for the associated function. The '-' lines are the
// previous implementation, which unpacked the maximum range into Min/Max and
// called emitAttributeIfNotDefault.
787841 ChangeStatus manifest (Attributor &A) override {
788842 Function *F = getAssociatedFunction ();
789843 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
790- unsigned Min, Max;
791- std::tie (Min, Max) = InfoCache.getMaximumFlatWorkGroupRange (*F);
792- return emitAttributeIfNotDefault (A, Min, Max);
// The maximum flat work-group range is passed as the Default pair, so the
// helper clamps the assumed range to it before deciding whether to emit.
844+ return emitAttributeIfNotDefaultAfterClamp (
845+ A, InfoCache.getMaximumFlatWorkGroupRange (*F));
793846 }
794847
795848 // / See AbstractAttribute::getName()
0 commit comments