@@ -168,9 +168,18 @@ class AMDGPUInformationCache : public InformationCache {
168168 return ST.supportsGetDoorbellID ();
169169 }
170170
171- std::pair<unsigned , unsigned > getFlatWorkGroupSizes (const Function &F) {
171+ std::optional<std::pair<unsigned , unsigned >>
172+ getFlatWorkGroupSizeAttr (const Function &F) const {
173+ auto R = AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size" );
174+ if (!R)
175+ return std::nullopt ;
176+ return std::make_pair (R->first , *(R->second ));
177+ }
178+
179+ std::pair<unsigned , unsigned >
180+ getDefaultFlatWorkGroupSize (const Function &F) const {
172181 const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
173- return ST.getFlatWorkGroupSizes (F );
182+ return ST.getDefaultFlatWorkGroupSize (F. getCallingConv () );
174183 }
175184
176185 std::pair<unsigned , unsigned >
@@ -733,6 +742,35 @@ struct AAAMDSizeRangeAttribute
733742 return Change;
734743 }
735744
745+ // / Clamp the assumed range to the default value ([Min, Max]) and emit the
746+ // / attribute if it is not same as default.
747+ ChangeStatus
748+ emitAttributeIfNotDefaultAfterClamp (Attributor &A,
749+ std::pair<unsigned , unsigned > Default) {
750+ auto [Min, Max] = Default;
751+ unsigned Lower = getAssumed ().getLower ().getZExtValue ();
752+ unsigned Upper = getAssumed ().getUpper ().getZExtValue ();
753+
754+ // Clamp the range to the default value.
755+ if (Lower < Min)
756+ Lower = Min;
757+ if (Upper > Max + 1 )
758+ Upper = Max + 1 ;
759+
760+ // No manifest if the value is invalid or same as default after clamp.
761+ if ((Lower == Min && Upper == Max + 1 ) || (Upper < Lower))
762+ return ChangeStatus::UNCHANGED;
763+
764+ Function *F = getAssociatedFunction ();
765+ LLVMContext &Ctx = F->getContext ();
766+ SmallString<10 > Buffer;
767+ raw_svector_ostream OS (Buffer);
768+ OS << Lower << ' ,' << Upper - 1 ;
769+ return A.manifestAttrs (getIRPosition (),
770+ {Attribute::get (Ctx, AttrName, OS.str ())},
771+ /* ForceReplace=*/ true );
772+ }
773+
736774 ChangeStatus emitAttributeIfNotDefault (Attributor &A, unsigned Min,
737775 unsigned Max) {
738776 // Don't add the attribute if it's the implied default.
@@ -767,13 +805,33 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
767805 void initialize (Attributor &A) override {
768806 Function *F = getAssociatedFunction ();
769807 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
770- unsigned MinGroupSize, MaxGroupSize;
771- std::tie (MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes (*F);
772- intersectKnown (
773- ConstantRange (APInt (32 , MinGroupSize), APInt (32 , MaxGroupSize + 1 )));
774808
775- if (AMDGPU::isEntryFunctionCC (F->getCallingConv ()))
776- indicatePessimisticFixpoint ();
809+ bool HasAttr = false ;
810+ auto Range = InfoCache.getDefaultFlatWorkGroupSize (*F);
811+ auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange (*F);
812+
813+ if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr (*F)) {
814+ // We only consider an attribute that is not max range because the front
815+ // end always emits the attribute, unfortunately, and sometimes it emits
816+ // the max range.
817+ if (*Attr != MaxRange) {
818+ Range = *Attr;
819+ HasAttr = true ;
820+ }
821+ }
822+
823+ // We don't want to directly clamp the state if it's the max range because
824+ // that is basically the worst state.
825+ if (Range == MaxRange)
826+ return ;
827+
828+ auto [Min, Max] = Range;
829+ ConstantRange CR (APInt (32 , Min), APInt (32 , Max + 1 ));
830+ IntegerRangeState IRS (CR);
831+ clampStateAndIndicateChange (this ->getState (), IRS);
832+
833+ if (HasAttr || AMDGPU::isEntryFunctionCC (F->getCallingConv ()))
834+ indicateOptimisticFixpoint ();
777835 }
778836
779837 ChangeStatus updateImpl (Attributor &A) override {
@@ -787,9 +845,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
787845 ChangeStatus manifest (Attributor &A) override {
788846 Function *F = getAssociatedFunction ();
789847 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
790- unsigned Min, Max;
791- std::tie (Min, Max) = InfoCache.getMaximumFlatWorkGroupRange (*F);
792- return emitAttributeIfNotDefault (A, Min, Max);
848+ return emitAttributeIfNotDefaultAfterClamp (
849+ A, InfoCache.getMaximumFlatWorkGroupRange (*F));
793850 }
794851
795852 // / See AbstractAttribute::getName()
0 commit comments