@@ -168,9 +168,18 @@ class AMDGPUInformationCache : public InformationCache {
168168 return ST.supportsGetDoorbellID ();
169169 }
170170
171- std::pair<unsigned , unsigned > getFlatWorkGroupSizes (const Function &F) {
// Looks up the flat-work-group-size attribute on F through
// AMDGPU::getIntegerPairAttribute and repackages the result as a plain
// (first, second) pair. Returns std::nullopt when that lookup yields no value.
171+ std::optional<std::pair<unsigned , unsigned >>
172+ getFlatWorkGroupSizeAttr (const Function &F) const {
173+ auto R = AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size" );
174+ if (!R) // the lookup produced nothing: report "no attribute" to the caller
175+ return std::nullopt ;
// NOTE(review): R->second is dereferenced without a check; presumably the
// helper always fills it in when it returns a value -- confirm.
176+ return std::make_pair (R->first , *(R->second ));
177+ }
178+
// Returns the default flat work group size pair that the GCN subtarget
// reports for F's calling convention. Only the calling convention of F is
// passed to the subtarget query.
179+ std::pair<unsigned , unsigned >
180+ getDefaultFlatWorkGroupSize (const Function &F) const {
172181 const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
173- return ST.getFlatWorkGroupSizes (F ); // old return statement, removed by this diff
182+ return ST.getDefaultFlatWorkGroupSize (F. getCallingConv () );
174183 }
175184
176185 std::pair<unsigned , unsigned >
@@ -738,6 +747,35 @@ struct AAAMDSizeRangeAttribute
738747 return Change;
739748 }
740749
750+ // / Clamp the assumed range to the default range [Min, Max] and emit the
751+ // / attribute only if the clamped result is not the same as that default.
// / \p Default is the inclusive (Min, Max) pair. The assumed range's upper
// / bound is handled as exclusive: it is compared against Max + 1 and printed
// / as Upper - 1. Returns ChangeStatus::UNCHANGED when nothing is emitted,
// / otherwise whatever A.manifestAttrs returns.
752+ ChangeStatus
753+ emitAttributeIfNotDefaultAfterClamp (Attributor &A,
754+ std::pair<unsigned , unsigned > Default) {
755+ auto [Min, Max] = Default;
756+ unsigned Lower = getAssumed ().getLower ().getZExtValue ();
757+ unsigned Upper = getAssumed ().getUpper ().getZExtValue ();
758+
759+ // Raise Lower to Min and lower Upper to Max + 1 where they fall outside the default.
760+ if (Lower < Min)
761+ Lower = Min;
762+ if (Upper > Max + 1 )
763+ Upper = Max + 1 ;
764+
765+ // Do not manifest when the clamped range equals the default or is inverted (Upper < Lower).
// NOTE(review): Lower == Upper is not filtered here and would print a pair
// whose second value is Lower - 1 -- confirm whether that case is reachable.
766+ if ((Lower == Min && Upper == Max + 1 ) || (Upper < Lower))
767+ return ChangeStatus::UNCHANGED;
768+
769+ Function *F = getAssociatedFunction ();
770+ LLVMContext &Ctx = F->getContext ();
771+ SmallString<10 > Buffer; // backing storage for the formatted attribute value
772+ raw_svector_ostream OS (Buffer);
773+ OS << Lower << ' ,' << Upper - 1 ; // Upper is exclusive, so the value written is Upper - 1
774+ return A.manifestAttrs (getIRPosition (),
775+ {Attribute::get (Ctx, AttrName, OS.str ())},
776+ /* ForceReplace=*/ true );
777+ }
778+
741779 ChangeStatus emitAttributeIfNotDefault (Attributor &A, unsigned Min,
742780 unsigned Max) {
743781 // Don't add the attribute if it's the implied default.
@@ -772,13 +810,33 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
// Seeds the state for the associated function. The starting range is the
// default flat work group size, replaced by the function's explicit attribute
// when one is present and differs from the maximum range. If the chosen range
// equals the maximum range the state is left untouched; otherwise the state is
// clamped to it, and an optimistic fixpoint is indicated when the range came
// from an attribute or the function has an entry-function calling convention.
772810 void initialize (Attributor &A) override {
773811 Function *F = getAssociatedFunction ();
774812 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
775- unsigned MinGroupSize, MaxGroupSize;
776- std::tie (MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes (*F);
777- intersectKnown (
778- ConstantRange (APInt (32 , MinGroupSize), APInt (32 , MaxGroupSize + 1 )));
779813
780- if (AMDGPU::isEntryFunctionCC (F->getCallingConv ()))
781- indicatePessimisticFixpoint ();
814+ bool HasAttr = false ; // becomes true only when an explicit, non-maximal attribute is adopted
815+ auto Range = InfoCache.getDefaultFlatWorkGroupSize (*F); // starting point: the default for F
816+ auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange (*F);
817+
818+ if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr (*F)) {
819+ // Only honor an attribute that differs from the max range: the front end
820+ // always emits the attribute, unfortunately, and sometimes it emits the
821+ // max range, so a max-range attribute is treated as if it were absent.
822+ if (*Attr != MaxRange) {
823+ Range = *Attr;
824+ HasAttr = true ;
825+ }
826+ }
827+
828+ // Skip clamping when the chosen range is the max range, because that is
829+ // basically the worst state; the state is left as it was.
830+ if (Range == MaxRange)
831+ return ;
832+
833+ auto [Min, Max] = Range;
834+ ConstantRange CR (APInt (32 , Min), APInt (32 , Max + 1 )); // Max is inclusive, so the upper bound passed is Max + 1
835+ IntegerRangeState IRS (CR);
836+ clampStateAndIndicateChange (this ->getState (), IRS);
837+
838+ if (HasAttr || AMDGPU::isEntryFunctionCC (F->getCallingConv ())) // explicit attribute or entry function: stop iterating on this state
839+ indicateOptimisticFixpoint ();
782840 }
783841
784842 ChangeStatus updateImpl (Attributor &A) override {
@@ -792,9 +850,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
// Writes the attribute for the associated function by delegating to
// emitAttributeIfNotDefaultAfterClamp, passing the maximum flat work group
// range for F as the default to clamp against and compare with.
792850 ChangeStatus manifest (Attributor &A) override {
793851 Function *F = getAssociatedFunction ();
794852 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
795- unsigned Min, Max;
796- std::tie (Min, Max) = InfoCache.getMaximumFlatWorkGroupRange (*F);
797- return emitAttributeIfNotDefault (A, Min, Max);
853+ return emitAttributeIfNotDefaultAfterClamp (
854+ A, InfoCache.getMaximumFlatWorkGroupRange (*F));
798855 }
799856
800857 // / See AbstractAttribute::getName()
0 commit comments