@@ -168,9 +168,18 @@ class AMDGPUInformationCache : public InformationCache {
168168 return ST.supportsGetDoorbellID ();
169169 }
170170
171- std::pair<unsigned , unsigned > getFlatWorkGroupSizes (const Function &F) { // removed side of the diff: old single accessor, superseded by the two helpers added below
171+ std::optional<std::pair<unsigned , unsigned >>
172+ getFlatWorkGroupSizeAttr (const Function &F) const { // Returns the flat-work-group-size integer pair read from F's attribute, or std::nullopt when the lookup yields nothing.
173+ auto R = AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size" );
174+ if (!R) // lookup produced no value: report "no attribute" to the caller
175+ return std::nullopt ;
176+ return std::make_pair (R->first , *(R->second )); // NOTE(review): R->second is dereferenced unchecked; presumably always set whenever R is -- confirm against getIntegerPairAttribute
177+ }
178+
179+ std::pair<unsigned , unsigned >
180+ getDefaultFlatWorkGroupSize (const Function &F) const { // Returns the subtarget's default flat work-group size; only F's calling convention is passed to the subtarget query.
172181 const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
173- return ST.getFlatWorkGroupSizes (F );
182+ return ST.getDefaultFlatWorkGroupSize (F. getCallingConv () );
174183 }
175184
176185 std::pair<unsigned , unsigned >
@@ -812,6 +821,35 @@ struct AAAMDSizeRangeAttribute
812821 return Change;
813822 }
814823
824+ /// Clamp the assumed range to the default range [Min, Max] and emit the
825+ /// attribute only if the clamped range differs from that default.
826+ ChangeStatus
827+ emitAttributeIfNotDefaultAfterClamp (Attributor &A,
828+ std::pair<unsigned , unsigned > Default) {
829+ auto [Min, Max] = Default;
830+ unsigned Lower = getAssumed ().getLower ().getZExtValue ();
831+ unsigned Upper = getAssumed ().getUpper ().getZExtValue ();
832+
833+ // Clamp the range to the default. Upper is handled as an exclusive bound here: it is compared against Max + 1 and printed as Upper - 1.
834+ if (Lower < Min)
835+ Lower = Min;
836+ if (Upper > Max + 1 )
837+ Upper = Max + 1 ;
838+
839+ // Nothing to manifest if the clamped range equals the default or is inverted (Upper < Lower). NOTE(review): Upper == Lower passes this check and would print a max one below the min -- confirm that case cannot reach here.
840+ if ((Lower == Min && Upper == Max + 1 ) || (Upper < Lower))
841+ return ChangeStatus::UNCHANGED;
842+
843+ Function *F = getAssociatedFunction ();
844+ LLVMContext &Ctx = F->getContext ();
845+ SmallString<10 > Buffer;
846+ raw_svector_ostream OS (Buffer);
847+ OS << Lower << ' ,' << Upper - 1 ; // attribute value text: lower bound, separator, then the inclusive upper bound (Upper - 1)
848+ return A.manifestAttrs (getIRPosition (),
849+ {Attribute::get (Ctx, AttrName, OS.str ())},
850+ /* ForceReplace=*/ true );
851+ }
852+
815853 ChangeStatus emitAttributeIfNotDefault (Attributor &A, unsigned Min,
816854 unsigned Max) {
817855 // Don't add the attribute if it's the implied default.
@@ -846,13 +884,33 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
846884 void initialize (Attributor &A) override { // Seeds the state from the default flat work-group size or from an explicit, non-max-range attribute on the function.
847885 Function *F = getAssociatedFunction ();
848886 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
849- unsigned MinGroupSize, MaxGroupSize;
850- std::tie (MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes (*F);
851- intersectKnown (
852- ConstantRange (APInt (32 , MinGroupSize), APInt (32 , MaxGroupSize + 1 )));
853887
854- if (AMDGPU::isEntryFunctionCC (F->getCallingConv ()))
855- indicatePessimisticFixpoint ();
888+ bool HasAttr = false ; // set only when an explicit attribute other than the max range is found
889+ auto Range = InfoCache.getDefaultFlatWorkGroupSize (*F); // starting point; may be overridden by the attribute below
890+ auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange (*F);
891+
892+ if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr (*F)) {
893+ // Only honor an explicit attribute that differs from the max range: the
894+ // front end always emits the attribute, and sometimes emits the max range,
895+ // so a max-range attribute is treated as if it were absent.
896+ if (*Attr != MaxRange) {
897+ Range = *Attr;
898+ HasAttr = true ;
899+ }
900+ }
901+
902+ // Don't clamp the state to the max range: that is basically the worst
903+ // state, so return and leave the state as it is.
904+ if (Range == MaxRange)
905+ return ;
906+
907+ auto [Min, Max] = Range;
908+ ConstantRange CR (APInt (32 , Min), APInt (32 , Max + 1 )); // upper bound is passed as Max + 1
909+ IntegerRangeState IRS (CR);
910+ clampStateAndIndicateChange (this ->getState (), IRS);
911+
912+ if (HasAttr || AMDGPU::isEntryFunctionCC (F->getCallingConv ())) // fix the state when it came from an explicit attribute or F has an entry-function calling convention
913+ indicateOptimisticFixpoint ();
856914 }
857915
858916 ChangeStatus updateImpl (Attributor &A) override {
@@ -866,9 +924,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
866924 ChangeStatus manifest (Attributor &A) override { // Emits the attribute via the clamping helper; returns that helper's ChangeStatus.
867925 Function *F = getAssociatedFunction ();
868926 auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
869- unsigned Min, Max;
870- std::tie (Min, Max) = InfoCache.getMaximumFlatWorkGroupRange (*F);
871- return emitAttributeIfNotDefault (A, Min, Max);
927+ return emitAttributeIfNotDefaultAfterClamp ( // the maximum flat work-group range is passed as the "default" to clamp to and compare against
928+ A, InfoCache.getMaximumFlatWorkGroupRange (*F));
872929 }
873930
874931 // / See AbstractAttribute::getName()
0 commit comments