@@ -1140,7 +1140,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11401140 if (!CallerAA || !CallerAA->isValidState ())
11411141 return false ;
11421142
1143- auto Assumed = this ->getAssumed ();
1143+ ConstantRange Assumed = this ->getAssumed ();
11441144 unsigned Min = std::max (Assumed.getLower ().getZExtValue (),
11451145 CallerAA->getAssumed ().getLower ().getZExtValue ());
11461146 unsigned Max = std::max (Assumed.getUpper ().getZExtValue (),
@@ -1308,37 +1308,34 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
13081308 }
13091309}
13101310
1311- static void checkWavesPerEU (Module &M, TargetMachine &TM) {
1311+ /// The final check and update of the 'amdgpu-waves-per-eu' attribute, based
1312+ /// on the determined 'amdgpu-flat-work-group-size' attribute. We can't do this
1313+ /// during the attributor run because the two attributes grow in opposite
1314+ /// directions; we should not use any intermediate value to calculate waves per
1315+ /// EU until we have a determined flat workgroup size.
1316+ static void updateWavesPerEU (Module &M, TargetMachine &TM) {
13121317 for (Function &F : M) {
13131318 const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
13141319
13151320 auto FlatWgrpSizeAttr =
13161321 AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size" );
1317- auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute (
1318- F, " amdgpu-waves-per-eu" , /* OnlyFirstRequired=*/ true );
13191322
13201323 unsigned MinWavesPerEU = ST.getMinWavesPerEU ();
13211324 unsigned MaxWavesPerEU = ST.getMaxWavesPerEU ();
13221325
1323- unsigned MinFlatWgrpSize = 1U ;
1324- unsigned MaxFlatWgrpSize = 1024U ;
1326+ unsigned MinFlatWgrpSize = ST. getMinFlatWorkGroupSize () ;
1327+ unsigned MaxFlatWgrpSize = ST. getMaxFlatWorkGroupSize () ;
13251328 if (FlatWgrpSizeAttr.has_value ()) {
13261329 MinFlatWgrpSize = FlatWgrpSizeAttr->first ;
13271330 MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second );
13281331 }
13291332
13301333 // Start with the narrowest range: both bounds begin at the subtarget minimum.
13311334 unsigned Min = MinWavesPerEU;
1332- unsigned Max = MaxWavesPerEU ;
1335+ unsigned Max = MinWavesPerEU ;
13331336
1334- // If the attribute exists, set them to the value from the attribute.
1335- if (WavesPerEUAttr.has_value ()) {
1336- Min = WavesPerEUAttr->first ;
1337- if (WavesPerEUAttr->second .has_value ())
1338- Max = *(WavesPerEUAttr->second );
1339- }
1340-
1341- // Compute the range from flat workgroup size.
1337+ // Compute the range from flat workgroup size. `getWavesPerEU` will also
1338+ // account for the 'amdgpu-waves-per-eu' attribute.
13421339 auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
13431340 ST.getWavesPerEU (F, std::make_pair (MinFlatWgrpSize, MaxFlatWgrpSize));
13441341
@@ -1441,7 +1438,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14411438 if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
14421439 LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
14431440 LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1444- checkWavesPerEU (M, TM);
1441+ updateWavesPerEU (M, TM);
14451442
14461443 return Changed;
14471444}
0 commit comments