@@ -1140,7 +1140,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11401140      if  (!CallerAA || !CallerAA->isValidState ())
11411141        return  false ;
11421142
1143-       auto  Assumed = this ->getAssumed ();
1143+       ConstantRange  Assumed = this ->getAssumed ();
11441144      unsigned  Min = std::max (Assumed.getLower ().getZExtValue (),
11451145                              CallerAA->getAssumed ().getLower ().getZExtValue ());
11461146      unsigned  Max = std::max (Assumed.getUpper ().getZExtValue (),
@@ -1308,37 +1308,34 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
13081308  }
13091309}
13101310
1311- static  void  checkWavesPerEU (Module &M, TargetMachine &TM) {
1311+ /// The final check and update of the attribute 'amdgpu-waves-per-eu' based on
1312+ /// the determined 'amdgpu-flat-work-group-size' attribute. We can't do this
1313+ /// during the attributor run because the two attributes grow in opposite
1314+ /// directions; we should not use any intermediate value to calculate waves per
1315+ /// EU until we have a determined flat workgroup size.
1316+ static  void  updateWavesPerEU (Module &M, TargetMachine &TM) {
13121317  for  (Function &F : M) {
13131318    const  GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
13141319
13151320    auto  FlatWgrpSizeAttr =
13161321        AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size"  );
1317-     auto  WavesPerEUAttr = AMDGPU::getIntegerPairAttribute (
1318-         F, " amdgpu-waves-per-eu"  , /* OnlyFirstRequired=*/ true );
13191322
13201323    unsigned  MinWavesPerEU = ST.getMinWavesPerEU ();
13211324    unsigned  MaxWavesPerEU = ST.getMaxWavesPerEU ();
13221325
1323-     unsigned  MinFlatWgrpSize = 1U ;
1324-     unsigned  MaxFlatWgrpSize = 1024U ;
1326+     unsigned  MinFlatWgrpSize = ST. getMinFlatWorkGroupSize () ;
1327+     unsigned  MaxFlatWgrpSize = ST. getMaxFlatWorkGroupSize () ;
13251328    if  (FlatWgrpSizeAttr.has_value ()) {
13261329      MinFlatWgrpSize = FlatWgrpSizeAttr->first ;
13271330      MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second );
13281331    }
13291332
13301333    //  Start with the max range.
13311334    unsigned  Min = MinWavesPerEU;
1332-     unsigned  Max = MaxWavesPerEU ;
1335+     unsigned  Max = MinWavesPerEU ;
13331336
1334-     //  If the attribute exists, set them to the value from the attribute.
1335-     if  (WavesPerEUAttr.has_value ()) {
1336-       Min = WavesPerEUAttr->first ;
1337-       if  (WavesPerEUAttr->second .has_value ())
1338-         Max = *(WavesPerEUAttr->second );
1339-     }
1340- 
1341-     //  Compute the range from flat workgroup size.
1337+     //  Compute the range from flat workgroup size. `getWavesPerEU` will also
1338+     //  account for the 'amdgpu-waves-per-eu' attribute.
13421339    auto  [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
13431340        ST.getWavesPerEU (F, std::make_pair (MinFlatWgrpSize, MaxFlatWgrpSize));
13441341
@@ -1441,7 +1438,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14411438  if  (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
14421439                  LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
14431440                  LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1444-     checkWavesPerEU (M, TM);
1441+     updateWavesPerEU (M, TM);
14451442
14461443  return  Changed;
14471444}
0 commit comments