@@ -367,6 +367,9 @@ struct GenericKernelTy {
367367 return ExecutionMode == OMP_TGT_EXEC_MODE_XTEAM_RED;
368368 }
369369
370+ /// Indicate whether the given block size is within the limit. This implementation accepts every size (always returns true); being virtual, it can be overridden.
371+ virtual bool isValidBlockSize (uint32_t BlockSize) const { return true ; }
372+
370373protected:
371374 // / Get the execution mode name of the kernel.
372375 const char *getExecutionModeName () const {
@@ -1345,8 +1348,10 @@ struct KernelRunRecordTy {
13451348
13461349 // Get parameters for next kernel launch.
13471350 std::pair<uint32_t , uint32_t >
1348- getLaunchParamsForKernel (std::string KernelName ,
1351+ getLaunchParamsForKernel (const GenericKernelTy &Kernel ,
13491352 GenericDeviceTy &GenericDevice) {
1353+ std::string KernelName = Kernel.getName ();
1354+
13501355 // If the kernel reaches the run limit,
13511356 // return the current optimal launch parameters.
13521357 if (reachedRunLimitForKernel (KernelName)) {
@@ -1360,7 +1365,10 @@ struct KernelRunRecordTy {
13601365
13611366 if (IdxCUMulti >= CUMultiplierCandidate.size ()) {
13621367 // No more element to search.
1368+ // Max run counter to stop further runs.
13631369 // Return current optimal launch parameters.
1370+ TuningData[KernelName].RunCounters = RunLimiter + 1 ;
1371+
13641372 return {TuningData[KernelName].MinEntry .NumTeams ,
13651373 TuningData[KernelName].MinEntry .NumThreads };
13661374 }
@@ -1374,7 +1382,9 @@ struct KernelRunRecordTy {
13741382 IdxThread++;
13751383 TuningData[KernelName].IdxThread = IdxThread;
13761384
1377- if (IdxThread >= ThreadCandidate.size ()) {
1385+ // Threads should be within the limit.
1386+ if (IdxThread >= ThreadCandidate.size () ||
1387+ !Kernel.isValidBlockSize (ThreadCandidate[IdxThread])) {
13781388 TuningData[KernelName].IdxThread = 0 ;
13791389 TuningData[KernelName].IdxCUMultiplier ++;
13801390 }
0 commit comments