We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d6d480f, commit f2be466 (Copy full SHA for f2be466)
offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1294,7 +1294,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
1294
1295
uint64_t PreferredNumBlocks = TripCountNumBlocks;
1296
// Occupancy-based setting overrides block reuse.
1297
- if (OMPX_GenericSPMDOccupancyBasedOpt) {
+ if (OMPX_GenericSPMDOccupancyBasedOpt && NumTeamsEnvVar == 0 && NumTeamsClause[0] == 0) {
1298
PreferredNumBlocks =
1299
std::min(PreferredNumBlocks,
1300
OptimizeNumTeamsBaseOccupancy(GenericDevice, NumThreads));
0 commit comments