@@ -1097,17 +1097,16 @@ struct AMDGPUKernelTy : public GenericKernelTy {
10971097 int32_t NumTeamsEnvVar = GenericDevice.getOMPNumTeams ();
10981098 // CU multiplier from envar.
10991099 uint32_t EnvarCUMultiplier = GenericDevice.getXTeamRedTeamsPerCU ();
1100- // Disabled if the value is 0.
1101- if (EnvarCUMultiplier == 0 ) {
1102- EnvarCUMultiplier = UINT_MAX;
1103- }
11041100
11051101 if (GenericDevice.isFastReductionEnabled ()) {
11061102 // When fast reduction is enabled, the number of teams is capped by
11071103 // the MaxCUMultiplier constant.
1108- MaxNumGroups =
1109- DeviceNumCUs * std::min (llvm::omp::xteam_red::MaxCUMultiplier,
1110- static_cast <int16_t >(EnvarCUMultiplier));
1104+ // When the envar is set (non-zero), use it for computing MaxNumGroups.
1105+ if (EnvarCUMultiplier > 0 ) {
1106+ MaxNumGroups = DeviceNumCUs * EnvarCUMultiplier;
1107+ } else {
1108+ MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
1109+ }
11111110 } else {
11121111 // When fast reduction is not enabled, the number of teams is capped
11131112 // by the metadata that clang CodeGen created. The number of teams
@@ -1118,7 +1117,13 @@ struct AMDGPUKernelTy : public GenericKernelTy {
11181117 // ConstWGSize is the block size that CodeGen used.
11191118 uint32_t CUMultiplier =
11201119 llvm::omp::xteam_red::getXteamRedCUMultiplier (ConstWGSize);
1121- MaxNumGroups = DeviceNumCUs * std::min (CUMultiplier, EnvarCUMultiplier);
1120+
1121+ if (EnvarCUMultiplier > 0 ) {
1122+ MaxNumGroups =
1123+ DeviceNumCUs * std::min (CUMultiplier, EnvarCUMultiplier);
1124+ } else {
1125+ MaxNumGroups = DeviceNumCUs * CUMultiplier;
1126+ }
11221127 }
11231128
11241129 // If envar OMPX_XTEAMREDUCTION_OCCUPANCY_BASED_OPT is set and no
0 commit comments