Skip to content

Commit acb1f7e

Browse files
authored
SWDEV-526855 - Modify the SIMDPerCU calculation for gfx1250/1. (#275)
1 parent c5ced8c commit acb1f7e

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

hipamd/src/hip_platform.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -404,8 +404,14 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
404404
const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
405405
GprWaves = std::min(VgprWaves, SgprWaves);
406406
}
407-
uint32_t simdPerCU = (device.isa().versionMajor() <= 9) ? device.info().simdPerCU_
408-
: (wrkGrpInfo->isWGPMode_ ? 4 : 2);
407+
408+
// The table contains SIMDs per CU, not per WGP, so when WGP mode is set in the kernel metadata,
409+
// multiply the number of SIMDs by 2 to account for the 2 CUs in 1 WGP.
410+
uint32_t simdPerCU = device.isa().simdPerCU();
411+
if (wrkGrpInfo->isWGPMode_) {
412+
simdPerCU *= 2;
413+
}
414+
409415
const size_t alu_occupancy = simdPerCU * std::min(MaxWavesPerSimd, GprWaves);
410416
const int alu_limited_threads = alu_occupancy * wrkGrpInfo->wavefrontSize_;
411417

0 commit comments

Comments
 (0)