@@ -2183,7 +2183,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
21832183 SG->initSchedGroup (SyncedInstrs[SG->getSyncID ()]);
21842184
21852185 // Interleave MFMA with DS_READ prefetch
2186- for (unsigned I = 0 ; I < DSRCount - 4 ; ++I) {
2186+ for (unsigned I = 4 ; I < DSRCount; ++I) {
21872187 SG = &SyncedSchedGroups[PipelineSyncID].emplace_back (
21882188 SchedGroupMask::DS_READ, 1 , PipelineSyncID, DAG, TII);
21892189 SG->initSchedGroup (SyncedInstrs[SG->getSyncID ()]);
@@ -2196,7 +2196,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
21962196 // Phase 2a: Loop carried dependency with V_PERM
21972197 // Schedule VPerm & DS_WRITE as closely as possible to the VMEM_READ they
21982198 // depend on. Interleave MFMA to keep XDL unit busy throughout.
2199- for (unsigned I = 0 ; I < DSWWithPermCount - DSWWithSharedVMEMCount ; ++I) {
2199+ for (unsigned I = DSWWithSharedVMEMCount ; I < DSWWithPermCount; ++I) {
22002200 SG = &SyncedSchedGroups[PipelineSyncID].emplace_back (
22012201 SchedGroupMask::VALU, 4 , PipelineSyncID, DAG, TII);
22022202 SG->addRule (std::make_shared<IsPermForDSW>(TII, SG->getSGID (), true ));
@@ -2233,7 +2233,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
22332233 // Phase 2b: Loop carried dependency without V_PERM
22342234 // Schedule DS_WRITE as closely as possible to the VMEM_READ they depend on.
22352235 // Interleave MFMA to keep XDL unit busy throughout.
2236- for (unsigned I = 0 ; I < DSWCount - DSWWithPermCount ; I++) {
2236+ for (unsigned I = DSWWithPermCount ; I < DSWCount; I++) {
22372237 SG = &SyncedSchedGroups[PipelineSyncID].emplace_back (
22382238 SchedGroupMask::DS_WRITE, 1 , PipelineSyncID, DAG, TII);
22392239 SG->initSchedGroup (SyncedInstrs[SG->getSyncID ()]);
0 commit comments