@@ -1306,19 +1306,21 @@ bool WaitcntBrackets::canOptimizeXCntWithLoadCnt(const AMDGPU::Waitcnt &Wait) {
13061306
13071307void WaitcntBrackets::applyXcnt (const AMDGPU::Waitcnt &Wait) {
13081308 if (hasRedundantXCntWithKmCnt (Wait)) {
1309- if (hasPendingEvent (VMEM_GROUP))
1309+ if (hasPendingEvent (VMEM_GROUP)) {
13101310 // Only clear the SMEM_GROUP event, but VMEM_GROUP could still require
13111311 // handling.
13121312 PendingEvents &= ~(1 << SMEM_GROUP);
1313- else
1313+ } else {
13141314 applyWaitcnt (X_CNT, 0 );
1315+ }
13151316 return ;
13161317 }
1317- if (canOptimizeXCntWithLoadCnt (Wait))
1318+ if (canOptimizeXCntWithLoadCnt (Wait)) {
13181319 // On entry to a block with multiple predecessors, there may
13191320 // be pending SMEM and VMEM events active at the same time.
13201321 // In such cases, only clear one active event at a time.
13211322 return applyWaitcnt (X_CNT, std::min (Wait.XCnt , Wait.LoadCnt ));
1323+ }
13221324 applyWaitcnt (X_CNT, Wait.XCnt );
13231325}
13241326
@@ -1748,10 +1750,11 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
17481750 for (auto CT : inst_counter_types (NUM_EXTENDED_INST_CNTS)) {
17491751 if ((CT == KM_CNT && ScoreBrackets.hasRedundantXCntWithKmCnt (PreCombine)) ||
17501752 (CT == LOAD_CNT &&
1751- ScoreBrackets.canOptimizeXCntWithLoadCnt (PreCombine)))
1753+ ScoreBrackets.canOptimizeXCntWithLoadCnt (PreCombine))) {
17521754 // Xcnt may need to be updated depending on a pre-existing KM/LOAD_CNT
17531755 // due to taking the backedge of a block.
17541756 ScoreBrackets.applyXcnt (PreCombine);
1757+ }
17551758 if (!WaitInstrs[CT])
17561759 continue ;
17571760
0 commit comments