[NFC][AMDGPU] IGLP: Fixes for unsigned int handling #135090

ro-i · 2025-04-09T22:10:40Z

Fixes unsigned int underflows in
MFMASmallGemmSingleWaveOpt::applyIGLPStrategy.

llvmbot · 2025-04-09T22:11:12Z

@llvm/pr-subscribers-backend-amdgpu

Author: Robert Imschweiler (ro-i)

Changes

Fixes:

- detection of VMEM_READS which are FLAT loads.
- unsigned int underflows in MFMASmallGemmSingleWaveOpt::applyIGLPStrategy.
- resetting global static DSWCounters for new runs.

This LLVM defect was identified via the AMD Fuzzing project.

Full diff: https://github.com/llvm/llvm-project/pull/135090.diff

2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp (+18-11)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll (+22)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 7b4d00c8214cb..cea3bcf4b31df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -80,6 +80,10 @@ enum class SchedGroupMask {
   LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
 };
 
+static bool handleAsVMEMInstr(const MachineInstr &MI, const SIInstrInfo *TII) {
+  return TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI));
+}
+
 class SchedGroup;
 
 // InstructionRule class is used to enact a filter which determines whether or
@@ -1891,7 +1895,7 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
         }
       }
 
-      assert(Cache->size());
+      assert(!MFMAsFound || Cache->size());
       auto *DAG = SyncPipe[0].DAG;
       for (auto &Elt : *Cache) {
         if (DAG->IsReachable(Elt, const_cast<SUnit *>(SU)))
@@ -1994,7 +1998,7 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
       }
 
       if (NumBits < 128) {
-        assert(TII->isVMEM(*MI) && MI->mayLoad());
+        assert(handleAsVMEMInstr(*MI, TII) && MI->mayLoad());
         if (NumBits + TRI.getRegSizeInBits(*TRI.getRegClassForOperandReg(
                           MRI, MI->getOperand(0))) <=
             128)
@@ -2079,6 +2083,9 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
 static unsigned DSWCount = 0;
 static unsigned DSWWithPermCount = 0;
 static unsigned DSWWithSharedVMEMCount = 0;
+static void resetDSWCounters() {
+  DSWCount = DSWWithPermCount = DSWWithSharedVMEMCount = 0;
+}
 
 bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
     DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
@@ -2138,7 +2145,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
 
         for (auto &Succ : Pred.getSUnit()->Succs) {
           auto *MI = Succ.getSUnit()->getInstr();
-          if (!TII->isVMEM(*MI) || !MI->mayLoad())
+          if (!handleAsVMEMInstr(*MI, TII) || !MI->mayLoad())
             continue;
 
           if (MissedAny || !VMEMLookup.size()) {
@@ -2200,7 +2207,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
   SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
 
   // Interleave MFMA with DS_READ prefetch
-  for (unsigned I = 0; I < DSRCount - 4; ++I) {
+  for (unsigned I = 4; I < DSRCount; ++I) {
     SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
         SchedGroupMask::DS_READ, 1, PipelineSyncID, DAG, TII);
     SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
@@ -2213,7 +2220,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
   // Phase 2a: Loop carried dependency with V_PERM
   // Schedule VPerm & DS_WRITE as closely as possible to the VMEM_READ they
   // depend on. Interleave MFMA to keep XDL unit busy throughout.
-  for (unsigned I = 0; I < DSWWithPermCount - DSWWithSharedVMEMCount; ++I) {
+  for (unsigned I = DSWWithSharedVMEMCount; I < DSWWithPermCount; ++I) {
     SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
         SchedGroupMask::VALU, 4, PipelineSyncID, DAG, TII);
     SG->addRule(std::make_shared<IsPermForDSW>(TII, SG->getSGID(), true));
@@ -2250,7 +2257,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
   // Phase 2b: Loop carried dependency without V_PERM
   // Schedule DS_WRITE as closely as possible to the VMEM_READ they depend on.
   // Interleave MFMA to keep XDL unit busy throughout.
-  for (unsigned I = 0; I < DSWCount - DSWWithPermCount; I++) {
+  for (unsigned I = DSWWithPermCount; I < DSWCount; I++) {
     SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
         SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
     SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
@@ -2426,17 +2433,15 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
     Result = true;
 
   else if (((SGMask & SchedGroupMask::VMEM) != SchedGroupMask::NONE) &&
-           (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
+           handleAsVMEMInstr(MI, TII))
     Result = true;
 
   else if (((SGMask & SchedGroupMask::VMEM_READ) != SchedGroupMask::NONE) &&
-           MI.mayLoad() &&
-           (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
+           MI.mayLoad() && handleAsVMEMInstr(MI, TII))
     Result = true;
 
   else if (((SGMask & SchedGroupMask::VMEM_WRITE) != SchedGroupMask::NONE) &&
-           MI.mayStore() &&
-           (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
+           MI.mayStore() && handleAsVMEMInstr(MI, TII))
     Result = true;
 
   else if (((SGMask & SchedGroupMask::DS) != SchedGroupMask::NONE) &&
@@ -2703,5 +2708,7 @@ bool IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
 /// for a given region.
 std::unique_ptr<ScheduleDAGMutation>
 llvm::createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase) {
+  if (Phase == AMDGPU::SchedulingPhase::Initial)
+    resetDSWCounters();
   return std::make_unique<IGroupLPDAGMutation>(Phase);
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
index 08c0d15432915..3ce25c0fd1fef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
@@ -321,6 +321,28 @@ entry:
   ret void
 }
 
+; Check fixes for:
+; - detection of VMEM_READS which are FLAT loads.
+; - unsigned int underflows in MFMASmallGemmSingleWaveOpt::applyIGLPStrategy.
+; - resetting global static DSWCounters for new runs.
+; (reduced fuzzer-generated test case)
+define amdgpu_kernel void @test_iglp_opt_flat_load(ptr %ptr1, ptr %ptr2, ptr addrspace(3) %ptr3, ptr addrspace(3) %ptr4) {
+entry:
+  %LGV2 = load <8 x half>, ptr %ptr1, align 16
+  %LGV = load i1, ptr %ptr2, align 1
+  call void @llvm.amdgcn.iglp.opt(i32 1)
+  %C = fcmp ugt <8 x half> zeroinitializer, %LGV2
+  store <8 x i1> %C, ptr addrspace(3) %ptr3, align 1
+  br i1 %LGV, label %common.ret, label %F
+
+common.ret:                                       ; preds = %F, %entry
+  ret void
+
+F:                                                ; preds = %entry
+  store <32 x float> zeroinitializer, ptr addrspace(3) %ptr4, align 128
+  br label %common.ret
+}
+
 declare void @llvm.amdgcn.iglp.opt(i32) #1
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) #1

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

ro-i · 2025-05-12T09:03:12Z

(This PR will be rebased and adapted as soon as the related PRs are done.)

arsenm

Overlap with #162560?

Fixes unsigned int underflows in `MFMASmallGemmSingleWaveOpt::applyIGLPStrategy`.

ro-i · 2025-11-18T09:34:21Z

I stripped this PR down to fixing only the unsigned int for-loops I detected. It no longer depends on any other PRs.

github-actions · 2025-11-18T10:16:13Z

🐧 Linux x64 Test Results

186273 tests passed
4849 tests skipped

llvm-ci · 2025-11-18T20:40:07Z

LLVM Buildbot has detected a new failure on builder amdgpu-offload-rhel-8-cmake-build-only running on rocm-docker-rhel-8 while building llvm at step 2 "update-annotated-scripts".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/204/builds/28337

Here is the relevant piece of the build log for reference:

Step 2 (update-annotated-scripts) failure: update (failure)

ro-i · 2025-11-18T20:43:38Z

(failure unrelated)

Fixes unsigned int underflows in `MFMASmallGemmSingleWaveOpt::applyIGLPStrategy`.

ro-i requested review from jrbyrnes and kerbowa April 9, 2025 22:10

llvmbot added the backend:AMDGPU label Apr 9, 2025

arsenm reviewed Apr 10, 2025

View reviewed changes

arsenm reviewed Oct 11, 2025

View reviewed changes

ro-i mentioned this pull request Oct 12, 2025

[AMDGPU] IGLP: Fix static variables #137549

Open

[NFC][AMDGPU] IGLP: Fixes for unsigned int handling

5a1b3b2

Fixes unsigned int underflows in `MFMASmallGemmSingleWaveOpt::applyIGLPStrategy`.

ro-i force-pushed the iglp-misc-fixes branch from a188054 to 5a1b3b2 Compare November 18, 2025 09:32

ro-i changed the title ~~[AMDGPU] IGLP: Fixes for VMEM load detection and unsigned int handling~~ [NFC][AMDGPU] IGLP: Fixes for unsigned int handling Nov 18, 2025

ro-i requested a review from arsenm November 18, 2025 09:34

shiltian approved these changes Nov 18, 2025

View reviewed changes

ro-i merged commit 576e1af into llvm:main Nov 18, 2025
10 checks passed

ro-i deleted the iglp-misc-fixes branch November 18, 2025 19:47

aadeshps-mcw pushed a commit to aadeshps-mcw/llvm-project that referenced this pull request Nov 26, 2025

[NFC][AMDGPU] IGLP: Fixes for unsigned int handling (llvm#135090)

fbeb93d

Fixes unsigned int underflows in `MFMASmallGemmSingleWaveOpt::applyIGLPStrategy`.

Priyanshu3820 pushed a commit to Priyanshu3820/llvm-project that referenced this pull request Nov 26, 2025

[NFC][AMDGPU] IGLP: Fixes for unsigned int handling (llvm#135090)

219532d

Fixes unsigned int underflows in `MFMASmallGemmSingleWaveOpt::applyIGLPStrategy`.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[NFC][AMDGPU] IGLP: Fixes for unsigned int handling #135090

[NFC][AMDGPU] IGLP: Fixes for unsigned int handling #135090

Uh oh!

ro-i commented Apr 9, 2025 •

edited

Loading

Uh oh!

llvmbot commented Apr 9, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

ro-i commented May 12, 2025

Uh oh!

arsenm left a comment

Uh oh!

ro-i commented Nov 18, 2025

Uh oh!

github-actions bot commented Nov 18, 2025

Uh oh!

Uh oh!

llvm-ci commented Nov 18, 2025

Uh oh!

ro-i commented Nov 18, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

[NFC][AMDGPU] IGLP: Fixes for unsigned int handling #135090

[NFC][AMDGPU] IGLP: Fixes for unsigned int handling #135090

Uh oh!

Conversation

ro-i commented Apr 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Apr 9, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

ro-i commented May 12, 2025

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

ro-i commented Nov 18, 2025

Uh oh!

github-actions bot commented Nov 18, 2025

🐧 Linux x64 Test Results

Uh oh!

Uh oh!

llvm-ci commented Nov 18, 2025

Uh oh!

ro-i commented Nov 18, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

ro-i commented Apr 9, 2025 •

edited

Loading