Skip to content

Conversation

@ppenzin
Copy link
Contributor

@ppenzin ppenzin commented Nov 27, 2025

Create additional lists representing valid LMULs for strided and indexed load of particular element sizes.

Create additional lists representing valid LMULs for strided and indexed load
of particular element sizes.
@llvmbot
Copy link
Member

llvmbot commented Nov 27, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Petr Penzin (ppenzin)

Changes

Create additional lists representing valid LMULs for strided and indexed load of particular element sizes.


Full diff: https://github.com/llvm/llvm-project/pull/169756.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVSchedSiFive7.td (+5-7)
  • (modified) llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td (+4-4)
  • (modified) llvm/lib/Target/RISCV/RISCVScheduleV.td (+8)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 36a2f46416674..21d882ee8f426 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -562,7 +562,7 @@ multiclass SiFive7WriteResBase<int VLEN,
   // resource, we do not need to use LMULSEWXXX constructors. However, we do
   // use the SEW from the name to determine the number of Cycles.
 
-  foreach mx = SchedMxList in {
+  foreach mx = SchedMxListDS8 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -582,10 +582,8 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX8", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  // TODO: The MxLists need to be filtered by EEW. We only need to support
-  // LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
-  // since LMUL >= 16/64.
-  foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+
+  foreach mx = SchedMxListDS16 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -605,7 +603,7 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX16", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+  foreach mx = SchedMxListDS32 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -625,7 +623,7 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX32", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  foreach mx = ["M1", "M2", "M4", "M8"] in {
+  foreach mx = SchedMxListDS64 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
index a22552de71360..02c1b5fcf6462 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
@@ -437,7 +437,7 @@ foreach mx = SchedMxList in {
   defm "" : LMULWriteResMX<"WriteVSTM",    [AscalonLS], mx, IsWorstCase>;
 }
 
-foreach mx = SchedMxList in {
+foreach mx = SchedMxListDS8 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -449,7 +449,7 @@ foreach mx = SchedMxList in {
     defm "" : LMULWriteResMX<"WriteVSTOX8", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS16 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -461,7 +461,7 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
     defm "" : LMULWriteResMX<"WriteVSTOX16", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS32 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -473,7 +473,7 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
     defm "" : LMULWriteResMX<"WriteVSTOX32", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS64 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index d11b446920c4e..f0831e278332a 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -19,6 +19,14 @@ defvar SchedMxListFW = !listremove(SchedMxList, ["M8", "MF8"]);
 defvar SchedMxListF = !listremove(SchedMxList, ["MF8"]);
 // Used for widening floating-point Reduction as it doesn't contain MF8.
 defvar SchedMxListFWRed = SchedMxListF;
+// Used for indexed and strided loads of 8 bit lanes, same as full MX list
+defvar SchedMxListDS8 = SchedMxList;
+// Used for indexed and strided loads of 16 bit lanes
+defvar SchedMxListDS16 = SchedMxListF;
+// Used for indexed and strided loads of 32 bit lanes
+defvar SchedMxListDS32 = !listremove(SchedMxListDS16, ["MF4"]);
+// Used for indexed and strided loads of 64 bit lanes
+defvar SchedMxListDS64 = !listremove(SchedMxListDS32, ["MF2"]);
 
 class SchedSEWSet<string mx, bit isF = 0, bit isWidening = 0> {
   assert !or(!not(isF), !ne(mx, "MF8")), "LMUL shouldn't be MF8 for floating-point";

@ppenzin
Copy link
Contributor Author

ppenzin commented Nov 27, 2025

Attempt to cover a point raised in #167066 (comment)

Copy link
Contributor

@wangpc-pp wangpc-pp left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

defm : LMULWriteResMX<"WriteVSTOX8", [VCQ, VS], mx, IsWorstCase>;
}
}
// TODO: The MxLists need to be filtered by EEW. We only need to support
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to keep this TODO?

Copy link
Contributor Author

@ppenzin ppenzin Nov 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can revert that. If we try to implement this we would need to create a macro that takes EEW and returns a MxList, and this change doesn't quite get there. @mshockwave any objections on keeping the TODO?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this comment might be outdated already: the fact that it excludes MF8 means that it already accounts for EEW, as explained by the comment itself.

// Used for widening floating-point Reduction as it doesn't contain MF8.
defvar SchedMxListFWRed = SchedMxListF;
// Used for indexed and strided loads of 8 bit lanes, same as full MX list
defvar SchedMxListDS8 = SchedMxList;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there any reason the suffix is called "DS"? Is it coming from SchedWrite names like "WriteVLDS8"? I believe those names should be read as: "V" + "LD" (load) + "S" (strided) + "8" (EEW).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Focused on wrong common letter, sorry

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the aliases. Is SX (for S and UX/OX) better?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a list of allowed MX values for a particular EEW. It doesn't need to mention strided or indexed at all. Maybe SchedMxListEEW8, SchedMxListEEW16, SchedMxListEEW32, SchedMxListEEW64?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

second that names like SchedMxListEEW8 might be more general.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed.

S and UX/OX are the common parts of SchedWrite names for strided and segmented
operations, hence "SX" suffix, followed by EEW.
Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Member

@mshockwave mshockwave left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@ppenzin ppenzin merged commit b464d77 into llvm:main Dec 5, 2025
10 checks passed
@ppenzin ppenzin deleted the mxlist_aliases branch December 5, 2025 21:10
honeygoyal pushed a commit to honeygoyal/llvm-project that referenced this pull request Dec 9, 2025
Create additional lists representing valid LMULs for strided and indexed
load of particular element sizes.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants