llvm · ppenzin · Dec 5, 2025 · Nov 27, 2025 · Dec 2, 2025 · Dec 4, 2025
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -562,7 +562,7 @@ multiclass SiFive7WriteResBase<int VLEN,
   // resource, we do not need to use LMULSEWXXX constructors. However, we do
   // use the SEW from the name to determine the number of Cycles.
 
-  foreach mx = SchedMxList in {
+  foreach mx = SchedMxListEEW8 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -582,10 +582,8 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX8", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  // TODO: The MxLists need to be filtered by EEW. We only need to support
-  // LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
-  // since LMUL >= 16/64.
-  foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+
+  foreach mx = SchedMxListEEW16 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -605,7 +603,7 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX16", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+  foreach mx = SchedMxListEEW32 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -625,7 +623,7 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX32", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  foreach mx = ["M1", "M2", "M4", "M8"] in {
+  foreach mx = SchedMxListEEW64 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;

diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
@@ -437,7 +437,7 @@ foreach mx = SchedMxList in {
   defm "" : LMULWriteResMX<"WriteVSTM",    [AscalonLS], mx, IsWorstCase>;
 }
 
-foreach mx = SchedMxList in {
+foreach mx = SchedMxListEEW8 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -449,7 +449,7 @@ foreach mx = SchedMxList in {
     defm "" : LMULWriteResMX<"WriteVSTOX8", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListEEW16 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -461,7 +461,7 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
     defm "" : LMULWriteResMX<"WriteVSTOX16", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListEEW32 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -473,7 +473,7 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
     defm "" : LMULWriteResMX<"WriteVSTOX32", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListEEW64 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {

diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -19,6 +19,14 @@ defvar SchedMxListFW = !listremove(SchedMxList, ["M8", "MF8"]);
 defvar SchedMxListF = !listremove(SchedMxList, ["MF8"]);
 // Used for widening floating-point Reduction as it doesn't contain MF8.
 defvar SchedMxListFWRed = SchedMxListF;
+// Used for indexed and strided loads and stores with EEW=8; same as the full MX list.
+defvar SchedMxListEEW8 = SchedMxList;
+// Used for indexed and strided loads and stores with EEW=16; no MF8 since LMUL >= 16/64.
+defvar SchedMxListEEW16 = SchedMxListF;
+// Used for indexed and strided loads and stores with EEW=32; also drops MF4.
+defvar SchedMxListEEW32 = !listremove(SchedMxListEEW16, ["MF4"]);
+// Used for indexed and strided loads and stores with EEW=64; also drops MF2.
+defvar SchedMxListEEW64 = !listremove(SchedMxListEEW32, ["MF2"]);
 
 class SchedSEWSet<string mx, bit isF = 0, bit isWidening = 0> {
   assert !or(!not(isF), !ne(mx, "MF8")), "LMUL shouldn't be MF8 for floating-point";