@@ -88,9 +88,8 @@ class SiFive7GetCyclesSegmentedSeg2<string mx> {
8888
8989// Cycles for segmented loads and stores are calculated using the
9090// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
91- class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
92- defvar VLEN = 512;
93- defvar DLEN = 256;
91+ class SiFive7GetCyclesSegmented<string mx, int sew, int nf, int VLEN> {
92+ defvar DLEN = !div(VLEN, 2);
9493 // (VLEN * LMUL) / SEW
9594 defvar VLUpperBound = !cond(
9695 !eq(mx, "M1") : !div(VLEN, sew),
@@ -107,23 +106,20 @@ class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
107106 int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
108107}
109108
110- class SiFive7GetCyclesOnePerElement<string mx, int sew> {
111- // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
112- // to use a different VLEN, this model will not make scheduling decisions
113- // based on the user specified VLEN.
109+ class SiFive7GetCyclesOnePerElement<string mx, int sew, int VLEN> {
114110 // c = ceil(VLEN / SEW) * LMUL
115111 // Note: c >= 1 for VLEN = 512 since the smallest VLEN / SEW is 512 / 64 = 8,
116112 // and the largest division performed on that quotient is in the MF8 case
117113 // with division by 8. Therefore, there is no need to ceil the result.
118- int VLEN = !div(512 , sew);
114+ int numElements = !div(VLEN , sew);
119115 int c = !cond(
120- !eq(mx, "M1") : VLEN ,
121- !eq(mx, "M2") : !mul(VLEN , 2),
122- !eq(mx, "M4") : !mul(VLEN , 4),
123- !eq(mx, "M8") : !mul(VLEN , 8),
124- !eq(mx, "MF2") : !div(VLEN , 2),
125- !eq(mx, "MF4") : !div(VLEN , 4),
126- !eq(mx, "MF8") : !div(VLEN , 8)
116+ !eq(mx, "M1") : numElements ,
117+ !eq(mx, "M2") : !mul(numElements , 2),
118+ !eq(mx, "M4") : !mul(numElements , 4),
119+ !eq(mx, "M8") : !mul(numElements , 8),
120+ !eq(mx, "MF2") : !div(numElements , 2),
121+ !eq(mx, "MF4") : !div(numElements , 4),
122+ !eq(mx, "MF8") : !div(numElements , 8)
127123 );
128124}
129125
@@ -139,10 +135,9 @@ class SiFive7GetDivOrSqrtFactor<int sew> {
139135
140136/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
141137/// cycles.
142- class SiFive7GetReductionCycles<string mx, int sew> {
138+ class SiFive7GetReductionCycles<string mx, int sew, int VLEN > {
143139 // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
144140 // VLUpperBound=(VLEN*LMUL)/SEW.
145- defvar VLEN = 512;
146141 defvar DLEN = !div(VLEN, 2);
147142 defvar TwoTimesLMUL = !cond(
148143 !eq(mx, "M1") : 2,
@@ -160,8 +155,7 @@ class SiFive7GetReductionCycles<string mx, int sew> {
160155}
161156
162157/// Cycles for ordered reductions take approximately 6*VL cycles
163- class SiFive7GetOrderedReductionCycles<string mx, int sew> {
164- defvar VLEN = 512;
158+ class SiFive7GetOrderedReductionCycles<string mx, int sew, int VLEN> {
165159 // (VLEN * LMUL) / SEW
166160 defvar VLUpperBound = !cond(
167161 !eq(mx, "M1") : !div(VLEN, sew),
@@ -234,6 +228,8 @@ def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
234228
235229def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
236230
231+ defvar SiFive7VLEN = 512; // VLEN value passed to the SiFive7 cycle-computation helper classes in this file.
232+
237233// Branching
238234let Latency = 3 in {
239235def : WriteRes<WriteJmp, [SiFive7PipeB]>;
@@ -481,7 +477,7 @@ foreach mx = SchedMxList in {
481477
482478foreach mx = SchedMxList in {
483479 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
484- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
480+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, SiFive7VLEN >.c;
485481 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
486482 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
487483 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -501,7 +497,7 @@ foreach mx = SchedMxList in {
501497// since LMUL >= 16/64.
502498foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
503499 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
504- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
500+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, SiFive7VLEN >.c;
505501 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
506502 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
507503 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -518,7 +514,7 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
518514}
519515foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
520516 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
521- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
517+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, SiFive7VLEN >.c;
522518 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
523519 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
524520 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -535,7 +531,7 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
535531}
536532foreach mx = ["M1", "M2", "M4", "M8"] in {
537533 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
538- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
534+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, SiFive7VLEN >.c;
539535 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
540536 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
541537 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -588,7 +584,7 @@ foreach mx = SchedMxList in {
588584 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
589585 defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
590586 foreach nf=3-8 in {
591- defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
587+ defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf, SiFive7VLEN >.c;
592588 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
593589 // Does not chain so set latency high
594590 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
@@ -603,7 +599,7 @@ foreach mx = SchedMxList in {
603599foreach mx = SchedMxList in {
604600 foreach nf=2-8 in {
605601 foreach eew = [8, 16, 32, 64] in {
606- defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
602+ defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf, SiFive7VLEN >.c;
607603 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
608604 // Does not chain so set latency high
609605 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
@@ -669,7 +665,7 @@ foreach mx = SchedMxList in {
669665foreach mx = SchedMxList in {
670666 foreach sew = SchedSEWSet<mx>.val in {
671667 defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
672- !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
668+ !div(SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN >.c, 4));
673669 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
674670 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
675671 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
@@ -774,7 +770,7 @@ foreach mx = SchedMxList in {
774770foreach mx = SchedMxListF in {
775771 foreach sew = SchedSEWSet<mx, isF=1>.val in {
776772 defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
777- !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
773+ !div(SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN >.c, 4));
778774 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
779775 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
780776 defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
@@ -834,7 +830,7 @@ foreach mx = SchedMxListFW in {
834830// 14. Vector Reduction Operations
835831foreach mx = SchedMxList in {
836832 foreach sew = SchedSEWSet<mx>.val in {
837- defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
833+ defvar Cycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN >.c;
838834 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
839835 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
840836 defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -847,7 +843,7 @@ foreach mx = SchedMxList in {
847843
848844foreach mx = SchedMxListWRed in {
849845 foreach sew = SchedSEWSet<mx, 0, 1>.val in {
850- defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
846+ defvar Cycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN >.c;
851847 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
852848 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
853849 defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -857,15 +853,15 @@ foreach mx = SchedMxListWRed in {
857853
858854foreach mx = SchedMxListF in {
859855 foreach sew = SchedSEWSet<mx, 1>.val in {
860- defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
856+ defvar RedCycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN >.c;
861857 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
862858 let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in {
863859 defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA],
864860 mx, sew, IsWorstCase>;
865861 defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
866862 mx, sew, IsWorstCase>;
867863 }
868- defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
864+ defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew, SiFive7VLEN >.c;
869865 let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
870866 defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
871867 mx, sew, IsWorstCase>;
@@ -874,12 +870,12 @@ foreach mx = SchedMxListF in {
874870
875871foreach mx = SchedMxListFWRed in {
876872 foreach sew = SchedSEWSet<mx, 1, 1>.val in {
877- defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
873+ defvar RedCycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN >.c;
878874 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
879875 let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in
880876 defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
881877 mx, sew, IsWorstCase>;
882- defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
878+ defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew, SiFive7VLEN >.c;
883879 let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
884880 defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
885881 mx, sew, IsWorstCase>;
@@ -924,7 +920,7 @@ foreach mx = SchedMxList in {
924920
925921foreach mx = SchedMxList in {
926922 foreach sew = SchedSEWSet<mx>.val in {
927- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
923+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN >.c;
928924 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
929925 let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
930926 defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
0 commit comments