@@ -654,35 +654,85 @@ foreach mx = SchedMxList in {
654654 foreach sew = SchedSEWSet<mx>.val in {
655655 defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
656656
657- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
658- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
657+ defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c;
658+ defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c;
659+ let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in {
660+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
661+
662+ // Latency pattern for vredsum: 5/5/5/7/11/19/35
663+ // Latency pattern for vredand, vredor, vredxor: 4/4/4/6/10/18/34
664+ // They share one scheduling class, so we use the worst-case vredsum latency.
665+ // TODO: split vredand, vredor, vredxor into a separate scheduling class.
666+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
667+ }
659668 }
660669}
661670
662671foreach mx = SchedMxListWRed in {
663672 foreach sew = SchedSEWSet<mx, 0, 1>.val in {
664673 defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
665674
666- defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
675+ defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c;
676+ defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c;
677+ let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in {
678+ defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
679+ }
680+ }
681+ }
682+
683+ foreach mx = SchedMxListF in {
684+ foreach sew = SchedSEWSet<mx, 1>.val in {
685+ defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
686+
687+ // Latency for vfredmax.vs, vfredmin.vs: 12/12/15/21/33/57
688+ // Latency for vfredusum.vs is slightly lower for e16/e32.
689+ // We use the worst case (the vfredmax/vfredmin latency) for both write classes.
690+ defvar VFRedLat = GetLMULValue<[12, 12, 12, 15, 21, 33, 57], mx>.c;
691+ defvar VFRedOcc = GetLMULValue<[8, 8, 8, 8, 14, 20, 57], mx>.c;
692+ let Latency = VFRedLat, ReleaseAtCycles = [VFRedOcc] in {
693+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
694+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
695+ }
667696 }
668697}
669698
670699foreach mx = SchedMxListF in {
671700 foreach sew = SchedSEWSet<mx, 1>.val in {
672701 defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
673702
674- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
675- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
676- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
703+ // Compute latency and occupancy (ReleaseAtCycles) based on SEW
704+ defvar VFRedOV_FromLat = !cond(
705+ !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 12, mx>.c,
706+ !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c,
707+ !eq(sew, 64) : ConstValueUntilLMULThenDouble<"M1", 12, mx>.c
708+ );
709+ defvar VFRedOV_FromOcc = !cond(
710+ !eq(sew, 16) : GetLMULValue<[8, 8, 20, 24, 48, 96, 384], mx>.c,
711+ !eq(sew, 32) : GetLMULValue<[8, 8, 8, 12, 24, 48, 192], mx>.c,
712+ !eq(sew, 64) : GetLMULValue<[6, 6, 6, 6, 12, 24, 96], mx>.c
713+ );
714+ let Latency = VFRedOV_FromLat, ReleaseAtCycles = [VFRedOV_FromOcc] in {
715+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
716+ }
677717 }
678718}
679719
680720foreach mx = SchedMxListFWRed in {
681721 foreach sew = SchedSEWSet<mx, 1, 1>.val in {
682722 defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
683723
684- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
685- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
724+ defvar VFRedOVLat = !cond(
725+ !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 16, mx>.c,
726+ !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 16, mx>.c,
727+ );
728+ defvar VFRedOVOcc = !cond(
729+ !eq(sew, 16) : GetLMULValue<[11, 11, 27, 32, 64, 128, 512], mx>.c,
730+ !eq(sew, 32) : GetLMULValue<[11, 11, 11, 16, 32, 64, 256], mx>.c,
731+ );
732+ let Latency = VFRedOVLat, ReleaseAtCycles = [VFRedOVOcc] in {
733+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
734+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
735+ }
686736 }
687737}
688738
0 commit comments