@@ -687,18 +687,26 @@ foreach mx = SchedMxList in {
687687foreach mx = SchedMxList in {
688688 defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
689689
690- // Pattern for vslide1down.vx, vslidedown.vi/vx : 4/5/9/17
691- // Pattern for vslide1up.vx: 4/4/8/16
692- // We use 4/4/8/16 for simplicity
693- defvar VSlideLatAndOcc = GetDoublingValueFromLMUL<"M2", 4, mx>.c;
694- let Latency = VSlideLatAndOcc, ReleaseAtCycles =[VSlideLatAndOcc] in {
695- defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
690+  // Latency for slide up: 4/4/8/16; ReleaseAtCycles is 2/4/8/16
691+ defvar VSlideUpLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
692+ defvar VSlideUpOcc = ConstOneUntilMF2ThenDouble<mx>.c;
693+ let Latency = VSlideUpLat, ReleaseAtCycles =[VSlideUpOcc] in {
694+ defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
695+ }
696696
697+  // Latency for slide down: 4/5/9/17; ReleaseAtCycles is 3/5/9/17
698+ defvar VSlideDownLat = GetLMULValue<[4, 4, 4, 4, 5, 9, 17], mx>.c;
699+ defvar VSlideDownOcc = GetLMULValue<[1, 1, 1, 3, 5, 9, 17], mx>.c;
700+ let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in {
701+ defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
702+ }
703+  // The following writes group slide-up and slide-down together, so we use
704+  // the worst-case (slide down) values for all of them.
705+ let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in {
706+ defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
697707 defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;
698- defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
699708
700- defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
701- defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
709+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
702710 }
703711}
704712
@@ -720,8 +728,8 @@ let Latency = 4, ReleaseAtCycles = [4] in {
720728foreach mx = SchedMxList in {
721729 defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
722730
723- defvar VRGatherLat = GetDoublingValueFromLMUL <"M2", 4, mx>.c;
724- let Latency = VRGatherLat, ReleaseAtCycles = [GetOneMF2AndDouble <mx>.c] in {
731+ defvar VRGatherLat = ConstValueUntilLMULThenDouble <"M2", 4, mx>.c;
732+ let Latency = VRGatherLat, ReleaseAtCycles = [ConstOneUntilMF2ThenDouble <mx>.c] in {
725733 defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
726734 defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
727735 }
@@ -736,11 +744,11 @@ foreach mx = SchedMxList in {
736744 }
737745 // For sew == 8, latency is half of the other cases, except for the fractional LMULs (const 4 cycles)
738746 defvar VRGatherEI16Lat = !if(!eq(sew, 8),
739- GetLMULValue<[4, 4, 4, 8, 32, 128], mx>.c,
740- VRGatherVVLat );
747+ GetLMULValue<[4, 4, 4, 8, 32, 128, 256 ], mx>.c,
748+ GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c );
741749 defvar VRGatherEI16Occ = !if(!eq(sew, 8),
742- GetLMULValue<[1, 1, 2, 8, 32, 128], mx>.c,
743- GetLMULValue<[4, 4, 4 , 4, 16, 64, 256], mx>.c);
750+ GetLMULValue<[1, 1, 2, 8, 32, 128, 256 ], mx>.c,
751+ GetLMULValue<[1, 1, 1 , 4, 16, 64, 256], mx>.c);
744752 let Latency = VRGatherEI16Lat, ReleaseAtCycles = [VRGatherEI16Occ] in {
745753 defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
746754 }
0 commit comments