Skip to content

Commit 948d39b

Browse files
[RISCV] Update SpacemiT-X60 vector reduction operations latencies (llvm#152737)
This PR adds hardware-measured latencies for all instructions defined in Section 14 of the RVV specification: "Vector Reduction Operations" to the SpacemiT-X60 scheduling model. --------- Signed-off-by: Mikhail R. Gadelha <[email protected]>
1 parent 509ee6b commit 948d39b

File tree

2 files changed

+617
-567
lines changed

2 files changed

+617
-567
lines changed

llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -654,35 +654,85 @@ foreach mx = SchedMxList in {
654654
foreach sew = SchedSEWSet<mx>.val in {
655655
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
656656

657-
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
658-
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
657+
defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c;
658+
defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c;
659+
let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in {
660+
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
661+
662+
// Pattern for vredsum: 5/5/5/7/11/19/35
663+
// Pattern for vredand, vredor, vredxor: 4/4/4/6/10/18/34
664+
// They are grouped together, so we use the worst-case vredsum latency.
665+
// TODO: split vredand, vredor, vredxor into separate scheduling classes.
666+
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
667+
}
659668
}
660669
}
661670

662671
foreach mx = SchedMxListWRed in {
663672
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
664673
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
665674

666-
defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
675+
defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c;
676+
defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c;
677+
let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in {
678+
defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
679+
}
680+
}
681+
}
682+
683+
foreach mx = SchedMxListF in {
684+
foreach sew = SchedSEWSet<mx, 1>.val in {
685+
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
686+
687+
// Latency for vfredmax.vs, vfredmin.vs: 12/12/15/21/33/57
688+
// Latency for vfredusum.vs is slightly lower for e16/e32
689+
// We use the worst-case latency.
690+
defvar VFRedLat = GetLMULValue<[12, 12, 12, 15, 21, 33, 57], mx>.c;
691+
defvar VFRedOcc = GetLMULValue<[8, 8, 8, 8, 14, 20, 57], mx>.c;
692+
let Latency = VFRedLat, ReleaseAtCycles = [VFRedOcc] in {
693+
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
694+
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
695+
}
667696
}
668697
}
669698

670699
foreach mx = SchedMxListF in {
671700
foreach sew = SchedSEWSet<mx, 1>.val in {
672701
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
673702

674-
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
675-
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
676-
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
703+
// Compute latency based on SEW
704+
defvar VFRedOV_FromLat = !cond(
705+
!eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 12, mx>.c,
706+
!eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c,
707+
!eq(sew, 64) : ConstValueUntilLMULThenDouble<"M1", 12, mx>.c
708+
);
709+
defvar VFRedOV_FromOcc = !cond(
710+
!eq(sew, 16) : GetLMULValue<[8, 8, 20, 24, 48, 96, 384], mx>.c,
711+
!eq(sew, 32) : GetLMULValue<[8, 8, 8, 12, 24, 48, 192], mx>.c,
712+
!eq(sew, 64) : GetLMULValue<[6, 6, 6, 6, 12, 24, 96], mx>.c
713+
);
714+
let Latency = VFRedOV_FromLat, ReleaseAtCycles = [VFRedOV_FromOcc] in {
715+
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
716+
}
677717
}
678718
}
679719

680720
foreach mx = SchedMxListFWRed in {
681721
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
682722
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
683723

684-
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
685-
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
724+
defvar VFRedOVLat = !cond(
725+
!eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 16, mx>.c,
726+
!eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 16, mx>.c,
727+
);
728+
defvar VFRedOVOcc = !cond(
729+
!eq(sew, 16) : GetLMULValue<[11, 11, 27, 32, 64, 128, 512], mx>.c,
730+
!eq(sew, 32) : GetLMULValue<[11, 11, 11, 16, 32, 64, 256], mx>.c,
731+
);
732+
let Latency = VFRedOVLat, ReleaseAtCycles = [VFRedOVOcc] in {
733+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
734+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
735+
}
686736
}
687737
}
688738

0 commit comments

Comments
 (0)