@@ -317,7 +317,6 @@ multiclass SiFive7WriteResBase<int VLEN,
317
317
ProcResourceKind VL, ProcResourceKind VS,
318
318
ProcResourceKind VCQ,
319
319
SiFive7FPLatencies fpLatencies,
320
- bit isFP64Throttled = false,
321
320
bit hasFastGather = false> {
322
321
323
322
// Branching
@@ -832,29 +831,56 @@ multiclass SiFive7WriteResBase<int VLEN,
832
831
// 13. Vector Floating-Point Instructions
833
832
// Basic vector FP arithmetic: emits one set of WriteRes definitions for every
// (mx, sew) pair drawn from SchedMxListF and SchedSEWSet<mx, isF=1>.
foreach mx = SchedMxListF in {
834
833
foreach sew = SchedSEWSet<mx, isF=1>.val in {
835
- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 64)),
836
- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
837
- SiFive7GetCyclesDefault<mx>.c);
838
- defvar Lat8 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 8);
839
- defvar VA = !if(!and(isFP64Throttled, !eq(sew, 64)), VA1, VA1OrVA2);
840
834
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
841
- let Latency = Lat8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
842
- defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA], mx, sew, IsWorstCase>;
843
- defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA], mx, sew, IsWorstCase>;
844
- defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA], mx, sew, IsWorstCase>;
845
- defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA], mx, sew, IsWorstCase>;
846
- defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA], mx, sew, IsWorstCase>;
847
- defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA], mx, sew, IsWorstCase>;
848
- defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
849
- defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
850
- }
851
- defvar Lat4 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 4);
852
- let Latency = Lat4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
853
- defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA], mx, sew, IsWorstCase>;
854
- defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA], mx, sew, IsWorstCase>;
855
- // min max require merge
856
- defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
857
- defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
// sew == 64: each write class becomes a variant keyed on
// SingleElementVecFP64SchedPred instead of the removed isFP64Throttled flag.
// The "Predicated" argument group always names VA1 and derives both its
// latency (SingleElementCycles + 7 or + 3) and its release cycle from
// SiFive7GetCyclesOnePerElement; the "Not Predicated" group repeats the
// values used by the else-branch below.
// NOTE(review): the argument groups presumably mean (resources, latency,
// acquire-at cycles, release-at cycles) -- confirm against the definition of
// LMULSEWWriteResMXSEWVariant, which is not visible here.
// NOTE(review): the removed code used Cycles itself as the throttled latency;
// the new predicated latency is SingleElementCycles + 7 / + 3 -- confirm the
// change is intended.
835
+ if !eq(sew, 64) then {
836
+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
837
+ foreach SchedWriteName = ["WriteVFALUV", "WriteVFALUF", "WriteVFMulV", "WriteVFMulF",
838
+ "WriteVFMulAddV", "WriteVFMulAddF"] in
839
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
840
+ // Predicated
841
+ [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
842
+ // Not Predicated
843
+ [VCQ, VA1OrVA2], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
844
+ mx, sew, IsWorstCase>;
845
+ foreach SchedWriteName = ["WriteVFRecpV", "WriteVFCvtIToFV"] in
846
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
847
+ // Predicated
848
+ [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
849
+ // Not Predicated
850
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
851
+ mx, sew, IsWorstCase>;
852
+ foreach SchedWriteName = ["WriteVFSgnjV", "WriteVFSgnjF"] in
853
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
854
+ // Predicated
855
+ [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
856
+ // Not Predicated
857
+ [VCQ, VA1OrVA2], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
858
+ mx, sew, IsWorstCase>;
859
+ foreach SchedWriteName = ["WriteVFMinMaxV", "WriteVFMinMaxF"] in
860
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
861
+ // Predicated
862
+ [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
863
+ // Not Predicated
864
+ [VCQ, VA1], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
865
+ mx, sew, IsWorstCase>;
866
+ } else {
// Every other sew: plain (non-variant) WriteRes with a fixed latency of 8 or 4
// and a release cycle of 1 + SiFive7GetCyclesDefault<mx>.c.
867
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
868
+ defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
869
+ defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
870
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
871
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
872
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
873
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
874
+ defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
875
+ defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
876
+ }
877
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
878
+ defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
879
+ defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
880
+ // min max require merge
881
+ defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
882
+ defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
883
+ }
858
884
}
859
885
}
860
886
}
@@ -892,31 +918,48 @@ multiclass SiFive7WriteResBase<int VLEN,
892
918
// Widening
893
919
// Widening integer-to-FP conversion (WriteVFWCvtIToFV): one WriteRes per
// (mx, sew) pair from SchedMxListW / SchedSEWSet<mx, isF=0, isWidening=1>.
foreach mx = SchedMxListW in {
894
920
foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
895
- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
896
- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
897
- SiFive7GetCyclesDefault<mx>.c);
898
921
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
899
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
900
- defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
922
+ defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
// sew == 32 is the only case that gets a variant keyed on
// SingleElementVecFP64SchedPred. The two argument groups differ only in the
// release cycle (SingleElementCycles vs. DefaultCycles); both use VA1 and 8.
// NOTE(review): presumably sew == 32 is singled out because the widened
// result is 64-bit -- confirm.
// NOTE(review): the predicated value stays at 8 here, whereas the non-widening
// sew == 64 variants use SingleElementCycles + 7 -- confirm this is intended.
923
+ if !eq(sew, 32) then {
924
+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
925
+ defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtIToFV", SingleElementVecFP64SchedPred,
926
+ // Predicated
927
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
928
+ // Not Predicated
929
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
930
+ mx, sew, IsWorstCase>;
931
+ } else {
932
+ let Latency = 8,
933
+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
934
+ defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
935
+ }
901
936
}
902
937
}
903
938
foreach mx = SchedMxListFW in {
904
939
// Widening FP arithmetic and FP-to-FP conversion for each sew valid at this
// mx. The six WriteVFW{ALU,Mul,MulAdd}{V,F} classes always use VA1OrVA2 with
// latency 8 and a release cycle of 1 + DefaultCycles.
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
905
- defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
940
+ defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
906
941
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
907
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles )] in {
942
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles )] in {
908
943
defm : LMULSEWWriteResMXSEW<"WriteVFWALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
909
944
defm : LMULSEWWriteResMXSEW<"WriteVFWALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
910
945
defm : LMULSEWWriteResMXSEW<"WriteVFWMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
911
946
defm : LMULSEWWriteResMXSEW<"WriteVFWMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
912
947
defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
913
948
defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
914
949
}
915
- defvar CvtCycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
916
- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
917
- SiFive7GetCyclesDefault<mx>.c);
918
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, CvtCycles)] in
919
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
// WriteVFWCvtFToFV is the only class in this loop that is predicate-dependent:
// at sew == 32 it becomes a variant keyed on SingleElementVecFP64SchedPred
// whose two argument groups differ only in the release cycle.
// NOTE(review): presumably sew == 32 is singled out because the widened
// result is 64-bit -- confirm.
950
+ if !eq(sew, 32) then {
951
+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
952
+ defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtFToFV", SingleElementVecFP64SchedPred,
953
+ // Predicated
954
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
955
+ // Not Predicated
956
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
957
+ mx, sew, IsWorstCase>;
958
+ } else {
959
+ let Latency = 8,
960
+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
961
+ defm : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
962
+ }
920
963
}
921
964
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
922
965
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
@@ -933,13 +976,23 @@ multiclass SiFive7WriteResBase<int VLEN,
933
976
}
934
977
// Narrowing conversions (WriteVFNCvtIToFV / WriteVFNCvtFToFV): one WriteRes
// per (mx, sew) pair from SchedMxListFW / SchedSEWSet<mx, isF=1, isWidening=1>.
foreach mx = SchedMxListFW in {
935
978
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
936
- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
937
- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
938
- SiFive7GetCyclesNarrowing<mx>.c);
939
979
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
940
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
941
- defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
942
- defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
// Unlike the other loops in this multiclass, DefaultCycles here comes from
// SiFive7GetCyclesNarrowing rather than SiFive7GetCyclesDefault.
980
+ defvar DefaultCycles = SiFive7GetCyclesNarrowing<mx>.c;
// sew == 32: both conversions become variants keyed on
// SingleElementVecFP64SchedPred; the two argument groups differ only in the
// release cycle (SingleElementCycles vs. DefaultCycles).
// NOTE(review): presumably sew == 32 is singled out because the wide operand
// is 64-bit -- confirm.
981
+ if !eq(sew, 32) then {
982
+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
983
+ foreach SchedWriteName = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
984
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
985
+ // Predicated
986
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
987
+ // Not Predicated
988
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
989
+ mx, sew, IsWorstCase>;
990
+ } else {
991
+ let Latency = 8,
992
+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
993
+ defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
994
+ defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
995
+ }
943
996
}
944
997
}
945
998
}
@@ -1499,7 +1552,6 @@ multiclass SiFive7ReadAdvance {
1499
1552
/// eventually be supplied by different SchedMachineModels.
1500
1553
multiclass SiFive7SchedResources<int vlen, bit extraVALU,
1501
1554
SiFive7FPLatencies fpLatencies,
1502
- bit isFP64Throttled,
1503
1555
bit hasFastGather> {
1504
1556
defm SiFive7 : SiFive7ProcResources<extraVALU>;
1505
1557
@@ -1527,8 +1579,7 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
1527
1579
: SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB, SiFive7PipeAB,
1528
1580
SiFive7IDiv, SiFive7FDiv, SiFive7VA1,
1529
1581
SiFive7VA1OrVA2, SiFive7VL, SiFive7VS,
1530
- SiFive7VCQ, fpLatencies, isFP64Throttled,
1531
- hasFastGather>;
1582
+ SiFive7VCQ, fpLatencies, hasFastGather>;
1532
1583
1533
1584
//===----------------------------------------------------------------------===//
1534
1585
// Bypass and advance
@@ -1560,7 +1611,6 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
1560
1611
bit HasExtraVALU = false;
1561
1612
1562
1613
SiFive7FPLatencies FPLatencies;
1563
- bit IsFP64Throttled = false;
1564
1614
bit HasFastGather = false;
1565
1615
1566
1616
string Name = !subst("Model", "", !subst("SiFive7", "", NAME));
@@ -1587,7 +1637,6 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
1587
1637
// Scheduling model built from SiFive7SchedMachineModel with vlen = 1024. It
// overrides the class defaults to turn on HasExtraVALU and HasFastGather and
// selects SiFive7LowFPLatencies as its FP latency table. FP64 throttling is no
// longer a per-model flag: the IsFP64Throttled override is removed.
def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
1588
1638
let HasExtraVALU = true;
1589
1639
let FPLatencies = SiFive7LowFPLatencies;
1590
- let IsFP64Throttled = true;
1591
1640
let HasFastGather = true;
1592
1641
}
1593
1642
@@ -1596,7 +1645,6 @@ foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
1596
1645
// Instantiate SiFive7SchedResources once per model in the enclosing foreach,
// with SchedModel bound to that model. The defm prefix is model.Name, and the
// model's VLEN, HasExtraVALU, FPLatencies and HasFastGather fields are passed
// through as template arguments.
let SchedModel = model in
1597
1646
defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
1598
1647
model.FPLatencies,
1599
- model.IsFP64Throttled,
1600
1648
model.HasFastGather>;
1601
1649
}
1602
1650
0 commit comments