Skip to content

Commit 37aa347

Browse files
authored
[RISCV] Toggle throttled FP64 feature in SiFive7 scheduling model with subtarget feature (#162400)
This patch teaches the SiFive7 scheduling model to configure / toggle the throttled FP64 vector feature with a subtarget feature rather than a hard-coded TableGen parameter, which inevitably forces us to instantiate a new scheduling model for every performance feature like this.
1 parent c8afc6a commit 37aa347

File tree

5 files changed

+193
-124
lines changed

5 files changed

+193
-124
lines changed

llvm/lib/Target/RISCV/RISCVInstrPredicates.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
// otherwise.
1515
def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
1616

17+
// This scheduling predicate is true when subtarget feature TuneHasSingleElementVecFP64
18+
// is enabled.
19+
def SingleElementVecFP64SchedPred : FeatureSchedPredicate<TuneHasSingleElementVecFP64>;
20+
1721
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
1822
def isSEXT_W
1923
: TIIPredicate<"isSEXT_W",

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,8 @@ def SIFIVE_X390 : RISCVProcessorModel<"sifive-x390",
338338
FeatureStdExtZvl1024b,
339339
FeatureVendorXSiFivecdiscarddlone,
340340
FeatureVendorXSiFivecflushdlone],
341-
SiFiveIntelligenceTuneFeatures>;
341+
!listconcat(SiFiveIntelligenceTuneFeatures,
342+
[TuneHasSingleElementVecFP64])>;
342343

343344
defvar SiFiveP400TuneFeatures = [TuneNoDefaultUnroll,
344345
TuneConditionalCompressedMoveFusion,

llvm/lib/Target/RISCV/RISCVSchedSiFive7.td

Lines changed: 95 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,6 @@ multiclass SiFive7WriteResBase<int VLEN,
317317
ProcResourceKind VL, ProcResourceKind VS,
318318
ProcResourceKind VCQ,
319319
SiFive7FPLatencies fpLatencies,
320-
bit isFP64Throttled = false,
321320
bit hasFastGather = false> {
322321

323322
// Branching
@@ -832,29 +831,56 @@ multiclass SiFive7WriteResBase<int VLEN,
832831
// 13. Vector Floating-Point Instructions
833832
foreach mx = SchedMxListF in {
834833
foreach sew = SchedSEWSet<mx, isF=1>.val in {
835-
defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 64)),
836-
SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
837-
SiFive7GetCyclesDefault<mx>.c);
838-
defvar Lat8 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 8);
839-
defvar VA = !if(!and(isFP64Throttled, !eq(sew, 64)), VA1, VA1OrVA2);
840834
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
841-
let Latency = Lat8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
842-
defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA], mx, sew, IsWorstCase>;
843-
defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA], mx, sew, IsWorstCase>;
844-
defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA], mx, sew, IsWorstCase>;
845-
defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA], mx, sew, IsWorstCase>;
846-
defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA], mx, sew, IsWorstCase>;
847-
defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA], mx, sew, IsWorstCase>;
848-
defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
849-
defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
850-
}
851-
defvar Lat4 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 4);
852-
let Latency = Lat4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
853-
defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA], mx, sew, IsWorstCase>;
854-
defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA], mx, sew, IsWorstCase>;
855-
// min max require merge
856-
defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
857-
defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
835+
if !eq(sew, 64) then {
836+
defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
837+
foreach SchedWriteName = ["WriteVFALUV", "WriteVFALUF", "WriteVFMulV", "WriteVFMulF",
838+
"WriteVFMulAddV", "WriteVFMulAddF"] in
839+
defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
840+
// Predicated
841+
[VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
842+
// Not Predicated
843+
[VCQ, VA1OrVA2], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
844+
mx, sew, IsWorstCase>;
845+
foreach SchedWriteName = ["WriteVFRecpV", "WriteVFCvtIToFV"] in
846+
defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
847+
// Predicated
848+
[VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
849+
// Not Predicated
850+
[VCQ, VA1], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
851+
mx, sew, IsWorstCase>;
852+
foreach SchedWriteName = ["WriteVFSgnjV", "WriteVFSgnjF"] in
853+
defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
854+
// Predicated
855+
[VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
856+
// Not Predicated
857+
[VCQ, VA1OrVA2], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
858+
mx, sew, IsWorstCase>;
859+
foreach SchedWriteName = ["WriteVFMinMaxV", "WriteVFMinMaxF"] in
860+
defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
861+
// Predicated
862+
[VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
863+
// Not Predicated
864+
[VCQ, VA1], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
865+
mx, sew, IsWorstCase>;
866+
} else {
867+
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
868+
defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
869+
defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
870+
defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
871+
defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
872+
defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
873+
defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
874+
defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
875+
defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
876+
}
877+
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
878+
defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
879+
defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
880+
// min max require merge
881+
defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
882+
defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
883+
}
858884
}
859885
}
860886
}
@@ -892,31 +918,48 @@ multiclass SiFive7WriteResBase<int VLEN,
892918
// Widening
893919
foreach mx = SchedMxListW in {
894920
foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
895-
defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
896-
SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
897-
SiFive7GetCyclesDefault<mx>.c);
898921
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
899-
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
900-
defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
922+
defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
923+
if !eq(sew, 32) then {
924+
defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
925+
defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtIToFV", SingleElementVecFP64SchedPred,
926+
// Predicated
927+
[VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
928+
// Not Predicated
929+
[VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
930+
mx, sew, IsWorstCase>;
931+
} else {
932+
let Latency = 8,
933+
AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
934+
defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
935+
}
901936
}
902937
}
903938
foreach mx = SchedMxListFW in {
904939
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
905-
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
940+
defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
906941
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
907-
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
942+
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
908943
defm : LMULSEWWriteResMXSEW<"WriteVFWALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
909944
defm : LMULSEWWriteResMXSEW<"WriteVFWALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
910945
defm : LMULSEWWriteResMXSEW<"WriteVFWMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
911946
defm : LMULSEWWriteResMXSEW<"WriteVFWMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
912947
defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
913948
defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
914949
}
915-
defvar CvtCycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
916-
SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
917-
SiFive7GetCyclesDefault<mx>.c);
918-
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, CvtCycles)] in
919-
defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
950+
if !eq(sew, 32) then {
951+
defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
952+
defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtFToFV", SingleElementVecFP64SchedPred,
953+
// Predicated
954+
[VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
955+
// Not Predicated
956+
[VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
957+
mx, sew, IsWorstCase>;
958+
} else {
959+
let Latency = 8,
960+
AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
961+
defm : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
962+
}
920963
}
921964
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
922965
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
@@ -933,13 +976,23 @@ multiclass SiFive7WriteResBase<int VLEN,
933976
}
934977
foreach mx = SchedMxListFW in {
935978
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
936-
defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
937-
SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
938-
SiFive7GetCyclesNarrowing<mx>.c);
939979
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
940-
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
941-
defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
942-
defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
980+
defvar DefaultCycles = SiFive7GetCyclesNarrowing<mx>.c;
981+
if !eq(sew, 32) then {
982+
defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
983+
foreach SchedWriteName = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
984+
defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
985+
// Predicated
986+
[VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
987+
// Not Predicated
988+
[VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
989+
mx, sew, IsWorstCase>;
990+
} else {
991+
let Latency = 8,
992+
AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
993+
defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
994+
defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
995+
}
943996
}
944997
}
945998
}
@@ -1499,7 +1552,6 @@ multiclass SiFive7ReadAdvance {
14991552
/// eventually be supplied by different SchedMachineModels.
15001553
multiclass SiFive7SchedResources<int vlen, bit extraVALU,
15011554
SiFive7FPLatencies fpLatencies,
1502-
bit isFP64Throttled,
15031555
bit hasFastGather> {
15041556
defm SiFive7 : SiFive7ProcResources<extraVALU>;
15051557

@@ -1527,8 +1579,7 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
15271579
: SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB, SiFive7PipeAB,
15281580
SiFive7IDiv, SiFive7FDiv, SiFive7VA1,
15291581
SiFive7VA1OrVA2, SiFive7VL, SiFive7VS,
1530-
SiFive7VCQ, fpLatencies, isFP64Throttled,
1531-
hasFastGather>;
1582+
SiFive7VCQ, fpLatencies, hasFastGather>;
15321583

15331584
//===----------------------------------------------------------------------===//
15341585
// Bypass and advance
@@ -1560,7 +1611,6 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
15601611
bit HasExtraVALU = false;
15611612

15621613
SiFive7FPLatencies FPLatencies;
1563-
bit IsFP64Throttled = false;
15641614
bit HasFastGather = false;
15651615

15661616
string Name = !subst("Model", "", !subst("SiFive7", "", NAME));
@@ -1587,7 +1637,6 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
15871637
def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
15881638
let HasExtraVALU = true;
15891639
let FPLatencies = SiFive7LowFPLatencies;
1590-
let IsFP64Throttled = true;
15911640
let HasFastGather = true;
15921641
}
15931642

@@ -1596,7 +1645,6 @@ foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
15961645
let SchedModel = model in
15971646
defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
15981647
model.FPLatencies,
1599-
model.IsFP64Throttled,
16001648
model.HasFastGather>;
16011649
}
16021650

llvm/lib/Target/RISCV/RISCVScheduleV.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,22 @@ multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
128128
IsWorstCase>;
129129
}
130130

131+
// Per-(LMUL, SEW) counterpart of LMULWriteResMXVariant: forwards to
// LMULWriteResVariantImpl using the write name `name # "_" # mx # "_E" # sew`.
//   name                 - base SchedWrite name (e.g. "WriteVFALUV").
//   Pred                 - scheduling predicate that chooses between the two
//                          resource descriptions below.
//   predResources, predLat, predAcquireCycles, predReleaseCycles
//                        - resources / latency / acquire+release cycles for
//                          the predicated case (presumably used when Pred
//                          holds; confirm against LMULWriteResVariantImpl).
//   noPredResources, noPredLat, noPredAcquireCycles, noPredReleaseCycles
//                        - the same description for the non-predicated case.
//   mx, sew              - LMUL string and element width used to form the
//                          per-variant write name.
//   IsWorstCase          - passed through unchanged to the implementation.
multiclass LMULSEWWriteResMXSEWVariant<string name, SchedPredicateBase Pred,
132+
list<ProcResourceKind> predResources,
133+
int predLat, list<int> predAcquireCycles,
134+
list<int> predReleaseCycles,
135+
list<ProcResourceKind> noPredResources,
136+
int noPredLat, list<int> noPredAcquireCycles,
137+
list<int> noPredReleaseCycles,
138+
string mx, int sew, bit IsWorstCase> {
139+
defm "" : LMULWriteResVariantImpl<name, name # "_" # mx # "_E" # sew, Pred, predResources,
140+
predLat, predAcquireCycles,
141+
predReleaseCycles, noPredResources,
142+
noPredLat, noPredAcquireCycles,
143+
noPredReleaseCycles,
144+
IsWorstCase>;
145+
}
146+
131147
// Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
132148
// ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
133149
// SchedMxList variants above. Each multiclass is responsible for defining

0 commit comments

Comments
 (0)