@@ -169,6 +169,64 @@ class SiFive7GetOrderedReductionCycles<string mx, int sew, int VLEN> {
169
169
int c = !mul(6, VLUpperBound);
170
170
}
171
171
172
// Predicate on an LMUL name: c is 1 when mx is one of the fractional LMULs
// ("MF2", "MF4" or "MF8") and 0 for every other string.
class isSingleDLEN<string mx> {
  bit c = !or(!eq(mx, "MF2"), !eq(mx, "MF4"), !eq(mx, "MF8"));
}
175
+
176
// Cycle count `c` used for vrgather.vv, selected as follows:
//
//   if (!hasFastGather)
//     c = vl;
//   else if (isSingleDLEN(mx))
//     c = 1;
//   else if (log2(SEW/8) + log2(LMUL) <= log2(DLEN / 32))
//     c = LMUL * 2 * ceil(vl * SEW / DLEN);
//   else
//     c = vl;
//
// "vl" here is the value produced by SiFive7GetCyclesOnePerElement.
class SiFive7GetCyclesVRGatherVV<string mx, int sew, int VLEN,
                                 bit hasFastGather> {
  // log2(SEW / 8)
  defvar Log2SEWBytes = !logtwo(!div(sew, 8));

  // log2(SEW / 8) + log2(LMUL) for the integral LMULs. Any other mx gets the
  // placeholder 1 so that the expression still resolves; with hasFastGather
  // set, the fractional LMULs are handled by the IsSingleDLEN arm before this
  // value is ever consulted.
  defvar Log2SEWTimesLMUL = !cond(
    !eq(mx, "M1") : Log2SEWBytes,
    !eq(mx, "M2") : !add(Log2SEWBytes, 1),
    !eq(mx, "M4") : !add(Log2SEWBytes, 2),
    !eq(mx, "M8") : !add(Log2SEWBytes, 3),
    true : 1
  );

  // LMUL * 2 * ceil(vl * SEW / DLEN) = LMUL * 2 * (2 * LMUL), tabulated per
  // integral LMUL. The trailing 1 is the same kind of unused placeholder as
  // above.
  defvar FastGatherCycles = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 16,
    !eq(mx, "M4") : 64,
    !eq(mx, "M8") : 256,
    true : 1
  );

  // log2(DLEN / 32), written in terms of VLEN (VLEN / 64).
  defvar Log2Threshold = !logtwo(!div(VLEN, 64));

  defvar VLUpperBound = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
  bit IsSingleDLEN = isSingleDLEN<mx>.c;

  int c = !cond(
    // Without fast gather every case costs one cycle per element.
    !not(hasFastGather) : VLUpperBound,
    IsSingleDLEN : 1,
    !ge(Log2Threshold, Log2SEWTimesLMUL) : FastGatherCycles,
    true : VLUpperBound
  );
}
214
+
215
// Cycle count `c` used for vcompress:
//
//   c = (hasFastGather && isSingleDLEN(mx)) ? 1 : vl
//
// where "vl" is the value produced by SiFive7GetCyclesOnePerElement.
class SiFive7GetCyclesVCompress<string mx, int sew, int VLEN,
                                bit hasFastGather> {
  defvar VLUpperBound = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
  bit IsSingleDLEN = isSingleDLEN<mx>.c;

  int c = !cond(
    !and(hasFastGather, IsSingleDLEN) : 1,
    true : VLUpperBound
  );
}
229
+
172
230
class SiFive7GetSiFiveVFNRClipCycles<string mx, int VLEN> {
173
231
int latency = !cond(
174
232
!eq(mx, "MF8"): 7,
@@ -259,7 +317,8 @@ multiclass SiFive7WriteResBase<int VLEN,
259
317
ProcResourceKind VL, ProcResourceKind VS,
260
318
ProcResourceKind VCQ,
261
319
SiFive7FPLatencies fpLatencies,
262
- bit isFP64Throttled = false> {
320
+ bit isFP64Throttled = false,
321
+ bit hasFastGather = false> {
263
322
264
323
// Branching
265
324
let Latency = 3 in {
@@ -976,13 +1035,33 @@ multiclass SiFive7WriteResBase<int VLEN,
976
1035
977
1036
foreach mx = SchedMxList in {
978
1037
foreach sew = SchedSEWSet<mx>.val in {
979
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
980
1038
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
981
- let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
982
- defm : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [VCQ, VA1], mx, sew, IsWorstCase>;
983
- defm : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [VCQ, VA1], mx, sew, IsWorstCase>;
984
- defm : LMULSEWWriteResMXSEW<"WriteVCompressV", [VCQ, VA1], mx, sew, IsWorstCase>;
985
- }
1039
+ defvar IsSingleDLEN = isSingleDLEN<mx>.c;
1040
+
1041
+ defvar GatherVVCycles =
1042
+ SiFive7GetCyclesVRGatherVV<mx, sew, VLEN, hasFastGather>.c;
1043
+ // 7 + DLEN / SEW, where DLEN = VLEN / 2
1044
+ defvar SlowGatherLat = !add(7, !div(!div(VLEN, 2), sew));
1045
+ defvar GatherVVLat = !if(hasFastGather,
1046
+ !add(3, GatherVVCycles), SlowGatherLat);
1047
+
1048
+ let Latency = GatherVVLat, AcquireAtCycles = [0, 1],
1049
+ ReleaseAtCycles = [1, !add(5, GatherVVCycles)] in
1050
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [VCQ, VA1], mx, sew, IsWorstCase>;
1051
+
1052
+ // VRGatherEI16VV is not improved by fastGather.
1053
+ defvar GatherEI16VVCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
1054
+ let Latency = SlowGatherLat, AcquireAtCycles = [0, 1],
1055
+ ReleaseAtCycles = [1, !add(5, GatherEI16VVCycles)] in
1056
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [VCQ, VA1], mx, sew, IsWorstCase>;
1057
+
1058
+ defvar CompressCycles = SiFive7GetCyclesVCompress<mx, sew, VLEN, hasFastGather>.c;
1059
+ defvar CompressLat = !if(!and(hasFastGather, IsSingleDLEN),
1060
+ 4,
1061
+ !add(7, CompressCycles)); // 7 + VL
1062
+ let Latency = CompressLat, AcquireAtCycles = [0, 1],
1063
+ ReleaseAtCycles = [1, !add(8, CompressCycles)] in
1064
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [VCQ, VA1], mx, sew, IsWorstCase>;
986
1065
}
987
1066
}
988
1067
@@ -1408,7 +1487,8 @@ multiclass SiFive7ReadAdvance {
1408
1487
/// eventually be supplied by different SchedMachineModels.
1409
1488
multiclass SiFive7SchedResources<int vlen, bit extraVALU,
1410
1489
SiFive7FPLatencies fpLatencies,
1411
- bit isFP64Throttled> {
1490
+ bit isFP64Throttled,
1491
+ bit hasFastGather> {
1412
1492
defm SiFive7 : SiFive7ProcResources<extraVALU>;
1413
1493
1414
1494
// Pull out defs from SiFive7ProcResources so we can refer to them by name.
@@ -1435,7 +1515,8 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
1435
1515
: SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB, SiFive7PipeAB,
1436
1516
SiFive7IDiv, SiFive7FDiv, SiFive7VA1,
1437
1517
SiFive7VA1OrVA2, SiFive7VL, SiFive7VS,
1438
- SiFive7VCQ, fpLatencies, isFP64Throttled>;
1518
+ SiFive7VCQ, fpLatencies, isFP64Throttled,
1519
+ hasFastGather>;
1439
1520
1440
1521
//===----------------------------------------------------------------------===//
1441
1522
// Bypass and advance
@@ -1468,6 +1549,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
1468
1549
1469
1550
SiFive7FPLatencies FPLatencies;
1470
1551
bit IsFP64Throttled = false;
1552
+ bit HasFastGather = false;
1471
1553
1472
1554
string Name = !subst("Model", "", !subst("SiFive7", "", NAME));
1473
1555
}
@@ -1494,14 +1576,16 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
1494
1576
let HasExtraVALU = true;
1495
1577
let FPLatencies = SiFive7LowFPLatencies;
1496
1578
let IsFP64Throttled = true;
1579
+ let HasFastGather = true;
1497
1580
}
1498
1581
1499
1582
/// Binding models to their scheduling resources.
1500
1583
foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
1501
1584
let SchedModel = model in
1502
1585
defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
1503
1586
model.FPLatencies,
1504
- model.IsFP64Throttled>;
1587
+ model.IsFP64Throttled,
1588
+ model.HasFastGather>;
1505
1589
}
1506
1590
1507
1591
// Some model name aliases.
0 commit comments