Skip to content

Commit 162020b

Browse files
committed
[WIP][RISCV] tt-ascalon-d8 vector scheduling
Drive-by: additional tuning knobs. Partial scheduling model for vector operations.
1 parent 8f683c3 commit 162020b

File tree

2 files changed

+319
-6
lines changed

2 files changed

+319
-6
lines changed

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,13 @@ def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8",
633633
FeatureUnalignedVectorMem]),
634634
[TuneNoDefaultUnroll,
635635
TuneNLogNVRGather,
636+
TuneOptimizedNF2SegmentLoadStore,
637+
TuneOptimizedNF3SegmentLoadStore,
638+
TuneOptimizedNF4SegmentLoadStore,
639+
TuneOptimizedNF5SegmentLoadStore,
640+
TuneOptimizedNF6SegmentLoadStore,
641+
TuneOptimizedNF7SegmentLoadStore,
642+
TuneOptimizedNF8SegmentLoadStore,
636643
TuneOptimizedZeroStrideLoad,
637644
TunePostRAScheduler]>;
638645

llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td

Lines changed: 312 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,85 @@
88

99
//===----------------------------------------------------------------------===//
1010

11+
/// Worst-case predicate keyed on LMUL only: `c` is set when `mx` equals
/// LargestLMUL<MxList>.r and clear for every other `mx`.
class AscalonIsWorstCaseMX<string mx, list<string> MxList> {
12+
defvar LLMUL = LargestLMUL<MxList>.r;
13+
bit c = !eq(mx, LLMUL);
14+
}
15+
16+
/// Worst-case predicate keyed on LMUL and SEW: `c` is set only when `mx`
/// equals LargestLMUL<MxList>.r and `sew` equals SmallestSEW<mx, isF>.r.
/// `isF` is forwarded unchanged to SmallestSEW and defaults to 0.
class AscalonIsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
17+
bit isF = 0> {
18+
defvar LLMUL = LargestLMUL<MxList>.r;
19+
defvar SSEW = SmallestSEW<mx, isF>.r;
20+
bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
21+
}
22+
23+
/// Cycle counts that scale with LMUL with LMUL=1 having the same latency as
24+
/// fractional LMULs
25+
class AscalonGetCyclesLMUL<string mx, int base> {
26+
int c = !cond(
27+
!eq(mx, "M1") : base,
28+
!eq(mx, "M2") : !mul(base, 2),
29+
!eq(mx, "M4") : !mul(base, 4),
30+
!eq(mx, "M8") : !mul(base, 8),
31+
!eq(mx, "MF2") : base,
32+
!eq(mx, "MF4") : base,
33+
!eq(mx, "MF8") : base
34+
);
35+
}
36+
37+
/// Linear LMUL scaling starting from smallest fractional LMUL
38+
class AscalonGetCyclesLMULFractional<string mx, int base> {
39+
int c = !cond(
40+
!eq(mx, "MF8") : base,
41+
!eq(mx, "MF4") : !mul(base, 2),
42+
!eq(mx, "MF2") : !mul(base, 4),
43+
!eq(mx, "M1") : !mul(base, 8),
44+
!eq(mx, "M2") : !mul(base, 16),
45+
!eq(mx, "M4") : !mul(base, 32),
46+
!eq(mx, "M8") : !mul(base, 64)
47+
);
48+
}
49+
50+
/// AscalonGetCyclesLMUL with a base of 1: `c` is 1 for M1 and the fractional
/// LMULs, and 2 / 4 / 8 for M2 / M4 / M8.
class AscalonGetCyclesDefault<string mx> {
51+
int c = AscalonGetCyclesLMUL<mx, 1>.c;
52+
}
53+
54+
/// Fixed cycle table for narrowing operations:
/// MF8 -> 1, MF4 -> 1, MF2 -> 2, M1 -> 4, M2 -> 8, M4 -> 16.
/// NOTE(review): there is no M8 case. The uses in this file iterate
/// SchedMxListW / SchedMxListFW, which presumably never contain M8 -- confirm
/// before using this class with any other LMUL list.
class AscalonGetCyclesNarrowing<string mx> {
55+
int c = !cond(
56+
!eq(mx, "M1") : 4,
57+
!eq(mx, "M2") : 8,
58+
!eq(mx, "M4") : 16,
59+
!eq(mx, "MF2") : 2,
60+
!eq(mx, "MF4") : 1,
61+
!eq(mx, "MF8") : 1
62+
);
63+
}
64+
65+
66+
/// Cycle count for divide / square-root: a base is picked by SEW
/// (8 -> 7, 16 -> 6, 32 -> 5, 64 -> 8) and then scaled by LMUL through
/// AscalonGetCyclesLMUL. Used in this file for both the integer divide and
/// the floating-point sqrt/divide write resources.
/// NOTE(review): the per-SEW bases are not monotonic in SEW (32 is the
/// cheapest, 64 the most expensive) -- confirm against the hardware numbers.
class AscalonGetCyclesDivOrSqrt<string mx, int sew> {
67+
int c = !cond(
68+
!eq(sew, 8) : AscalonGetCyclesLMUL<mx, 7>.c, // TODO: SEW=8 is not a valid floating-point element width
69+
!eq(sew, 16) : AscalonGetCyclesLMUL<mx, 6>.c,
70+
!eq(sew, 32) : AscalonGetCyclesLMUL<mx, 5>.c,
71+
!eq(sew, 64) : AscalonGetCyclesLMUL<mx, 8>.c
72+
);
73+
}
74+
75+
//===----------------------------------------------------------------------===//
76+
1177
def TTAscalonD8Model : SchedMachineModel {
1278
let IssueWidth = 8; // 8-way decode and dispatch
1379
let MicroOpBufferSize = 256; // 256 micro-op re-order buffer
1480
let LoadLatency = 4; // Optimistic load latency
1581
let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
1682

17-
let CompleteModel = 0;
83+
let CompleteModel = false;
1884

1985
// TODO: supported, but haven't added scheduling info yet.
2086
let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
2187
HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
2288
HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
23-
HasStdExtZkr, HasVInstructions, HasVInstructionsI64];
89+
HasStdExtZkr];
2490
}
2591

2692
let SchedModel = TTAscalonD8Model in {
@@ -34,11 +100,17 @@ let BufferSize = 16 in {
34100
def AscalonFXB : ProcResource<1>; // ALU, INT -> FP/VEC
35101
def AscalonFXC : ProcResource<2>; // ALU, BR
36102
def AscalonFXD : ProcResource<2>; // ALU
37-
def AscalonFP : ProcResource<2>;
38-
// TODO: two vector units with vector scheduling model.
103+
def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;
104+
// FP
105+
def AscalonFPA : ProcResource<1>; // Pipe A also handles FP/VEC -> INT
106+
def AscalonFPB : ProcResource<1>;
107+
def AscalonFP : ProcResGroup<[AscalonFPA, AscalonFPB]>;
108+
// Vector
109+
def AscalonVA : ProcResource<1>;
110+
def AscalonVB : ProcResource<1>;
111+
def AscalonV : ProcResGroup<[AscalonVA, AscalonVB]>; // Either of the two vector pipes (VA, VB)
39112
}
40113

41-
def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;
42114

43115
//===----------------------------------------------------------------------===//
44116

@@ -316,10 +388,244 @@ def : ReadAdvance<ReadSHXADD32, 0>;
316388
def : ReadAdvance<ReadSingleBit, 0>;
317389
def : ReadAdvance<ReadSingleBitImm, 0>;
318390

391+
//===----------------------------------------------------------------------===//
392+
// Vector
393+
394+
// Configuration-Setting Instructions
395+
let Latency = 1 in {
396+
def : WriteRes<WriteVSETVLI, [AscalonV]>;
397+
def : WriteRes<WriteVSETIVLI, [AscalonV]>;
398+
}
399+
let Latency = 2 in {
400+
def : WriteRes<WriteVSETVL, [AscalonV]>;
401+
}
402+
403+
// Vector Integer Arithmetic Instructions
404+
foreach mx = SchedMxList in {
405+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
406+
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
407+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
408+
defm "" : LMULWriteResMX<"WriteVIALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
409+
defm "" : LMULWriteResMX<"WriteVIALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
410+
defm "" : LMULWriteResMX<"WriteVIALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
411+
defm "" : LMULWriteResMX<"WriteVICALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
412+
defm "" : LMULWriteResMX<"WriteVICALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
413+
defm "" : LMULWriteResMX<"WriteVICALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
414+
defm "" : LMULWriteResMX<"WriteVICALUMV", [AscalonFX, AscalonV], mx, IsWorstCase>;
415+
defm "" : LMULWriteResMX<"WriteVICALUMX", [AscalonFX, AscalonV], mx, IsWorstCase>;
416+
defm "" : LMULWriteResMX<"WriteVICALUMI", [AscalonFX, AscalonV], mx, IsWorstCase>;
417+
defm "" : LMULWriteResMX<"WriteVShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>;
418+
defm "" : LMULWriteResMX<"WriteVShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>;
419+
defm "" : LMULWriteResMX<"WriteVShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>;
420+
defm "" : LMULWriteResMX<"WriteVIMinMaxV", [AscalonFX, AscalonV], mx, IsWorstCase>;
421+
defm "" : LMULWriteResMX<"WriteVIMinMaxX", [AscalonFX, AscalonV], mx, IsWorstCase>;
422+
defm "" : LMULWriteResMX<"WriteVIMulV", [AscalonFX, AscalonV], mx, IsWorstCase>;
423+
defm "" : LMULWriteResMX<"WriteVIMulX", [AscalonFX, AscalonV], mx, IsWorstCase>;
424+
defm "" : LMULWriteResMX<"WriteVIMulAddV", [AscalonFX, AscalonV], mx, IsWorstCase>;
425+
defm "" : LMULWriteResMX<"WriteVIMulAddX", [AscalonFX, AscalonV], mx, IsWorstCase>;
426+
defm "" : LMULWriteResMX<"WriteVIMergeV", [AscalonFX, AscalonV], mx, IsWorstCase>;
427+
defm "" : LMULWriteResMX<"WriteVIMergeX", [AscalonFX, AscalonV], mx, IsWorstCase>;
428+
defm "" : LMULWriteResMX<"WriteVIMergeI", [AscalonFX, AscalonV], mx, IsWorstCase>;
429+
defm "" : LMULWriteResMX<"WriteVIMovV", [AscalonFX, AscalonV], mx, IsWorstCase>;
430+
defm "" : LMULWriteResMX<"WriteVIMovX", [AscalonFX, AscalonV], mx, IsWorstCase>;
431+
defm "" : LMULWriteResMX<"WriteVIMovI", [AscalonFX, AscalonV], mx, IsWorstCase>;
432+
defm "" : LMULWriteResMX<"WriteVICmpV", [AscalonFX, AscalonV], mx, IsWorstCase>;
433+
defm "" : LMULWriteResMX<"WriteVICmpX", [AscalonFX, AscalonV], mx, IsWorstCase>;
434+
defm "" : LMULWriteResMX<"WriteVICmpI", [AscalonFX, AscalonV], mx, IsWorstCase>;
435+
}
436+
}
437+
foreach mx = SchedMxList in {
438+
foreach sew = SchedSEWSet<mx>.val in {
439+
defvar Cycles = AscalonGetCyclesDivOrSqrt<mx, sew>.c;
440+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
441+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
442+
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
443+
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
444+
}
445+
}
446+
}
447+
448+
// Widening
449+
foreach mx = SchedMxListW in {
450+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
451+
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
452+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
453+
defm "" : LMULWriteResMX<"WriteVIWALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
454+
defm "" : LMULWriteResMX<"WriteVIWALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
455+
defm "" : LMULWriteResMX<"WriteVIWALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
456+
defm "" : LMULWriteResMX<"WriteVIWMulV", [AscalonFX, AscalonV], mx, IsWorstCase>;
457+
defm "" : LMULWriteResMX<"WriteVIWMulX", [AscalonFX, AscalonV], mx, IsWorstCase>;
458+
defm "" : LMULWriteResMX<"WriteVIWMulAddV", [AscalonFX, AscalonV], mx, IsWorstCase>;
459+
defm "" : LMULWriteResMX<"WriteVIWMulAddX", [AscalonFX, AscalonV], mx, IsWorstCase>;
460+
}
461+
}
462+
// Narrowing
463+
foreach mx = SchedMxListW in {
464+
defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
465+
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
466+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
467+
defm "" : LMULWriteResMX<"WriteVNShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>;
468+
defm "" : LMULWriteResMX<"WriteVNShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>;
469+
defm "" : LMULWriteResMX<"WriteVNShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>;
470+
}
471+
}
472+
473+
// Vector Floating-Point Instructions
474+
foreach mx = SchedMxListF in {
475+
foreach sew = SchedSEWSet<mx, isF=1>.val in {
476+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
477+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
478+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
479+
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
480+
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
481+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
482+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
483+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
484+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
485+
defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
486+
defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
487+
}
488+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
489+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
490+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
491+
defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
492+
defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
493+
}
494+
}
495+
}
496+
foreach mx = SchedMxList in {
497+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
498+
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
499+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
500+
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
501+
}
502+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
503+
defm "" : LMULWriteResMX<"WriteVFClassV", [AscalonFP, AscalonV], mx, IsWorstCase>;
504+
defm "" : LMULWriteResMX<"WriteVFMergeV", [AscalonFP, AscalonV], mx, IsWorstCase>;
505+
defm "" : LMULWriteResMX<"WriteVFMovV", [AscalonFP, AscalonV], mx, IsWorstCase>;
506+
}
507+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
508+
defm "" : LMULWriteResMX<"WriteVFCmpV", [AscalonFP, AscalonV], mx, IsWorstCase>;
509+
defm "" : LMULWriteResMX<"WriteVFCmpF", [AscalonFP, AscalonV], mx, IsWorstCase>;
510+
}
511+
}
512+
foreach mx = SchedMxListF in {
513+
foreach sew = SchedSEWSet<mx, isF=1>.val in {
514+
defvar Cycles = AscalonGetCyclesDivOrSqrt<mx, sew>.c;
515+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
516+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
517+
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
518+
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
519+
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
520+
}
521+
}
522+
}
523+
524+
// Widening
525+
foreach mx = SchedMxListW in {
526+
foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
527+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
528+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
529+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in
530+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
531+
}
532+
}
533+
foreach mx = SchedMxListFW in {
534+
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
535+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
536+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
537+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
538+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
539+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
540+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
541+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
542+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
543+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
544+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
545+
}
546+
}
547+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
548+
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListFW>.c;
549+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in
550+
defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
551+
}
552+
// Narrowing
553+
foreach mx = SchedMxListW in {
554+
defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
555+
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
556+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
557+
defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
558+
}
559+
}
560+
foreach mx = SchedMxListFW in {
561+
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
562+
defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
563+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
564+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
565+
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
566+
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
567+
}
568+
}
569+
}
570+
571+
// Vector Reduction Instructions
572+
foreach mx = SchedMxList in {
573+
foreach sew = SchedSEWSet<mx>.val in {
574+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
575+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
576+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
577+
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [AscalonFX, AscalonV],
578+
mx, sew, IsWorstCase>;
579+
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [AscalonFX, AscalonV],
580+
mx, sew, IsWorstCase>;
581+
}
582+
}
583+
}
584+
585+
foreach mx = SchedMxListWRed in {
586+
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
587+
defvar Cycles = AscalonGetCyclesDefault<mx>.c;
588+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
589+
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in
590+
defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [AscalonFX, AscalonV],
591+
mx, sew, IsWorstCase>;
592+
}
593+
}
594+
595+
foreach mx = SchedMxListF in {
596+
foreach sew = SchedSEWSet<mx, 1>.val in {
597+
defvar RedCycles = AscalonGetCyclesDefault<mx>.c;
598+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
599+
let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, RedCycles] in {
600+
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [AscalonFX, AscalonV],
601+
mx, sew, IsWorstCase>;
602+
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [AscalonFX, AscalonV],
603+
mx, sew, IsWorstCase>;
604+
}
605+
defvar OrdRedCycles = AscalonGetCyclesLMUL<mx, 18>.c;
606+
let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, OrdRedCycles] in
607+
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [AscalonFX, AscalonV],
608+
mx, sew, IsWorstCase>;
609+
}
610+
}
611+
612+
foreach mx = SchedMxListFWRed in {
613+
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
614+
defvar RedCycles = AscalonGetCyclesDefault<mx>.c;
615+
defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
616+
let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, RedCycles] in
617+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [AscalonFX, AscalonV],
618+
mx, sew, IsWorstCase>;
619+
defvar OrdRedCycles = AscalonGetCyclesLMUL<mx, 18>.c;
620+
let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, OrdRedCycles] in
621+
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [AscalonFX, AscalonV],
622+
mx, sew, IsWorstCase>;
623+
}
624+
}
625+
319626
//===----------------------------------------------------------------------===//
320627
// Unsupported extensions
321628
defm : UnsupportedSchedQ;
322-
defm : UnsupportedSchedV;
323629
defm : UnsupportedSchedZabha;
324630
defm : UnsupportedSchedZbc;
325631
defm : UnsupportedSchedZbkb;

0 commit comments

Comments
 (0)