88
99//===----------------------------------------------------------------------===//
1010
/// Worst-case predicate keyed on LMUL only.
/// `c` is 1 when `mx` equals LargestLMUL<MxList>.r and 0 otherwise.
/// (LargestLMUL is defined outside this view; presumably it yields the
/// largest LMUL string contained in MxList -- confirm.)
11+ class AscalonIsWorstCaseMX<string mx, list<string> MxList> {
12+ defvar LLMUL = LargestLMUL<MxList>.r;
13+ bit c = !eq(mx, LLMUL);
14+ }
15+
/// Worst-case predicate keyed on both LMUL and SEW.
/// `c` is 1 only when `mx` equals LargestLMUL<MxList>.r AND `sew` equals
/// SmallestSEW<mx, isF>.r; otherwise 0. `isF` is forwarded unchanged to
/// SmallestSEW and defaults to 0.
/// (Both helpers are defined outside this view.)
16+ class AscalonIsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
17+ bit isF = 0> {
18+ defvar LLMUL = LargestLMUL<MxList>.r;
19+ defvar SSEW = SmallestSEW<mx, isF>.r;
20+ bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
21+ }
22+
23+ /// Cycle counts that scale with LMUL with LMUL=1 having the same latency as
24+ /// fractional LMULs
/// `c` = base for "M1", "MF2", "MF4" and "MF8"; base*2, base*4 and base*8 for
/// "M2", "M4" and "M8" respectively.
/// The !cond below has no catch-all arm, so `mx` must be one of the seven
/// strings listed.
25+ class AscalonGetCyclesLMUL<string mx, int base> {
26+ int c = !cond(
27+ !eq(mx, "M1") : base,
28+ !eq(mx, "M2") : !mul(base, 2),
29+ !eq(mx, "M4") : !mul(base, 4),
30+ !eq(mx, "M8") : !mul(base, 8),
31+ !eq(mx, "MF2") : base,
32+ !eq(mx, "MF4") : base,
33+ !eq(mx, "MF8") : base
34+ );
35+ }
36+
37+ /// Linear LMUL scaling starting from smallest fractional LMUL
/// `c` doubles at every LMUL step: base for "MF8", then base*2, *4, *8, *16,
/// *32 and *64 for "MF4", "MF2", "M1", "M2", "M4" and "M8".
/// The !cond below has no catch-all arm, so `mx` must be one of the seven
/// strings listed.
38+ class AscalonGetCyclesLMULFractional<string mx, int base> {
39+ int c = !cond(
40+ !eq(mx, "MF8") : base,
41+ !eq(mx, "MF4") : !mul(base, 2),
42+ !eq(mx, "MF2") : !mul(base, 4),
43+ !eq(mx, "M1") : !mul(base, 8),
44+ !eq(mx, "M2") : !mul(base, 16),
45+ !eq(mx, "M4") : !mul(base, 32),
46+ !eq(mx, "M8") : !mul(base, 64)
47+ );
48+ }
49+
/// Default cycle count: AscalonGetCyclesLMUL with a base of 1, i.e. 1 cycle
/// for M1 and all fractional LMULs, and 2/4/8 cycles for M2/M4/M8.
50+ class AscalonGetCyclesDefault<string mx> {
51+ int c = AscalonGetCyclesLMUL<mx, 1>.c;
52+ }
53+
/// Fixed cycle table for narrowing operations:
/// MF8 -> 1, MF4 -> 1, MF2 -> 2, M1 -> 4, M2 -> 8, M4 -> 16.
/// There is deliberately or accidentally no "M8" arm and no catch-all arm;
/// callers must only pass the six LMUL strings listed.
/// NOTE(review): the uses visible in this file iterate SchedMxListW and
/// SchedMxListFW -- confirm neither list contains "M8".
54+ class AscalonGetCyclesNarrowing<string mx> {
55+ int c = !cond(
56+ !eq(mx, "M1") : 4,
57+ !eq(mx, "M2") : 8,
58+ !eq(mx, "M4") : 16,
59+ !eq(mx, "MF2") : 2,
60+ !eq(mx, "MF4") : 1,
61+ !eq(mx, "MF8") : 1
62+ );
63+ }
64+
65+
/// Cycle count for divide / square-root writes, selected by SEW and then
/// scaled by LMUL through AscalonGetCyclesLMUL.
/// Per-SEW base values: 8 -> 7, 16 -> 6, 32 -> 5, 64 -> 8.
/// The !cond has no catch-all arm, so `sew` must be 8, 16, 32 or 64.
66+ class AscalonGetCyclesDivOrSqrt<string mx, int sew> {
67+ int c = !cond(
68+ !eq(sew, 8) : AscalonGetCyclesLMUL<mx, 7>.c, // TODO not valid for fp
69+ !eq(sew, 16) : AscalonGetCyclesLMUL<mx, 6>.c,
70+ !eq(sew, 32) : AscalonGetCyclesLMUL<mx, 5>.c,
71+ !eq(sew, 64) : AscalonGetCyclesLMUL<mx, 8>.c
72+ );
73+ }
74+
75+ //===----------------------------------------------------------------------===//
76+
1177def TTAscalonD8Model : SchedMachineModel {
1278 let IssueWidth = 8; // 8-way decode and dispatch
1379 let MicroOpBufferSize = 256; // 256 micro-op re-order buffer
1480 let LoadLatency = 4; // Optimistic load latency
1581 let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
1682
17- let CompleteModel = 0 ;
83+ let CompleteModel = false ;
1884
1985 // TODO: supported, but haven't added scheduling info yet.
2086 let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
2187 HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
2288 HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
23- HasStdExtZkr, HasVInstructions, HasVInstructionsI64 ];
89+ HasStdExtZkr];
2490}
2591
2692let SchedModel = TTAscalonD8Model in {
@@ -34,11 +100,17 @@ let BufferSize = 16 in {
34100 def AscalonFXB : ProcResource<1>; // ALU, INT -> FP/VEC
35101 def AscalonFXC : ProcResource<2>; // ALU, BR
36102 def AscalonFXD : ProcResource<2>; // ALU
37- def AscalonFP : ProcResource<2>;
38- // TODO: two vector units with vector scheduling model.
103+ def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;
104+ // FP
105+ def AscalonFPA : ProcResource<1>; // Pipe A also handles FP/VEC -> INT
106+ def AscalonFPB : ProcResource<1>;
107+ def AscalonFP : ProcResGroup<[AscalonFPA, AscalonFPB]>;
108+ // Vector
109+ def AscalonVA : ProcResource<1>;
110+ def AscalonVB : ProcResource<1>;
111+ def AscalonV : ProcResGroup<[AscalonVA, AscalonVB]>; // Group of the two vector pipes declared above (was a duplicate of the AscalonFP group)
39112}
40113
41- def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;
42114
43115//===----------------------------------------------------------------------===//
44116
@@ -316,10 +388,244 @@ def : ReadAdvance<ReadSHXADD32, 0>;
316388def : ReadAdvance<ReadSingleBit, 0>;
317389def : ReadAdvance<ReadSingleBitImm, 0>;
318390
391+ //===----------------------------------------------------------------------===//
392+ // Vector
393+
394+ // Configuration-Setting Instructions
395+ let Latency = 1 in {
396+ def : WriteRes<WriteVSETVLI, [AscalonV]>;
397+ def : WriteRes<WriteVSETIVLI, [AscalonV]>;
398+ }
399+ let Latency = 2 in {
400+ def : WriteRes<WriteVSETVL, [AscalonV]>;
401+ }
402+
403+ // Vector Integer Arithmetic Instructions
404+ foreach mx = SchedMxList in {
405+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
406+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
407+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
408+ defm "" : LMULWriteResMX<"WriteVIALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
409+ defm "" : LMULWriteResMX<"WriteVIALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
410+ defm "" : LMULWriteResMX<"WriteVIALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
411+ defm "" : LMULWriteResMX<"WriteVICALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
412+ defm "" : LMULWriteResMX<"WriteVICALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
413+ defm "" : LMULWriteResMX<"WriteVICALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
414+ defm "" : LMULWriteResMX<"WriteVICALUMV", [AscalonFX, AscalonV], mx, IsWorstCase>;
415+ defm "" : LMULWriteResMX<"WriteVICALUMX", [AscalonFX, AscalonV], mx, IsWorstCase>;
416+ defm "" : LMULWriteResMX<"WriteVICALUMI", [AscalonFX, AscalonV], mx, IsWorstCase>;
417+ defm "" : LMULWriteResMX<"WriteVShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>;
418+ defm "" : LMULWriteResMX<"WriteVShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>;
419+ defm "" : LMULWriteResMX<"WriteVShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>;
420+ defm "" : LMULWriteResMX<"WriteVIMinMaxV", [AscalonFX, AscalonV], mx, IsWorstCase>;
421+ defm "" : LMULWriteResMX<"WriteVIMinMaxX", [AscalonFX, AscalonV], mx, IsWorstCase>;
422+ defm "" : LMULWriteResMX<"WriteVIMulV", [AscalonFX, AscalonV], mx, IsWorstCase>;
423+ defm "" : LMULWriteResMX<"WriteVIMulX", [AscalonFX, AscalonV], mx, IsWorstCase>;
424+ defm "" : LMULWriteResMX<"WriteVIMulAddV", [AscalonFX, AscalonV], mx, IsWorstCase>;
425+ defm "" : LMULWriteResMX<"WriteVIMulAddX", [AscalonFX, AscalonV], mx, IsWorstCase>;
426+ defm "" : LMULWriteResMX<"WriteVIMergeV", [AscalonFX, AscalonV], mx, IsWorstCase>;
427+ defm "" : LMULWriteResMX<"WriteVIMergeX", [AscalonFX, AscalonV], mx, IsWorstCase>;
428+ defm "" : LMULWriteResMX<"WriteVIMergeI", [AscalonFX, AscalonV], mx, IsWorstCase>;
429+ defm "" : LMULWriteResMX<"WriteVIMovV", [AscalonFX, AscalonV], mx, IsWorstCase>;
430+ defm "" : LMULWriteResMX<"WriteVIMovX", [AscalonFX, AscalonV], mx, IsWorstCase>;
431+ defm "" : LMULWriteResMX<"WriteVIMovI", [AscalonFX, AscalonV], mx, IsWorstCase>;
432+ defm "" : LMULWriteResMX<"WriteVICmpV", [AscalonFX, AscalonV], mx, IsWorstCase>;
433+ defm "" : LMULWriteResMX<"WriteVICmpX", [AscalonFX, AscalonV], mx, IsWorstCase>;
434+ defm "" : LMULWriteResMX<"WriteVICmpI", [AscalonFX, AscalonV], mx, IsWorstCase>;
435+ }
436+ }
437+ foreach mx = SchedMxList in {
438+ foreach sew = SchedSEWSet<mx>.val in {
439+ defvar Cycles = AscalonGetCyclesDivOrSqrt<mx, sew>.c;
440+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
441+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
442+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
443+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
444+ }
445+ }
446+ }
447+
448+ // Widening
449+ foreach mx = SchedMxListW in {
450+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
451+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
452+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
453+ defm "" : LMULWriteResMX<"WriteVIWALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
454+ defm "" : LMULWriteResMX<"WriteVIWALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
455+ defm "" : LMULWriteResMX<"WriteVIWALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
456+ defm "" : LMULWriteResMX<"WriteVIWMulV", [AscalonFX, AscalonV], mx, IsWorstCase>;
457+ defm "" : LMULWriteResMX<"WriteVIWMulX", [AscalonFX, AscalonV], mx, IsWorstCase>;
458+ defm "" : LMULWriteResMX<"WriteVIWMulAddV", [AscalonFX, AscalonV], mx, IsWorstCase>;
459+ defm "" : LMULWriteResMX<"WriteVIWMulAddX", [AscalonFX, AscalonV], mx, IsWorstCase>;
460+ }
461+ }
462+ // Narrowing
463+ foreach mx = SchedMxListW in {
464+ defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
465+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
466+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
467+ defm "" : LMULWriteResMX<"WriteVNShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>;
468+ defm "" : LMULWriteResMX<"WriteVNShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>;
469+ defm "" : LMULWriteResMX<"WriteVNShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>;
470+ }
471+ }
472+
473+ // Vector Floating-Point Instructions
474+ foreach mx = SchedMxListF in {
475+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
476+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
477+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
478+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
479+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
480+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
481+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
482+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
483+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
484+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
485+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
486+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
487+ }
488+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
489+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
490+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
491+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
492+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
493+ }
494+ }
495+ }
496+ foreach mx = SchedMxList in {
497+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
498+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
499+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
500+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
501+ }
502+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
503+ defm "" : LMULWriteResMX<"WriteVFClassV", [AscalonFP, AscalonV], mx, IsWorstCase>;
504+ defm "" : LMULWriteResMX<"WriteVFMergeV", [AscalonFP, AscalonV], mx, IsWorstCase>;
505+ defm "" : LMULWriteResMX<"WriteVFMovV", [AscalonFP, AscalonV], mx, IsWorstCase>;
506+ }
507+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
508+ defm "" : LMULWriteResMX<"WriteVFCmpV", [AscalonFP, AscalonV], mx, IsWorstCase>;
509+ defm "" : LMULWriteResMX<"WriteVFCmpF", [AscalonFP, AscalonV], mx, IsWorstCase>;
510+ }
511+ }
512+ foreach mx = SchedMxListF in {
513+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
514+ defvar Cycles = AscalonGetCyclesDivOrSqrt<mx, sew>.c;
515+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
516+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
517+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
518+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
519+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
520+ }
521+ }
522+ }
523+
524+ // Widening
525+ foreach mx = SchedMxListW in {
526+ foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
527+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
528+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
529+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in
530+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
531+ }
532+ }
533+ foreach mx = SchedMxListFW in {
534+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
535+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
536+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
537+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
538+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
539+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
540+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
541+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
542+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
543+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
544+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
545+ }
546+ }
547+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
548+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListFW>.c;
549+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in
550+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
551+ }
552+ // Narrowing
553+ foreach mx = SchedMxListW in {
554+ defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
555+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
556+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
557+ defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
558+ }
559+ }
/// Narrowing int->FP and FP->FP converts, one WriteRes per (mx, sew) pair.
/// Latency comes from AscalonGetCyclesNarrowing; resources are acquired at
/// cycles [0, 1] and released at cycles [1, Cycles].
/// NOTE(review): WriteVFNCvtFToFV is bound to AscalonFXB, the pipe annotated
/// "ALU, INT -> FP/VEC". That matches an int->FP convert, but an FP->FP
/// convert might be expected on AscalonFP -- confirm this is intended.
560+ foreach mx = SchedMxListFW in {
561+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
562+ defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
563+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
564+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
565+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
566+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
567+ }
568+ }
569+ }
570+
571+ // Vector Reduction Instructions
572+ foreach mx = SchedMxList in {
573+ foreach sew = SchedSEWSet<mx>.val in {
574+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
575+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
576+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in {
577+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [AscalonFX, AscalonV],
578+ mx, sew, IsWorstCase>;
579+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [AscalonFX, AscalonV],
580+ mx, sew, IsWorstCase>;
581+ }
582+ }
583+ }
584+
585+ foreach mx = SchedMxListWRed in {
586+ foreach sew = SchedSEWSet<mx, 0, 1>.val in {
587+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
588+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
589+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, Cycles] in
590+ defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [AscalonFX, AscalonV],
591+ mx, sew, IsWorstCase>;
592+ }
593+ }
594+
/// Floating-point reductions, one WriteRes per (mx, sew) pair.
/// WriteVFRedV_From / WriteVFRedMinMaxV_From use AscalonGetCyclesDefault;
/// WriteVFRedOV_From uses AscalonGetCyclesLMUL with a base of 18.
/// NOTE(review): these FP writes are bound to the AscalonFX group (the ALU
/// pipes) rather than AscalonFP, unlike the other FP writes in this file --
/// confirm this is intended.
595+ foreach mx = SchedMxListF in {
596+ foreach sew = SchedSEWSet<mx, 1>.val in {
597+ defvar RedCycles = AscalonGetCyclesDefault<mx>.c;
598+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
599+ let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, RedCycles] in {
600+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [AscalonFX, AscalonV],
601+ mx, sew, IsWorstCase>;
602+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [AscalonFX, AscalonV],
603+ mx, sew, IsWorstCase>;
604+ }
605+ defvar OrdRedCycles = AscalonGetCyclesLMUL<mx, 18>.c;
606+ let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, OrdRedCycles] in
607+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [AscalonFX, AscalonV],
608+ mx, sew, IsWorstCase>;
609+ }
610+ }
611+
/// Widening floating-point reductions, one WriteRes per (mx, sew) pair.
/// WriteVFWRedV_From uses AscalonGetCyclesDefault; WriteVFWRedOV_From uses
/// AscalonGetCyclesLMUL with a base of 18.
/// NOTE(review): these FP writes are bound to the AscalonFX group (the ALU
/// pipes) rather than AscalonFP -- confirm this is intended.
612+ foreach mx = SchedMxListFWRed in {
613+ foreach sew = SchedSEWSet<mx, 1, 1>.val in {
614+ defvar RedCycles = AscalonGetCyclesDefault<mx>.c;
615+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
616+ let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, RedCycles] in
617+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [AscalonFX, AscalonV],
618+ mx, sew, IsWorstCase>;
619+ defvar OrdRedCycles = AscalonGetCyclesLMUL<mx, 18>.c;
620+ let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, OrdRedCycles] in
621+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [AscalonFX, AscalonV],
622+ mx, sew, IsWorstCase>;
623+ }
624+ }
625+
319626//===----------------------------------------------------------------------===//
320627// Unsupported extensions
321628defm : UnsupportedSchedQ;
322- defm : UnsupportedSchedV;
323629defm : UnsupportedSchedZabha;
324630defm : UnsupportedSchedZbc;
325631defm : UnsupportedSchedZbkb;
0 commit comments