@@ -426,31 +426,29 @@ def : WriteRes<WriteVSETVL, [AscalonV]>;
// Per-LMUL write resources for WriteVLDE, WriteVLDFF and WriteVSTE, all issued on AscalonLS.
// Diff view: a '-' line is the previous definition and the '+' line below it is its replacement.
426426foreach mx = SchedMxList in {
  // Cycles is the per-LMUL value produced by AscalonGetCyclesDefault for this mx.
427427 defvar Cycles = AscalonGetCyclesDefault<mx>.c;
  // IsWorstCase is computed against the same list that is being iterated (SchedMxList).
428428 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
  // Loads: latency equals Cycles. The previous form also set ReleaseAtCycles = [Cycles];
  // the new form leaves ReleaseAtCycles unset.
  // NOTE(review): presumably this falls back to the WriteRes default occupancy - confirm in TargetSchedule.td.
429- let Latency = Cycles, ReleaseAtCycles = [Cycles] in {
429+ let Latency = Cycles in {
430430 defm "" : LMULWriteResMX<"WriteVLDE", [AscalonLS], mx, IsWorstCase>;
431431 defm "" : LMULWriteResMX<"WriteVLDFF", [AscalonLS], mx, IsWorstCase>;
432432 }
  // Store: latency stays 1 for every mx. The previous form acquired AscalonLS at cycle 1 and
  // released it at 1 + Cycles; the new form drops both lists, so Cycles no longer affects WriteVSTE.
433- let Latency = 1, AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in
433+ let Latency = 1 in
434434 defm "" : LMULWriteResMX<"WriteVSTE", [AscalonLS], mx, IsWorstCase>;
435435}
436436
// Per-LMUL write resources for WriteVLDM and WriteVSTM on AscalonLS.
// Both use a fixed Latency of 1 that does not depend on mx; no Cycles value is computed in this loop.
437437foreach mx = SchedMxList in {
438438 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
  // The previous form spelled out ReleaseAtCycles = [1]; the replacement leaves it unset.
  // NOTE(review): presumably equivalent to the WriteRes default - confirm in TargetSchedule.td.
439- let Latency = 1, ReleaseAtCycles = [1] in
439+ let Latency = 1 in
440440 defm "" : LMULWriteResMX<"WriteVLDM", [AscalonLS], mx, IsWorstCase>;
  // Same change applied to the WriteVSTM definition.
441- let Latency = 1, ReleaseAtCycles = [1] in
441+ let Latency = 1 in
442442 defm "" : LMULWriteResMX<"WriteVSTM", [AscalonLS], mx, IsWorstCase>;
443443}
444444
445445foreach mx = SchedMxList in {
446446 defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
447447 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
448- let Latency = !add(3, Cycles), AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
448+ let Latency = Cycles in {
449449 defm "" : LMULWriteResMX<"WriteVLDS8", [AscalonLS], mx, IsWorstCase>;
450450 defm "" : LMULWriteResMX<"WriteVLDUX8", [AscalonLS], mx, IsWorstCase>;
451451 defm "" : LMULWriteResMX<"WriteVLDOX8", [AscalonLS], mx, IsWorstCase>;
452- }
453- let Latency = 1, AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
454452 defm "" : LMULWriteResMX<"WriteVSTS8", [AscalonLS], mx, IsWorstCase>;
455453 defm "" : LMULWriteResMX<"WriteVSTUX8", [AscalonLS], mx, IsWorstCase>;
456454 defm "" : LMULWriteResMX<"WriteVSTOX8", [AscalonLS], mx, IsWorstCase>;
@@ -459,12 +457,10 @@ foreach mx = SchedMxList in {
459457foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
460458 defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
461459 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
462- let Latency = !add(3, Cycles), AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
460+ let Latency = Cycles in {
463461 defm "" : LMULWriteResMX<"WriteVLDS16", [AscalonLS], mx, IsWorstCase>;
464462 defm "" : LMULWriteResMX<"WriteVLDUX16", [AscalonLS], mx, IsWorstCase>;
465463 defm "" : LMULWriteResMX<"WriteVLDOX16", [AscalonLS], mx, IsWorstCase>;
466- }
467- let Latency = 1, AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
468464 defm "" : LMULWriteResMX<"WriteVSTS16", [AscalonLS], mx, IsWorstCase>;
469465 defm "" : LMULWriteResMX<"WriteVSTUX16", [AscalonLS], mx, IsWorstCase>;
470466 defm "" : LMULWriteResMX<"WriteVSTOX16", [AscalonLS], mx, IsWorstCase>;
@@ -473,12 +469,10 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
473469foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
474470 defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
475471 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
476- let Latency = !add(3, Cycles), AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
472+ let Latency = Cycles in {
477473 defm "" : LMULWriteResMX<"WriteVLDS32", [AscalonLS], mx, IsWorstCase>;
478474 defm "" : LMULWriteResMX<"WriteVLDUX32", [AscalonLS], mx, IsWorstCase>;
479475 defm "" : LMULWriteResMX<"WriteVLDOX32", [AscalonLS], mx, IsWorstCase>;
480- }
481- let Latency = 1, AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
482476 defm "" : LMULWriteResMX<"WriteVSTS32", [AscalonLS], mx, IsWorstCase>;
483477 defm "" : LMULWriteResMX<"WriteVSTUX32", [AscalonLS], mx, IsWorstCase>;
484478 defm "" : LMULWriteResMX<"WriteVSTOX32", [AscalonLS], mx, IsWorstCase>;
@@ -487,12 +481,10 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
487481foreach mx = ["M1", "M2", "M4", "M8"] in {
488482 defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
489483 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
490- let Latency = !add(3, Cycles), AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
484+ let Latency = Cycles in {
491485 defm "" : LMULWriteResMX<"WriteVLDS64", [AscalonLS], mx, IsWorstCase>;
492486 defm "" : LMULWriteResMX<"WriteVLDUX64", [AscalonLS], mx, IsWorstCase>;
493487 defm "" : LMULWriteResMX<"WriteVLDOX64", [AscalonLS], mx, IsWorstCase>;
494- }
495- let Latency = 1, AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
496488 defm "" : LMULWriteResMX<"WriteVSTS64", [AscalonLS], mx, IsWorstCase>;
497489 defm "" : LMULWriteResMX<"WriteVSTUX64", [AscalonLS], mx, IsWorstCase>;
498490 defm "" : LMULWriteResMX<"WriteVSTOX64", [AscalonLS], mx, IsWorstCase>;
@@ -518,8 +510,7 @@ foreach mx = SchedMxList in {
518510 foreach nf=2-8 in {
519511 defvar Cycles = AscalonGetCyclesDefault<mx>.c;
520512 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
// Write resources for WriteVLSEG<nf>e<eew> and WriteVLSEGFF<nf>e<eew> on AscalonLS.
// nf and Cycles come from the enclosing loops; eew is bound outside this view.
// Previous form: Latency = 3 + Cycles, AscalonLS acquired at cycle 1 and released at 1 + Cycles,
// with the rationale comment kept on the removed line below. New form: Latency = Cycles only.
// NOTE(review): the "does not chain" rationale is removed with the extra latency - confirm this is intended.
521- // Does not chain so set latency high
522- let Latency = !add(3, Cycles), AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
513+ let Latency = Cycles in {
523514 defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
524515 defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
525516 }
@@ -533,13 +524,10 @@ foreach mx = SchedMxList in {
533524 foreach eew = [8, 16, 32, 64] in {
534525 defvar Cycles = AscalonGetCyclesStridedSegmented<mx, eew>.c;
535526 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
536- // Does not chain so set latency high
537- let Latency = !add(3, Cycles), AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
527+ let Latency = Cycles in {
538528 defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
539529 defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
540530 defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
541- }
542- let Latency = 1, AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in {
543531 defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
544532 defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
545533 defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
@@ -553,25 +541,23 @@ foreach mx = SchedMxList in {
553541 defvar Cycles = AscalonGetCyclesDefault<mx>.c;
554542 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
  // Write resources for the WriteVSALU*, WriteVAALU*, WriteVSMul* and WriteVSShift* classes.
  // Every entry names two resources: AscalonFX (AscalonFXA for WriteVSMul*) followed by AscalonV.
  // Latency is a fixed 8. The acquire/release lists have one entry per listed resource:
  // [0, 1] and [1, 1 + Cycles].
  // NOTE(review): entries are presumably positional (first resource held over cycles 0-1, AscalonV
  // over cycles 1 to 1 + Cycles) - confirm in TargetSchedule.td.
555543 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
556- // FIXME what pipe does this occupy aside from vec
557544 defm "" : LMULWriteResMX<"WriteVSALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
558545 defm "" : LMULWriteResMX<"WriteVSALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
559546 defm "" : LMULWriteResMX<"WriteVSALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
560547 defm "" : LMULWriteResMX<"WriteVAALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
561548 defm "" : LMULWriteResMX<"WriteVAALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
562549 defm "" : LMULWriteResMX<"WriteVSMulV", [AscalonFXA, AscalonV], mx, IsWorstCase>;
563550 defm "" : LMULWriteResMX<"WriteVSMulX", [AscalonFXA, AscalonV], mx, IsWorstCase>;
  // The three WriteVSShift* definitions are unchanged apart from dropping the trailing "TODO correct?" markers.
564- defm "" : LMULWriteResMX<"WriteVSShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>; // TODO correct?
565- defm "" : LMULWriteResMX<"WriteVSShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>; // TODO correct?
566- defm "" : LMULWriteResMX<"WriteVSShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>; // TODO correct?
551+ defm "" : LMULWriteResMX<"WriteVSShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>;
552+ defm "" : LMULWriteResMX<"WriteVSShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>;
553+ defm "" : LMULWriteResMX<"WriteVSShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>;
567554 }
568555}
569556// Narrowing
570557foreach mx = SchedMxListW in {
571558 defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
572559 defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
573560 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
574- // TODO verify
575561 defm "" : LMULWriteResMX<"WriteVNClipV", [AscalonFX, AscalonV], mx, IsWorstCase>;
576562 defm "" : LMULWriteResMX<"WriteVNClipX", [AscalonFX, AscalonV], mx, IsWorstCase>;
577563 defm "" : LMULWriteResMX<"WriteVNClipI", [AscalonFX, AscalonV], mx, IsWorstCase>;
0 commit comments