@@ -125,6 +125,33 @@ class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0
// VLEN value (256) consumed by SMX60GetVLMAX.
125125defvar SMX60VLEN = 256;
// Half of SMX60VLEN (256 / 2 = 128).
126126defvar SMX60DLEN = !div(SMX60VLEN, 2);
127127
// Maps an LMUL name to an integer multiplier exposed as `c`:
// M1/M2/M4/M8 give 1/2/4/8, and every fractional LMUL (MF2, MF4, MF8) gives 1.
// There is no default arm, so any other `mx` string matches no condition.
128+ class SMX60GetLMulCycles<string mx> {
129+ int c = !cond(
130+ !eq(mx, "M1") : 1,
131+ !eq(mx, "M2") : 2,
132+ !eq(mx, "M4") : 4,
133+ !eq(mx, "M8") : 8,
134+ !eq(mx, "MF2") : 1,
135+ !eq(mx, "MF4") : 1,
136+ !eq(mx, "MF8") : 1
137+ );
138+ }
139+
// Computes `val` from SMX60VLEN, the LMUL named by `mx`, and `sew`:
//   fractional LMUL (MF2/MF4/MF8): (SMX60VLEN / 2|4|8) / sew
//   any other mx (default arm):    (SMX60VLEN * LMUL) / sew
// !div is integer division, so a small fractional LMUL combined with a large
// sew truncates to 0 (e.g. MF8 with sew = 64 gives 256 / 8 / 64 = 0).
140+ class SMX60GetVLMAX<string mx, int sew> {
// LMUL is only used by the default arm; it is 1 for the fractional cases.
141+ defvar LMUL = SMX60GetLMulCycles<mx>.c;
142+ int val = !cond(
143+ !eq(mx, "MF2") : !div(!div(SMX60VLEN, 2), sew),
144+ !eq(mx, "MF4") : !div(!div(SMX60VLEN, 4), sew),
145+ !eq(mx, "MF8") : !div(!div(SMX60VLEN, 8), sew),
146+ true: !div(!mul(SMX60VLEN, LMUL), sew)
147+ );
148+ }
149+
150+ // Cycle count for segmented loads and stores, calculated as VLMAX(mx, sew) * nf.
151+ class SMX60SegmentedLdStCycles<string mx, int sew, int nf> {
152+ int c = !mul(SMX60GetVLMAX<mx, sew>.val, nf);
153+ }
154+
128155def SpacemitX60Model : SchedMachineModel {
129156 let IssueWidth = 2; // dual-issue
130157 let MicroOpBufferSize = 0; // in-order
@@ -367,23 +394,43 @@ foreach mx = SchedMxList in {
367394 defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
368395
369396 // Unit-stride loads and stores
370- defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>;
371- defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>;
372- defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>;
// Each write below uses a single per-mx value for both Latency and
// ReleaseAtCycles. ConstValueUntilLMULThenDoubleBase and GetLMULValue are
// defined outside this view.
// NOTE(review): the seven-entry lists presumably hold one value per LMUL --
// confirm the ordering against GetLMULValue.
397+ defvar VLDELatAndOcc = ConstValueUntilLMULThenDoubleBase<"M2", 3, 4, mx>.c;
398+ let Latency = VLDELatAndOcc, ReleaseAtCycles = [VLDELatAndOcc] in {
399+ defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>;
400+ }
401+ defvar VSTELatAndOcc = GetLMULValue<[2, 2, 2, 3, 4, 8, 19], mx>.c;
402+ let Latency = VSTELatAndOcc, ReleaseAtCycles = [VSTELatAndOcc] in {
403+ defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>;
404+ }
405+ defvar VLDFFLatAndOcc = GetLMULValue<[4, 4, 4, 5, 7, 11, 19], mx>.c;
406+ let Latency = VLDFFLatAndOcc, ReleaseAtCycles = [VLDFFLatAndOcc] in {
407+ defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>;
408+ }
373409
374410 // Mask loads and stores
375- defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>;
376- defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>;
// NOTE(review): WriteVLDM overrides only ReleaseAtCycles, whereas WriteVSTM
// below and the other writes in this loop also set Latency to the same
// value -- confirm the missing Latency override is intentional.
411+ let ReleaseAtCycles = [2] in {
412+ defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase>;
413+ }
// Mask store: fixed value of 2 for both Latency and ReleaseAtCycles,
// independent of mx.
414+ let Latency = 2, ReleaseAtCycles = [2] in {
415+ defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase>;
416+ }
377417
378418 // Strided and indexed loads and stores
379419 foreach eew = [8, 16, 32, 64] in {
380- defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>;
381- defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
382- defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
// Strided loads/stores: Latency and ReleaseAtCycles both equal VLMAX for
// (mx, eew).
420+ defvar StridedLdStLatAndOcc = SMX60GetVLMAX<mx, eew>.val;
421+ let Latency = StridedLdStLatAndOcc, ReleaseAtCycles = [StridedLdStLatAndOcc] in {
422+ defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>;
423+ defm "" : LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>;
424+ }
425+
// Indexed loads/stores: Latency and ReleaseAtCycles both equal VLMAX / 2
// (integer division).
426+ defvar IndexedLdStLatAndOcc = !div(SMX60GetVLMAX<mx, eew>.val, 2);
427+ let Latency = IndexedLdStLatAndOcc, ReleaseAtCycles = [IndexedLdStLatAndOcc] in {
428+ defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
429+ defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
383430
384- defm "" : LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>;
385- defm "" : LMULWriteResMX<"WriteVSTUX " # eew, [SMX60_VLS], mx, IsWorstCase>;
386- defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
431+ defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
// No trailing space in the name: `# eew` appends directly, so "WriteVSTOX "
// would yield e.g. "WriteVSTOX 8", unlike every sibling write name here.
432+ defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
433+ }
387434 }
388435}
389436
@@ -393,30 +440,39 @@ foreach mx = SchedMxList in {
393440 foreach eew = [8, 16, 32, 64] in {
394441 defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
395442
396- // Unit-stride segmented
397- defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
398- defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
399- defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
400-
401- // Strided/indexed segmented
402- defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
403- defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
404-
405- // Indexed segmented
406- defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
407- defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
408- defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
409- defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
// Every segmented write below shares one value for both Latency and
// ReleaseAtCycles, taken from SMX60SegmentedLdStCycles<mx, eew, nf>.c.
// `mx` and `nf` are bound by enclosing loops outside the visible lines.
443+ defvar SegmentedLdStLatAndOcc = SMX60SegmentedLdStCycles<mx, eew, nf>.c;
444+ let Latency = SegmentedLdStLatAndOcc, ReleaseAtCycles = [SegmentedLdStLatAndOcc] in {
445+ // Unit-stride segmented
446+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
447+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
448+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
449+
450+ // Strided/indexed segmented
451+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
452+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
453+
454+ // Indexed segmented
455+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
456+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
457+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
458+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
459+ }
410460 }
411461 }
412462}
413463
414464// Whole register move/load/store
415465foreach LMul = [1, 2, 4, 8] in {
416- def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SMX60_VLS]>;
417- def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SMX60_VLS]>;
// Load/store: 3 when LMul is 1, otherwise 2 * LMul (4, 8, 16); the value is
// used for both Latency and ReleaseAtCycles.
466+ defvar WholeRegLdStLatAndOcc = !if(!eq(LMul, 1), 3, !mul(LMul, 2));
467+ let Latency = WholeRegLdStLatAndOcc, ReleaseAtCycles = [WholeRegLdStLatAndOcc] in {
468+ def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SMX60_VLS]>;
469+ def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SMX60_VLS]>;
470+ }
418471
419- def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SMX60_VIEU]>;
// Move: 4 when LMul is 1, otherwise 2 * LMul (4, 8, 16); issued on SMX60_VIEU
// rather than SMX60_VLS.
472+ defvar VMovLatAndOcc = !if(!eq(LMul, 1), 4, !mul(LMul, 2));
473+ let Latency = VMovLatAndOcc, ReleaseAtCycles = [VMovLatAndOcc] in {
474+ def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SMX60_VIEU]>;
475+ }
420476}
421477
422478// 11. Vector Integer Arithmetic Instructions
0 commit comments