@@ -418,21 +418,28 @@ foreach LMul = [1, 2, 4, 8] in {
418418foreach mx = SchedMxList in {
419419 defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
420420
421- let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [4 ] in {
421+ let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c ] in {
422422 defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
423423 defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
424424 }
425425
426+ // Latency of vadd, vsub, vrsub: 4/4/5/8
427+ // ReleaseAtCycles of vadd, vsub, vrsub: 1/2/4/8
428+ // Latency of vand, vor, vxor: 4/4/8/16
429+ // ReleaseAtCycles of vand, vor, vxor: 2/4/8/16
430+ // They are grouped together, so we use the worst case of each: Latency 4/4/8/16 and ReleaseAtCycles 2/4/8/16
431+ // TODO: use InstRW to override individual instructions' scheduling data
426432 defvar VIALULat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
427- let Latency = VIALULat, ReleaseAtCycles = [4] in {
428- // Pattern of vadd, vsub, vrsub: 4/4/5/8
429- // Pattern of vand, vor, vxor: 4/4/8/16
430- // They are grouped together, so we used the worst case 4/4/8/16
431- // TODO: use InstRW to override individual instructions' scheduling data
433+ defvar VIALUOcc = ConstOneUntilMF2ThenDouble<mx>.c;
434+ let Latency = VIALULat, ReleaseAtCycles = [VIALUOcc] in {
432435 defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>;
433436 defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>;
434437 defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>;
438+ }
435439
440+ defvar VILogicalLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
441+ defvar VILogicalOcc = ConstValueUntilLMULThenDouble<"MF2", 1, mx>.c;
442+ let Latency = VILogicalLat, ReleaseAtCycles = [VILogicalOcc] in {
436443 defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>;
437444 defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>;
438445 defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>;
@@ -449,7 +456,9 @@ foreach mx = SchedMxList in {
449456 defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>;
450457 }
451458
452- let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [4] in {
459+ // Slightly increase Occ (ReleaseAtCycles) when LMUL == M8: 18 rather than 16
460+ defvar VICmpCarryOcc = GetLMULValue<[1, 1, 1, 2, 4, 8, 18], mx>.c;
461+ let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [VICmpCarryOcc] in {
453462 defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>;
454463 defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>;
455464 defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>;
@@ -458,10 +467,14 @@ foreach mx = SchedMxList in {
458467 defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>;
459468 }
460469
461- // Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8,
470+ // Latency of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8,
462471 // e64 = 7,8,16,32. We use the worst-case until we can split the SEW.
463472 // TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites
464- let Latency = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c, ReleaseAtCycles = [7] in {
473+ defvar VIMulLat = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c;
474+ // ReleaseAtCycles for vnmsac/vnmsub is 1/1/1/1/2/5 but we use the worst case
475+ // here since they are grouped together with vmacc/vmadd/vmul/vmulh.
476+ defvar VIMulOcc = ConstOneUntilM1ThenDouble<mx>.c;
477+ let Latency = VIMulLat, ReleaseAtCycles = [VIMulOcc] in {
465478 defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
466479 defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
467480 defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
@@ -475,7 +488,8 @@ foreach mx = SchedMxList in {
475488foreach mx = SchedMxListW in {
476489 defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
477490
478- let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [4] in {
491+ defvar VIWideningOcc = ConstOneUntilMF2ThenDouble<mx>.c;
492+ let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [VIWideningOcc] in {
479493 defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>;
480494 defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>;
481495 defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>;
@@ -497,8 +511,9 @@ foreach mx = SchedMxList in {
497511 foreach sew = SchedSEWSet<mx>.val in {
498512 defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
499513
500- defvar VIDivLat = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
501- let Latency = VIDivLat, ReleaseAtCycles = [12] in {
514+ // Not pipelined: ReleaseAtCycles is set to the same value as Latency
515+ defvar VIDivLatAndOcc = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
516+ let Latency = VIDivLatAndOcc, ReleaseAtCycles = [VIDivLatAndOcc] in {
502517 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
503518 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
504519 }
@@ -510,7 +525,8 @@ foreach mx = SchedMxListW in {
510525 defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
511526
512527 defvar VNarrowingLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
513- let Latency = VNarrowingLat, ReleaseAtCycles = [4] in {
528+ defvar VNarrowingOcc = ConstValueUntilLMULThenDouble<"MF4", 1, mx>.c;
529+ let Latency = VNarrowingLat, ReleaseAtCycles = [VNarrowingOcc] in {
514530 defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
515531 defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
516532 defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
0 commit comments