Skip to content

Commit 6fc48de

Browse files
authored
[AArch64] Update zero latency instructions in Neoverse scheduling tables (#165690)
NeoverseZeroMove was introduced for Neoverse-V2 and was later added to V3 and V3AE. Use NeoverseZeroMove for Neoverse-V1, N2, and N3 in the same way, covering these instructions: MOV Xd|Wd, #0|XZR|WZR. For all of the above Neoverse targets, the following instructions are also decoded as not utilizing the scheduling and execution resources of the machine: MOV Wd, Wn; MOV Xd, Xn. For Neoverse-N3 only, these instructions also have zero latency: FMOV Dd, Dn; FMOV Sd, Sn; MOV Vd, Vn (vector); MOV Zd.D, Zn.D; PTRUE; PFALSE.
1 parent 655662e commit 6fc48de

File tree

11 files changed

+189
-109
lines changed

11 files changed

+189
-109
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ def : WriteRes<WriteLDHi, []> { let Latency = 4; }
7272
// Define customized scheduler read/write types specific to the Neoverse N2.
7373

7474
//===----------------------------------------------------------------------===//
75+
76+
// Define generic 0 micro-op types
77+
def N2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
78+
7579
// Define generic 1 micro-op types
7680

7781
def N2Write_1c_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; }
@@ -645,6 +649,21 @@ def N2Write_11c_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
645649
let NumMicroOps = 27;
646650
}
647651

652+
//===----------------------------------------------------------------------===//
653+
// Define predicate-controlled types
654+
655+
def N2Write_0or1c_1I : SchedWriteVariant<[
656+
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
657+
SchedVar<NoSchedPred, [N2Write_1c_1I]>]>;
658+
659+
def N2Write_0or2c_1V : SchedWriteVariant<[
660+
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
661+
SchedVar<NoSchedPred, [N2Write_2c_1V]>]>;
662+
663+
def N2Write_0or3c_1M0 : SchedWriteVariant<[
664+
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
665+
SchedVar<NoSchedPred, [N2Write_3c_1M0]>]>;
666+
648667
//===----------------------------------------------------------------------===//
649668
// Define types for arithmetic and logical ops with short shifts
650669
def N2Write_Arith : SchedWriteVariant<[
@@ -680,6 +699,7 @@ def : InstRW<[N2Write_1c_1B_1S], (instrs BL, BLR)>;
680699
// ALU, basic
681700
// ALU, basic, flagset
682701
def : SchedAlias<WriteI, N2Write_1c_1I>;
702+
def : InstRW<[N2Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
683703

684704
// ALU, extend and shift
685705
def : SchedAlias<WriteIEReg, N2Write_2c_1M>;
@@ -691,7 +711,8 @@ def : SchedAlias<WriteISReg, N2Write_Arith>;
691711

692712
// Logical, shift, no flagset
693713
def : InstRW<[N2Write_1c_1I],
694-
(instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
714+
(instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
715+
def : InstRW<[N2Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;
695716

696717
// Logical, shift, flagset
697718
def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -882,8 +903,7 @@ def : SchedAlias<WriteFImm, N2Write_2c_1V>;
882903
def : InstRW<[N2Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
883904

884905
// FP transfer, from gen to low half of vec reg
885-
def : InstRW<[N2Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
886-
FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
906+
def : InstRW<[N2Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
887907

888908
// FP transfer, from gen to high half of vec reg
889909
def : InstRW<[N2Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1225,6 +1245,8 @@ def : InstRW<[N2Write_3c_1V0], (instrs BFCVT)>;
12251245
// ASIMD unzip/zip
12261246
// Handled by SchedAlias<WriteV[dq], ...>
12271247

1248+
def : InstRW<[N2Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;
1249+
12281250
// ASIMD duplicate, gen reg
12291251
def : InstRW<[N2Write_3c_1M0], (instregex "^DUPv.+gpr")>;
12301252

llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def : WriteRes<WriteHint, []> { let Latency = 1; }
7575

7676
def N3Write_0c : SchedWriteRes<[]> {
7777
let Latency = 0;
78-
let NumMicroOps = 0;
78+
let NumMicroOps = 1;
7979
}
8080

8181
def N3Write_4c : SchedWriteRes<[]> {
@@ -553,6 +553,25 @@ def N3Write_16c_16V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0,
553553
let NumMicroOps = 16;
554554
}
555555

556+
557+
//===----------------------------------------------------------------------===//
558+
// Define predicate-controlled types
559+
560+
def N3Write_0or1c_1I : SchedWriteVariant<[
561+
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
562+
SchedVar<NoSchedPred, [N3Write_1c_1I]>]>;
563+
564+
def N3Write_0or2c_1V : SchedWriteVariant<[
565+
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
566+
SchedVar<NoSchedPred, [N3Write_2c_1V]>]>;
567+
568+
def N3Write_0or2c_1M : SchedWriteVariant<[
569+
SchedVar<NeoverseAllActivePredicate, [N3Write_0c]>,
570+
SchedVar<NoSchedPred, [N3Write_2c_1M]>]>;
571+
572+
def N3Write_0or3c_1M0 : SchedWriteVariant<[
573+
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
574+
SchedVar<NoSchedPred, [N3Write_3c_1M0]>]>;
556575
//===----------------------------------------------------------------------===//
557576
// Define forwarded types
558577
// NOTE: SOG, p. 19, n. 2: Accumulator forwarding is not supported for
@@ -682,6 +701,7 @@ def : InstRW<[N3Write_1c_1B_1S], (instrs BL, BLR)>;
682701
// Conditional compare
683702
// Conditional select
684703
def : SchedAlias<WriteI, N3Write_1c_1I>;
704+
def : InstRW<[N3Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
685705

686706
// ALU, extend and shift
687707
def : SchedAlias<WriteIEReg, N3Write_2c_1M>;
@@ -711,7 +731,8 @@ def : InstRW<[N3Write_1c_1I], (instrs GMI, SUBP, SUBPS)>;
711731

712732
// Logical, shift, no flagset
713733
def : InstRW<[N3Write_1c_1I],
714-
(instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
734+
(instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
735+
def : InstRW<[N3Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;
715736

716737
// Logical, shift, flagset
717738
def : InstRW<[N3Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -957,10 +978,11 @@ def : SchedAlias<WriteFCvt, N3Write_3c_1V0>;
957978
def : SchedAlias<WriteFImm, N3Write_2c_1V>;
958979

959980
// FP move, register
960-
def : InstRW<[N3Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
981+
def : InstRW<[N3Write_2c_1V], (instrs FMOVHr)>;
982+
def : InstRW<[N3Write_0c], (instrs FMOVSr, FMOVDr)>;
961983

962984
// FP transfer, from gen to low half of vec reg
963-
def : InstRW<[N3Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
985+
def : InstRW<[N3Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
964986

965987
// FP transfer, from gen to high half of vec reg
966988
def : InstRW<[N3Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1064,6 +1086,8 @@ def : InstRW<[WriteAdr, N3Write_2c_1L01_1V_1I], (instregex "^STP[SDQ](post|pre)$
10641086
// ASIMD compare
10651087
// ASIMD logical
10661088
// ASIMD max/min, basic and pair-wise
1089+
def : InstRW<[N3Write_0or2c_1V], (instrs ORRv16i8, ORRv8i8)>;
1090+
10671091
def : SchedAlias<WriteVd, N3Write_2c_1V>;
10681092
def : SchedAlias<WriteVq, N3Write_2c_1V>;
10691093

@@ -1290,6 +1314,7 @@ def : InstRW<[N3Write_3c_1V0], (instrs BFCVT)>;
12901314
// ASIMD transpose
12911315
// ASIMD unzip/zip
12921316
// Covered by WriteV[dq]
1317+
def : InstRW<[N3Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;
12931318

12941319
// ASIMD duplicate, gen reg
12951320
def : InstRW<[N3Write_3c_1M0], (instregex "^DUPv.+gpr")>;
@@ -1668,10 +1693,11 @@ def : InstRW<[N3Write_2c_1M], (instregex "^REV_PP_[BHSD]")>;
16681693
def : InstRW<[N3Write_1c_1M], (instrs SEL_PPPP)>;
16691694

16701695
// Predicate set
1671-
def : InstRW<[N3Write_2c_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
1696+
def : InstRW<[N3Write_0c], (instrs PFALSE)>;
1697+
def : InstRW<[N3Write_0or2c_1M], (instregex "^PTRUE_[BHSD]")>;
16721698

16731699
// Predicate set/initialize, set flags
1674-
def : InstRW<[N3Write_2c_1M], (instregex "^PTRUES_[BHSD]")>;
1700+
def : InstRW<[N3Write_0or2c_1M], (instregex "^PTRUES_[BHSD]")>;
16751701

16761702
// Predicate find first/next
16771703
def : InstRW<[N3Write_2c_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
@@ -1897,10 +1923,11 @@ def : InstRW<[N3Write_5c_1M0_1V], (instregex "^INDEX_(IR|RI|RR)_D$")>;
18971923
// Logical
18981924
def : InstRW<[N3Write_2c_1V],
18991925
(instregex "^(AND|EOR|ORR)_ZI",
1900-
"^(AND|BIC|EOR|ORR)_ZZZ",
1926+
"^(AND|BIC|EOR)_ZZZ",
19011927
"^EOR(BT|TB)_ZZZ_[BHSD]",
19021928
"^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
19031929
"^NOT_ZPmZ_[BHSD]")>;
1930+
def : InstRW<[N3Write_0or2c_1V], (instrs ORR_ZZZ)>;
19041931

19051932
// Max/min, basic and pairwise
19061933
def : InstRW<[N3Write_2c_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",

llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ def : WriteRes<WriteHint, []> { let Latency = 1; }
9494
let Latency = 0, NumMicroOps = 0 in
9595
def V1Write_0c_0Z : SchedWriteRes<[]>;
9696

97+
def V1Write_0c : SchedWriteRes<[]> { let Latency = 0; }
9798

9899
//===----------------------------------------------------------------------===//
99100
// Define generic 1 micro-op types
@@ -472,6 +473,17 @@ def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
472473
V1UnitV, V1UnitV, V1UnitV,
473474
V1UnitV, V1UnitV, V1UnitV]>;
474475

476+
//===----------------------------------------------------------------------===//
477+
// Define predicate-controlled types
478+
479+
def V1Write_0or1c_1I : SchedWriteVariant<[
480+
SchedVar<NeoverseZeroMove, [V1Write_0c]>,
481+
SchedVar<NoSchedPred, [V1Write_1c_1I]>]>;
482+
483+
def V1Write_0or3c_1M0 : SchedWriteVariant<[
484+
SchedVar<NeoverseZeroMove, [V1Write_0c]>,
485+
SchedVar<NoSchedPred, [V1Write_3c_1M0]>]>;
486+
475487
//===----------------------------------------------------------------------===//
476488
// Define forwarded types
477489

@@ -603,6 +615,7 @@ def : InstRW<[V1Write_1c_1I_1Flg],
603615
"^(ADC|SBC)S[WX]r$",
604616
"^ANDS[WX]ri$",
605617
"^(AND|BIC)S[WX]rr$")>;
618+
def : InstRW<[V1Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
606619

607620
// ALU, extend and shift
608621
def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
@@ -623,7 +636,8 @@ def : InstRW<[V1WriteISRegS],
623636
(instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
624637

625638
// Logical, shift, no flagset
626-
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
639+
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
640+
def : InstRW<[V1Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;
627641

628642
// Logical, shift, flagset
629643
def : InstRW<[V1Write_2c_1M_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;

llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,5 +80,23 @@ def NeoverseZeroMove : MCSchedPredicate<
8080
// MOVI Dd, #0
8181
// MOVI Vd.2D, #0
8282
CheckAll<[CheckOpcode<[MOVID, MOVIv2d_ns]>,
83-
CheckImmOperand<1, 0>]>
83+
CheckImmOperand<1, 0>]>,
84+
// MOV Zd, Zn
85+
CheckAll<[CheckOpcode<[ORR_ZZZ]>,
86+
CheckSameRegOperand<1, 2>]>,
87+
// MOV Vd, Vn
88+
CheckAll<[CheckOpcode<[ORRv16i8, ORRv8i8]>,
89+
CheckSameRegOperand<1, 2>]>,
8490
]>>;
91+
92+
def NeoverseAllActivePredicate : MCSchedPredicate<
93+
CheckAny<[
94+
// PTRUE Pd, ALL
95+
// PTRUES Pd, ALL
96+
CheckAll<[
97+
CheckOpcode<[
98+
PTRUE_B, PTRUE_H, PTRUE_S, PTRUE_D,
99+
PTRUES_B, PTRUES_H, PTRUES_S, PTRUES_D]>,
100+
CheckIsImmOperand<1>,
101+
CheckImmOperand<1, 31>]>,
102+
]>>;

llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2086,9 +2086,9 @@ drps
20862086
# CHECK-NEXT: 1 3 0.50 fcvtas x27, d28
20872087
# CHECK-NEXT: 1 3 0.50 fcvtau w29, d30
20882088
# CHECK-NEXT: 1 3 0.50 fcvtau xzr, d0
2089-
# CHECK-NEXT: 1 3 3.00 fmov w3, s9
2089+
# CHECK-NEXT: 1 2 0.50 fmov w3, s9
20902090
# CHECK-NEXT: 1 3 3.00 fmov s9, w3
2091-
# CHECK-NEXT: 1 3 3.00 fmov x20, d31
2091+
# CHECK-NEXT: 1 2 0.50 fmov x20, d31
20922092
# CHECK-NEXT: 1 3 3.00 fmov d1, x15
20932093
# CHECK-NEXT: 1 2 0.50 fmov x3, v12.d[1]
20942094
# CHECK-NEXT: 2 5 1.00 fmov v1.d[1], x19
@@ -2508,14 +2508,14 @@ drps
25082508
# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
25092509
# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
25102510
# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
2511-
# CHECK-NEXT: 1 1 0.25 mov x3, x6
2512-
# CHECK-NEXT: 1 1 0.25 mov x3, xzr
2513-
# CHECK-NEXT: 1 1 0.25 mov wzr, w2
2514-
# CHECK-NEXT: 1 1 0.25 mov w3, w5
2511+
# CHECK-NEXT: 1 0 0.20 mov x3, x6
2512+
# CHECK-NEXT: 1 0 0.20 mov x3, xzr
2513+
# CHECK-NEXT: 1 0 0.20 mov wzr, w2
2514+
# CHECK-NEXT: 1 0 0.20 mov w3, w5
25152515
# CHECK-NEXT: 1 1 0.25 movz w2, #0, lsl #16
25162516
# CHECK-NEXT: 1 1 0.25 mov w2, #-1235
25172517
# CHECK-NEXT: 1 1 0.25 mov x2, #5299989643264
2518-
# CHECK-NEXT: 1 1 0.25 mov x2, #0
2518+
# CHECK-NEXT: 1 0 0.20 mov x2, #0
25192519
# CHECK-NEXT: 1 1 0.25 movk w3, #0
25202520
# CHECK-NEXT: 1 1 0.25 movz x4, #0, lsl #16
25212521
# CHECK-NEXT: 1 1 0.25 movk w5, #0, lsl #16
@@ -2557,7 +2557,7 @@ drps
25572557

25582558
# CHECK: Resource pressure per iteration:
25592559
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
2560-
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 215.50 85.50
2560+
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 509.75 249.75 161.25 161.25 216.50 86.50
25612561

25622562
# CHECK: Resource pressure by instruction:
25632563
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
@@ -3270,9 +3270,9 @@ drps
32703270
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fcvtas x27, d28
32713271
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fcvtau w29, d30
32723272
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fcvtau xzr, d0
3273-
# CHECK-NEXT: - - - - - - - 3.00 - - - - - fmov w3, s9
3273+
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov w3, s9
32743274
# CHECK-NEXT: - - - - - - - 3.00 - - - - - fmov s9, w3
3275-
# CHECK-NEXT: - - - - - - - 3.00 - - - - - fmov x20, d31
3275+
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov x20, d31
32763276
# CHECK-NEXT: - - - - - - - 3.00 - - - - - fmov d1, x15
32773277
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov x3, v12.d[1]
32783278
# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 fmov v1.d[1], x19
@@ -3692,14 +3692,14 @@ drps
36923692
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - bics x3, xzr, x3, lsl #1
36933693
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - tst w3, w7, lsl #31
36943694
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - tst x2, x20, asr #2
3695-
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x3, x6
3696-
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x3, xzr
3697-
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov wzr, w2
3698-
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov w3, w5
3695+
# CHECK-NEXT: - - - - - - - - - - - - - mov x3, x6
3696+
# CHECK-NEXT: - - - - - - - - - - - - - mov x3, xzr
3697+
# CHECK-NEXT: - - - - - - - - - - - - - mov wzr, w2
3698+
# CHECK-NEXT: - - - - - - - - - - - - - mov w3, w5
36993699
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movz w2, #0, lsl #16
37003700
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov w2, #-1235
37013701
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x2, #5299989643264
3702-
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x2, #0
3702+
# CHECK-NEXT: - - - - - - - - - - - - - mov x2, #0
37033703
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movk w3, #0
37043704
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movz x4, #0, lsl #16
37053705
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movk w5, #0, lsl #16

0 commit comments

Comments
 (0)