Skip to content

Commit 8e36b5f

Browse files
committed
[AArch64] Initial sched model for Neoverse V3, V3AE
Add the scheduling models for Neoverse V3 and Neoverse V3AE, based on information taken from the V3 Software Optimization guide (https://developer.arm.com/documentation/109678/300/?lang=en) and from the V3AE Software Optimization guide (https://developer.arm.com/documentation/109703/300/?lang=en).

Implements #134977.

Change-Id: Ib43fb98dda9e8a7cdf8c8910df63c89936701ff5
1 parent 70e577e commit 8e36b5f

17 files changed

+13190
-13309
lines changed

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1272,11 +1272,11 @@ def : ProcessorModel<"cortex-x2", NeoverseV2Model, ProcessorFeatures.X2,
12721272
[TuneX2]>;
12731273
def : ProcessorModel<"cortex-x3", NeoverseV2Model, ProcessorFeatures.X3,
12741274
[TuneX3]>;
1275-
def : ProcessorModel<"cortex-x4", NeoverseV2Model, ProcessorFeatures.X4,
1275+
def : ProcessorModel<"cortex-x4", NeoverseV3Model, ProcessorFeatures.X4,
12761276
[TuneX4]>;
1277-
def : ProcessorModel<"cortex-x925", NeoverseV2Model, ProcessorFeatures.X925,
1277+
def : ProcessorModel<"cortex-x925", NeoverseV3Model, ProcessorFeatures.X925,
12781278
[TuneX925]>;
1279-
def : ProcessorModel<"gb10", NeoverseV2Model, ProcessorFeatures.GB10,
1279+
def : ProcessorModel<"gb10", NeoverseV3Model, ProcessorFeatures.GB10,
12801280
[TuneX925]>;
12811281
def : ProcessorModel<"grace", NeoverseV2Model, ProcessorFeatures.Grace,
12821282
[TuneNeoverseV2]>;

llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
def NeoverseV3Model : SchedMachineModel {
17-
let IssueWidth = 8; // Expect best value to be slightly higher than V2
18-
let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer.
17+
let IssueWidth = 10; // Expect best value to be slightly higher than V2
18+
let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer. NOTE: Copied from Neoverse-V2
1919
let LoadLatency = 4; // Optimistic load latency.
2020
let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
2121
let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
@@ -55,7 +55,7 @@ def V3UnitLS0 : ProcResource<1>; // Load/Store 0
5555
def V3UnitL12 : ProcResource<2>; // Load 1/2
5656
def V3UnitST1 : ProcResource<1>; // Store 1
5757
def V3UnitD : ProcResource<2>; // Store data 0/1
58-
def V3UnitFlg : ProcResource<8>; // Flags
58+
def V3UnitFlg : ProcResource<4>; // Flags
5959

6060
def V3UnitS : ProcResGroup<[V3UnitS0, V3UnitS1, V3UnitS2, V3UnitS3, V3UnitS4, V3UnitS5]>; // Integer single-cycle 0/1/2/3/4/5
6161
def V3UnitI : ProcResGroup<[V3UnitS0, V3UnitS1, V3UnitS2, V3UnitS3, V3UnitS4, V3UnitS5, V3UnitM0, V3UnitM1]>; // Integer single-cycle 0/1/2/3/4/5 and single/multicycle 0/1
@@ -128,18 +128,14 @@ def V3Write_3c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 3; }
128128
def V3Write_3c_1V02 : SchedWriteRes<[V3UnitV02]> { let Latency = 3; }
129129
def V3Write_4c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 4; }
130130
def V3Write_4c_1V02 : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
131-
def V3Write_7c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 7;
132-
let ReleaseAtCycles = [7]; }
133131
def V3Write_9c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 9; }
134132
def V3Write_10c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 10; }
135-
def V3Write_8c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 8;
136-
let ReleaseAtCycles = [2]; }
133+
def V3Write_8c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 8; }
137134
def V3Write_12c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 12;
138135
let ReleaseAtCycles = [11]; }
139136
def V3Write_13c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 13; }
140137
def V3Write_15c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 15; }
141-
def V3Write_13c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 13;
142-
let ReleaseAtCycles = [8]; }
138+
def V3Write_13c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 13; }
143139
def V3Write_16c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 16; }
144140
def V3Write_16c_1V02 : SchedWriteRes<[V3UnitV02]> { let Latency = 16;
145141
let ReleaseAtCycles = [8]; }

llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,15 @@
77
//===----------------------------------------------------------------------===//
88
//
99
// This file defines the scheduling model for the Arm Neoverse V3AE processors.
10-
// All information is taken from the V3AE Software Optimization guide:
10+
// All information is taken from the V3AE Software Optimisation guide:
1111
//
1212
// https://developer.arm.com/documentation/109703/300/?lang=en
1313
//
1414
//===----------------------------------------------------------------------===//
1515

1616
def NeoverseV3AEModel : SchedMachineModel {
17-
let IssueWidth = 8; // Expect best value to be slightly higher than V2
18-
let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer.
17+
let IssueWidth = 10; // Expect best value to be slightly higher than V2
18+
let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer. NOTE: Copied from Neoverse-V2
1919
let LoadLatency = 4; // Optimistic load latency.
2020
let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
2121
let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
@@ -53,7 +53,7 @@ def V3AEUnitLS0 : ProcResource<1>; // Load/Store 0
5353
def V3AEUnitL12 : ProcResource<2>; // Load 1/2
5454
def V3AEUnitST1 : ProcResource<1>; // Store 1
5555
def V3AEUnitD : ProcResource<2>; // Store data 0/1
56-
def V3AEUnitFlg : ProcResource<8>; // Flags
56+
def V3AEUnitFlg : ProcResource<4>; // Flags
5757

5858
def V3AEUnitS : ProcResGroup<[V3AEUnitS0, V3AEUnitS1, V3AEUnitS2, V3AEUnitS3, V3AEUnitS4, V3AEUnitS5]>; // Integer single-cycle 0/1/2/3/4/5
5959
def V3AEUnitI : ProcResGroup<[V3AEUnitS0, V3AEUnitS1, V3AEUnitS2, V3AEUnitS3, V3AEUnitS4, V3AEUnitS5, V3AEUnitM0, V3AEUnitM1]>; // Integer single-cycle 0/1/2/3/4/5 and single/multicycle 0/1
@@ -119,12 +119,9 @@ def V3AEWrite_6c_1V : SchedWriteRes<[V3AEUnitV]> { let Latency = 6; }
119119
def V3AEWrite_12c_1V : SchedWriteRes<[V3AEUnitV]> { let Latency = 12; }
120120
def V3AEWrite_3c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 3; }
121121
def V3AEWrite_4c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
122-
def V3AEWrite_7c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 7;
123-
let ReleaseAtCycles = [7]; }
124122
def V3AEWrite_9c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 9; }
125123
def V3AEWrite_10c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 10; }
126-
def V3AEWrite_8c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 8;
127-
let ReleaseAtCycles = [2]; }
124+
def V3AEWrite_8c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 8; }
128125
def V3AEWrite_12c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 12;
129126
let ReleaseAtCycles = [11]; }
130127
def V3AEWrite_13c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 13; }

0 commit comments

Comments
 (0)