Skip to content

Commit 9cc030d

Browse files
[RISC-V] Base scheduling model for tt-ascalon-d8
First part of tt-ascalon-d8 scheduling model, only containing scalar ops. Scheduling for vector instructions will be added in a follow-up patch. Co-authored-by: Anton Blanchard <[email protected]>
1 parent 0a7e048 commit 9cc030d

File tree

5 files changed

+497
-1
lines changed

5 files changed

+497
-1
lines changed

llvm/lib/Target/RISCV/RISCV.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ include "RISCVSchedSyntacoreSCR1.td"
5454
include "RISCVSchedSyntacoreSCR345.td"
5555
include "RISCVSchedSyntacoreSCR7.td"
5656
include "RISCVSchedXiangShanNanHu.td"
57+
include "RISCVSchedTTAscalonD8.td"
5758

5859
//===----------------------------------------------------------------------===//
5960
// RISC-V processors supported.

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ def SYNTACORE_SCR7 : RISCVProcessorModel<"syntacore-scr7",
454454
[TuneNoDefaultUnroll, TunePostRAScheduler]>;
455455

456456
def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8",
457-
NoSchedModel,
457+
TTAscalonD8Model,
458458
!listconcat(RVA23S64Features,
459459
[FeatureStdExtSmaia,
460460
FeatureStdExtSsaia,
Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
//=- RISCVSchedTTAscalonD8.td - Tenstorrent Ascalon Scheduling Defs -----*- tablegen -*-=//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
//===----------------------------------------------------------------------===//
10+
11+
def TTAscalonD8Model : SchedMachineModel { // Machine model for tt-ascalon-d8
12+
let IssueWidth = 8; // 8-way decode and dispatch
13+
let MicroOpBufferSize = 256; // 256 micro-op re-order buffer
14+
let LoadLatency = 4; // Optimistic load latency; same value as the Latency set on the load WriteRes entries in this file
15+
let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
16+
17+
// Enable partial & runtime unrolling.
18+
let LoopMicroOpBufferSize = 16;
19+
20+
let CompleteModel = 0; // Model is declared incomplete; see the TODO on UnsupportedFeatures below
21+
22+
// TODO: these extensions are supported by the core, but their scheduling info has not been added yet.
23+
let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
24+
HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
25+
HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
26+
HasStdExtZkr, HasVInstructions, HasVInstructionsI64];
27+
}
28+
29+
let SchedModel = TTAscalonD8Model in {
30+
31+
//===----------------------------------------------------------------------===//
32+
// Define each kind of processor resource and number available.
33+
34+
let BufferSize = 16 in { // BufferSize = 16 is applied to every ProcResource defined in this block
35+
def AscalonLS : ProcResource<3>; // Used by the load, store and atomic WriteRes entries in this file
36+
def AscalonFXA : ProcResource<1>; // ALU, FP/VEC -> INT, MUL, DIV, CSR
37+
def AscalonFXB : ProcResource<1>; // ALU, INT -> FP/VEC
38+
def AscalonFXC : ProcResource<2>; // ALU, BR
39+
def AscalonFXD : ProcResource<2>; // ALU
40+
def AscalonFP : ProcResource<2>; // Used by the FP arithmetic, compare, classify and FP -> FP convert WriteRes entries
41+
def AscalonV : ProcResource<2>; // Not referenced by any WriteRes in this file; presumably the vector unit -- TODO confirm
42+
}
43+
44+
def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>; // Group of all four FX resource kinds (each is commented as ALU-capable)
45+
46+
//===----------------------------------------------------------------------===//
47+
48+
// Branching: every jump write is bound to AscalonFXC (the resource commented "ALU, BR"); no explicit Latency is set.
49+
def : WriteRes<WriteJmp, [AscalonFXC]>;
50+
def : WriteRes<WriteJal, [AscalonFXC]>;
51+
def : WriteRes<WriteJalr, [AscalonFXC]>;
52+
53+
// Integer arithmetic and logic: bound to the AscalonFX group rather than one specific FX resource; no explicit Latency is set.
54+
def : WriteRes<WriteIALU32, [AscalonFX]>;
55+
def : WriteRes<WriteIALU, [AscalonFX]>;
56+
def : WriteRes<WriteShiftImm32, [AscalonFX]>;
57+
def : WriteRes<WriteShiftImm, [AscalonFX]>;
58+
def : WriteRes<WriteShiftReg32, [AscalonFX]>;
59+
def : WriteRes<WriteShiftReg, [AscalonFX]>;
60+
61+
// Integer multiplication: Latency = 3, bound to AscalonFXA only (the resource whose comment lists MUL).
62+
let Latency = 3 in {
63+
def : WriteRes<WriteIMul, [AscalonFXA]>;
64+
def : WriteRes<WriteIMul32, [AscalonFXA]>;
65+
}
66+
67+
// Integer division and remainder: bound to AscalonFXA only (the resource whose comment lists DIV).
68+
// Worst case latency is used. ReleaseAtCycles equals Latency, so AscalonFXA is presumably held for the whole operation -- TODO confirm.
69+
70+
let Latency = 7, ReleaseAtCycles = [7] in {
71+
def : WriteRes<WriteIDiv32, [AscalonFXA]>;
72+
def : WriteRes<WriteIDiv, [AscalonFXA]>;
73+
def : WriteRes<WriteIRem32, [AscalonFXA]>;
74+
def : WriteRes<WriteIRem, [AscalonFXA]>;
75+
}
76+
77+
// Bitmanip: every write in this section is bound to the AscalonFX group; no explicit Latency is set.
78+
def : WriteRes<WriteRotateImm, [AscalonFX]>;
79+
def : WriteRes<WriteRotateImm32, [AscalonFX]>;
80+
def : WriteRes<WriteRotateReg, [AscalonFX]>;
81+
def : WriteRes<WriteRotateReg32, [AscalonFX]>;
82+
83+
def : WriteRes<WriteCLZ, [AscalonFX]>;
84+
def : WriteRes<WriteCLZ32, [AscalonFX]>;
85+
def : WriteRes<WriteCTZ, [AscalonFX]>;
86+
def : WriteRes<WriteCTZ32, [AscalonFX]>;
87+
88+
def : WriteRes<WriteCPOP, [AscalonFX]>;
89+
def : WriteRes<WriteCPOP32, [AscalonFX]>;
90+
91+
def : WriteRes<WriteORCB, [AscalonFX]>;
92+
93+
def : WriteRes<WriteIMinMax, [AscalonFX]>;
94+
95+
def : WriteRes<WriteREV8, [AscalonFX]>;
96+
97+
def : WriteRes<WriteSHXADD, [AscalonFX]>;
98+
def : WriteRes<WriteSHXADD32, [AscalonFX]>;
99+
100+
// Single-bit instructions: also bound to the AscalonFX group.
101+
def : WriteRes<WriteSingleBit, [AscalonFX]>;
102+
def : WriteRes<WriteSingleBitImm, [AscalonFX]>;
103+
def : WriteRes<WriteBEXT, [AscalonFX]>;
104+
def : WriteRes<WriteBEXTI, [AscalonFX]>;
105+
106+
// Memory: integer and FP stores are bound to AscalonLS with no explicit Latency.
107+
def : WriteRes<WriteSTB, [AscalonLS]>;
108+
def : WriteRes<WriteSTH, [AscalonLS]>;
109+
def : WriteRes<WriteSTW, [AscalonLS]>;
110+
def : WriteRes<WriteSTD, [AscalonLS]>;
111+
def : WriteRes<WriteFST16, [AscalonLS]>;
112+
def : WriteRes<WriteFST32, [AscalonLS]>;
113+
def : WriteRes<WriteFST64, [AscalonLS]>;
114+
115+
let Latency = 4 in { // Integer and FP loads; same value as LoadLatency in TTAscalonD8Model
116+
def : WriteRes<WriteLDB, [AscalonLS]>;
117+
def : WriteRes<WriteLDH, [AscalonLS]>;
118+
def : WriteRes<WriteLDW, [AscalonLS]>;
119+
def : WriteRes<WriteLDD, [AscalonLS]>;
120+
def : WriteRes<WriteFLD16, [AscalonLS]>;
121+
def : WriteRes<WriteFLD32, [AscalonLS]>;
122+
def : WriteRes<WriteFLD64, [AscalonLS]>;
123+
}
124+
125+
// Atomic memory: all atomic writes are bound to AscalonLS; the two atomic store writes have no explicit Latency.
126+
def : WriteRes<WriteAtomicSTW, [AscalonLS]>;
127+
def : WriteRes<WriteAtomicSTD, [AscalonLS]>;
128+
129+
let Latency = 4 in { // Same Latency as the ordinary loads in this file
130+
def : WriteRes<WriteAtomicW, [AscalonLS]>;
131+
def : WriteRes<WriteAtomicD, [AscalonLS]>;
132+
def : WriteRes<WriteAtomicLDW, [AscalonLS]>;
133+
def : WriteRes<WriteAtomicLDD, [AscalonLS]>;
134+
}
135+
136+
// Half precision: add/mul/FMA/sign-inject/min-max have Latency = 3 on AscalonFP.
137+
let Latency = 3 in {
138+
def : WriteRes<WriteFAdd16, [AscalonFP]>;
139+
def : WriteRes<WriteFMul16, [AscalonFP]>;
140+
def : WriteRes<WriteFMA16, [AscalonFP]>;
141+
def : WriteRes<WriteFSGNJ16, [AscalonFP]>;
142+
def : WriteRes<WriteFMinMax16, [AscalonFP]>;
143+
}
144+
145+
let Latency = 7, ReleaseAtCycles = [7] in { // Divide and square root; ReleaseAtCycles equals Latency
146+
def : WriteRes<WriteFDiv16, [AscalonFP]>;
147+
def : WriteRes<WriteFSqrt16, [AscalonFP]>;
148+
}
149+
150+
// Single precision: same Latency values as the half-precision section (3 for arithmetic, 7 for div/sqrt).
151+
let Latency = 3 in {
152+
def : WriteRes<WriteFAdd32, [AscalonFP]>;
153+
def : WriteRes<WriteFMul32, [AscalonFP]>;
154+
def : WriteRes<WriteFMA32, [AscalonFP]>;
155+
def : WriteRes<WriteFSGNJ32, [AscalonFP]>;
156+
def : WriteRes<WriteFMinMax32, [AscalonFP]>;
157+
}
158+
159+
let Latency = 7, ReleaseAtCycles = [7] in { // Divide and square root; ReleaseAtCycles equals Latency
160+
def : WriteRes<WriteFDiv32, [AscalonFP]>;
161+
def : WriteRes<WriteFSqrt32, [AscalonFP]>;
162+
}
163+
164+
// Double precision: arithmetic keeps Latency = 3; divide and square root use 12 instead of the 7 used for half/single.
165+
let Latency = 3 in {
166+
def : WriteRes<WriteFAdd64, [AscalonFP]>;
167+
def : WriteRes<WriteFMul64, [AscalonFP]>;
168+
def : WriteRes<WriteFMA64, [AscalonFP]>;
169+
def : WriteRes<WriteFSGNJ64, [AscalonFP]>;
170+
def : WriteRes<WriteFMinMax64, [AscalonFP]>;
171+
}
172+
173+
let Latency = 12, ReleaseAtCycles = [12] in { // Divide and square root; ReleaseAtCycles equals Latency
174+
def : WriteRes<WriteFDiv64, [AscalonFP]>;
175+
def : WriteRes<WriteFSqrt64, [AscalonFP]>;
176+
}
177+
178+
// Conversions: INT -> FP is bound to AscalonFXB, FP -> INT to AscalonFXA, FP -> FP to AscalonFP. No explicit Latency is set.
179+
def : WriteRes<WriteFCvtI32ToF16, [AscalonFXB]>;
180+
def : WriteRes<WriteFCvtI32ToF32, [AscalonFXB]>;
181+
def : WriteRes<WriteFCvtI32ToF64, [AscalonFXB]>;
182+
def : WriteRes<WriteFCvtI64ToF16, [AscalonFXB]>;
183+
def : WriteRes<WriteFCvtI64ToF32, [AscalonFXB]>;
184+
def : WriteRes<WriteFCvtI64ToF64, [AscalonFXB]>;
185+
def : WriteRes<WriteFCvtF16ToI32, [AscalonFXA]>;
186+
def : WriteRes<WriteFCvtF16ToI64, [AscalonFXA]>;
187+
def : WriteRes<WriteFCvtF16ToF32, [AscalonFP]>;
188+
def : WriteRes<WriteFCvtF16ToF64, [AscalonFP]>;
189+
def : WriteRes<WriteFCvtF32ToI32, [AscalonFXA]>;
190+
def : WriteRes<WriteFCvtF32ToI64, [AscalonFXA]>;
191+
def : WriteRes<WriteFCvtF32ToF16, [AscalonFP]>;
192+
def : WriteRes<WriteFCvtF32ToF64, [AscalonFP]>;
193+
def : WriteRes<WriteFCvtF64ToI32, [AscalonFXA]>;
194+
def : WriteRes<WriteFCvtF64ToI64, [AscalonFXA]>;
195+
def : WriteRes<WriteFCvtF64ToF16, [AscalonFP]>;
196+
def : WriteRes<WriteFCvtF64ToF32, [AscalonFP]>;
197+
198+
def : WriteRes<WriteFClass16, [AscalonFP]>; // Classify and compare writes are bound to AscalonFP
199+
def : WriteRes<WriteFClass32, [AscalonFP]>;
200+
def : WriteRes<WriteFClass64, [AscalonFP]>;
201+
def : WriteRes<WriteFCmp16, [AscalonFP]>;
202+
def : WriteRes<WriteFCmp32, [AscalonFP]>;
203+
def : WriteRes<WriteFCmp64, [AscalonFP]>;
204+
205+
def : WriteRes<WriteFMovI16ToF16, [AscalonFXB]>; // Moves use the same split as conversions: INT -> FP on FXB, FP -> INT on FXA
206+
def : WriteRes<WriteFMovF16ToI16, [AscalonFXA]>;
207+
def : WriteRes<WriteFMovI32ToF32, [AscalonFXB]>;
208+
def : WriteRes<WriteFMovF32ToI32, [AscalonFXA]>;
209+
def : WriteRes<WriteFMovI64ToF64, [AscalonFXB]>;
210+
def : WriteRes<WriteFMovF64ToI64, [AscalonFXA]>;
211+
212+
// Others: CSR access is bound to AscalonFXA (the resource whose comment lists CSR); nop is bound to the AscalonFX group.
213+
def : WriteRes<WriteCSR, [AscalonFXA]>;
214+
def : WriteRes<WriteNop, [AscalonFX]>;
215+
216+
def : InstRW<[WriteIALU], (instrs COPY)>; // Give the COPY instruction the WriteIALU scheduling write
217+
218+
//===----------------------------------------------------------------------===//
219+
// Bypass and advance: every SchedRead listed below is given a ReadAdvance of 0 cycles.
220+
def : ReadAdvance<ReadJmp, 0>;
221+
def : ReadAdvance<ReadJalr, 0>;
222+
def : ReadAdvance<ReadCSR, 0>;
223+
def : ReadAdvance<ReadStoreData, 0>;
224+
def : ReadAdvance<ReadMemBase, 0>;
225+
def : ReadAdvance<ReadIALU, 0>;
226+
def : ReadAdvance<ReadIALU32, 0>;
227+
def : ReadAdvance<ReadShiftImm, 0>;
228+
def : ReadAdvance<ReadShiftImm32, 0>;
229+
def : ReadAdvance<ReadShiftReg, 0>;
230+
def : ReadAdvance<ReadShiftReg32, 0>;
231+
def : ReadAdvance<ReadIDiv, 0>;
232+
def : ReadAdvance<ReadIDiv32, 0>;
233+
def : ReadAdvance<ReadIRem, 0>;
234+
def : ReadAdvance<ReadIRem32, 0>;
235+
def : ReadAdvance<ReadIMul, 0>;
236+
def : ReadAdvance<ReadIMul32, 0>;
237+
def : ReadAdvance<ReadAtomicWA, 0>;
238+
def : ReadAdvance<ReadAtomicWD, 0>;
239+
def : ReadAdvance<ReadAtomicDA, 0>;
240+
def : ReadAdvance<ReadAtomicDD, 0>;
241+
def : ReadAdvance<ReadAtomicLDW, 0>;
242+
def : ReadAdvance<ReadAtomicLDD, 0>;
243+
def : ReadAdvance<ReadAtomicSTW, 0>;
244+
def : ReadAdvance<ReadAtomicSTD, 0>;
245+
def : ReadAdvance<ReadFStoreData, 0>;
246+
def : ReadAdvance<ReadFMemBase, 0>;
247+
def : ReadAdvance<ReadFAdd16, 0>;
248+
def : ReadAdvance<ReadFAdd32, 0>;
249+
def : ReadAdvance<ReadFAdd64, 0>;
250+
def : ReadAdvance<ReadFMul16, 0>;
251+
def : ReadAdvance<ReadFMA16, 0>;
252+
def : ReadAdvance<ReadFMA16Addend, 0>;
253+
def : ReadAdvance<ReadFMul32, 0>;
254+
def : ReadAdvance<ReadFMul64, 0>;
255+
def : ReadAdvance<ReadFMA32, 0>;
256+
def : ReadAdvance<ReadFMA32Addend, 0>;
257+
def : ReadAdvance<ReadFMA64, 0>;
258+
def : ReadAdvance<ReadFMA64Addend, 0>;
259+
def : ReadAdvance<ReadFDiv16, 0>;
260+
def : ReadAdvance<ReadFDiv32, 0>;
261+
def : ReadAdvance<ReadFDiv64, 0>;
262+
def : ReadAdvance<ReadFSqrt16, 0>;
263+
def : ReadAdvance<ReadFSqrt32, 0>;
264+
def : ReadAdvance<ReadFSqrt64, 0>;
265+
def : ReadAdvance<ReadFCmp16, 0>;
266+
def : ReadAdvance<ReadFCmp32, 0>;
267+
def : ReadAdvance<ReadFCmp64, 0>;
268+
def : ReadAdvance<ReadFSGNJ16, 0>;
269+
def : ReadAdvance<ReadFSGNJ32, 0>;
270+
def : ReadAdvance<ReadFSGNJ64, 0>;
271+
def : ReadAdvance<ReadFMinMax16, 0>;
272+
def : ReadAdvance<ReadFMinMax32, 0>;
273+
def : ReadAdvance<ReadFMinMax64, 0>;
274+
def : ReadAdvance<ReadFCvtF16ToI32, 0>;
275+
def : ReadAdvance<ReadFCvtF16ToI64, 0>;
276+
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
277+
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
278+
def : ReadAdvance<ReadFCvtF64ToI32, 0>;
279+
def : ReadAdvance<ReadFCvtF64ToI64, 0>;
280+
def : ReadAdvance<ReadFCvtI32ToF16, 0>;
281+
def : ReadAdvance<ReadFCvtI32ToF32, 0>;
282+
def : ReadAdvance<ReadFCvtI32ToF64, 0>;
283+
def : ReadAdvance<ReadFCvtI64ToF16, 0>;
284+
def : ReadAdvance<ReadFCvtI64ToF32, 0>;
285+
def : ReadAdvance<ReadFCvtI64ToF64, 0>;
286+
def : ReadAdvance<ReadFCvtF32ToF64, 0>;
287+
def : ReadAdvance<ReadFCvtF64ToF32, 0>;
288+
def : ReadAdvance<ReadFCvtF16ToF32, 0>;
289+
def : ReadAdvance<ReadFCvtF32ToF16, 0>;
290+
def : ReadAdvance<ReadFCvtF16ToF64, 0>;
291+
def : ReadAdvance<ReadFCvtF64ToF16, 0>;
292+
def : ReadAdvance<ReadFMovF16ToI16, 0>;
293+
def : ReadAdvance<ReadFMovI16ToF16, 0>;
294+
def : ReadAdvance<ReadFMovF32ToI32, 0>;
295+
def : ReadAdvance<ReadFMovI32ToF32, 0>;
296+
def : ReadAdvance<ReadFMovF64ToI64, 0>;
297+
def : ReadAdvance<ReadFMovI64ToF64, 0>;
298+
def : ReadAdvance<ReadFClass16, 0>;
299+
def : ReadAdvance<ReadFClass32, 0>;
300+
def : ReadAdvance<ReadFClass64, 0>;
301+
302+
// Bitmanip reads: as with the reads above, each gets a ReadAdvance of 0 cycles.
303+
def : ReadAdvance<ReadRotateImm, 0>;
304+
def : ReadAdvance<ReadRotateImm32, 0>;
305+
def : ReadAdvance<ReadRotateReg, 0>;
306+
def : ReadAdvance<ReadRotateReg32, 0>;
307+
def : ReadAdvance<ReadCLZ, 0>;
308+
def : ReadAdvance<ReadCLZ32, 0>;
309+
def : ReadAdvance<ReadCTZ, 0>;
310+
def : ReadAdvance<ReadCTZ32, 0>;
311+
def : ReadAdvance<ReadCPOP, 0>;
312+
def : ReadAdvance<ReadCPOP32, 0>;
313+
def : ReadAdvance<ReadORCB, 0>;
314+
def : ReadAdvance<ReadIMinMax, 0>;
315+
def : ReadAdvance<ReadREV8, 0>;
316+
def : ReadAdvance<ReadSHXADD, 0>;
317+
def : ReadAdvance<ReadSHXADD32, 0>;
318+
// Single-bit instruction reads: also 0 cycles.
319+
def : ReadAdvance<ReadSingleBit, 0>;
320+
def : ReadAdvance<ReadSingleBitImm, 0>;
321+
322+
//===----------------------------------------------------------------------===//
323+
// Unsupported extensions: instantiates the UnsupportedSched* multiclasses (defined outside this file; presumably they mark the corresponding scheduling classes as unsupported for this model -- TODO confirm).
324+
defm : UnsupportedSchedV;
325+
defm : UnsupportedSchedXsfvcp;
326+
defm : UnsupportedSchedZabha;
327+
defm : UnsupportedSchedZbc;
328+
defm : UnsupportedSchedZbkb;
329+
defm : UnsupportedSchedZbkx;
330+
defm : UnsupportedSchedZfa;
331+
defm : UnsupportedSchedZvk;
332+
defm : UnsupportedSchedSFB;
333+
}

0 commit comments

Comments
 (0)