Skip to content

Commit 31ef91b

Browse files
Updated latency based on experiments
Signed-off-by: Mikhail R. Gadelha <[email protected]>
1 parent f0c1830 commit 31ef91b

File tree

1 file changed

+39
-40
lines changed

1 file changed

+39
-40
lines changed

llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td

Lines changed: 39 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ def : WriteRes<WriteJal, [SMX60_IEUA]>;
5454
def : WriteRes<WriteJalr, [SMX60_IEUA]>;
5555

5656
// Integer arithmetic and logic
57+
// Latency of ALU instructions is 1, but add.uw is 2
5758
def : WriteRes<WriteIALU32, [SMX60_IEU]>;
5859
def : WriteRes<WriteIALU, [SMX60_IEU]>;
5960
def : WriteRes<WriteShiftImm32, [SMX60_IEU]>;
@@ -62,14 +63,13 @@ def : WriteRes<WriteShiftReg32, [SMX60_IEU]>;
6263
def : WriteRes<WriteShiftReg, [SMX60_IEU]>;
6364

6465
// Integer multiplication
65-
let Latency = 4 in {
66-
def : WriteRes<WriteIMul, [SMX60_IEU]>;
67-
def : WriteRes<WriteIMul32, [SMX60_IEU]>;
68-
}
66+
// The latency of mul is 5, while that of mulh, mulhsu, and mulhu is 6.
67+
// Worst case latency (6) is used for WriteIMul.
68+
def : WriteRes<WriteIMul, [SMX60_IEU]> { let Latency = 6; }
69+
def : WriteRes<WriteIMul32, [SMX60_IEU]> { let Latency = 3; }
6970

7071
// Integer division/remainder
71-
// Worst case latency is used.
72-
let Latency = 15, ReleaseAtCycles = [15] in {
72+
let Latency = 3, ReleaseAtCycles = [3] in {
7373
def : WriteRes<WriteIDiv32, [SMX60_IEUA]>;
7474
def : WriteRes<WriteIRem32, [SMX60_IEUA]>;
7575
}
@@ -96,13 +96,14 @@ let Latency = 2 in {
9696
}
9797

9898
def : WriteRes<WriteORCB, [SMX60_IEU]>;
99-
10099
def : WriteRes<WriteIMinMax, [SMX60_IEU]>;
101-
102100
def : WriteRes<WriteREV8, [SMX60_IEU]>;
103101

104-
def : WriteRes<WriteSHXADD, [SMX60_IEU]>;
105-
def : WriteRes<WriteSHXADD32, [SMX60_IEU]>;
102+
let Latency = 2 in {
103+
def : WriteRes<WriteSHXADD, [SMX60_IEU]>;
104+
def : WriteRes<WriteSHXADD32, [SMX60_IEU]>;
105+
def : WriteRes<WriteCLMUL, [SMX60_IEU]>;
106+
}
106107

107108
// Single-bit instructions
108109
def : WriteRes<WriteSingleBit, [SMX60_IEU]>;
@@ -142,92 +143,90 @@ let Latency = 5 in {
142143
}
143144

144145
// Floating point units Half precision
145-
let Latency = 3 in {
146+
let Latency = 4 in {
146147
def : WriteRes<WriteFAdd16, [SMX60_FP]>;
147148
def : WriteRes<WriteFMul16, [SMX60_FP]>;
148149
def : WriteRes<WriteFSGNJ16, [SMX60_FP]>;
149150
def : WriteRes<WriteFMinMax16, [SMX60_FP]>;
150151
}
151-
def : WriteRes<WriteFMA16, [SMX60_FP]> { let Latency = 4; }
152+
def : WriteRes<WriteFMA16, [SMX60_FP]> { let Latency = 5; }
152153

153-
// Worst case latency is used
154-
let Latency = 7, ReleaseAtCycles = [7] in {
154+
let Latency = 12, ReleaseAtCycles = [12] in {
155155
def : WriteRes<WriteFDiv16, [SMX60_FP]>;
156156
def : WriteRes<WriteFSqrt16, [SMX60_FP]>;
157157
}
158158

159159
// Single precision
160-
let Latency = 3 in {
160+
let Latency = 4 in {
161161
def : WriteRes<WriteFAdd32, [SMX60_FP]>;
162+
def : WriteRes<WriteFMul32, [SMX60_FP]>;
162163
def : WriteRes<WriteFSGNJ32, [SMX60_FP]>;
163164
def : WriteRes<WriteFMinMax32, [SMX60_FP]>;
164165
}
165-
def : WriteRes<WriteFMul32, [SMX60_FP]> { let Latency = 4; }
166166
def : WriteRes<WriteFMA32, [SMX60_FP]> { let Latency = 5; }
167167

168-
// Worst case latency is used
169-
let Latency = 10, ReleaseAtCycles = [10] in {
168+
let Latency = 15, ReleaseAtCycles = [15] in {
170169
def : WriteRes<WriteFDiv32, [SMX60_FP]>;
171170
def : WriteRes<WriteFSqrt32, [SMX60_FP]>;
172171
}
173172

174173
// Double precision
175-
let Latency = 4 in {
174+
let Latency = 5 in {
176175
def : WriteRes<WriteFAdd64, [SMX60_FP]>;
177176
def : WriteRes<WriteFMul64, [SMX60_FP]>;
178-
}
179-
let Latency = 3 in {
180177
def : WriteRes<WriteFSGNJ64, [SMX60_FP]>;
181-
def : WriteRes<WriteFMinMax64, [SMX60_FP]>;
182178
}
183-
def : WriteRes<WriteFMA64, [SMX60_FP]> { let Latency = 5; }
179+
def : WriteRes<WriteFMinMax64, [SMX60_FP]> { let Latency = 4; }
180+
def : WriteRes<WriteFMA64, [SMX60_FP]> { let Latency = 6; }
184181

185-
let Latency = 10, ReleaseAtCycles = [10] in {
182+
let Latency = 22, ReleaseAtCycles = [22] in {
186183
def : WriteRes<WriteFDiv64, [SMX60_FP]>;
187184
def : WriteRes<WriteFSqrt64, [SMX60_FP]>;
188185
}
189186

190187
// Conversions
191-
let Latency = 3 in {
188+
let Latency = 6 in {
189+
def : WriteRes<WriteFCvtF16ToI32, [SMX60_IEU]>;
190+
def : WriteRes<WriteFCvtF32ToI32, [SMX60_IEU]>;
191+
def : WriteRes<WriteFCvtF32ToI64, [SMX60_IEU]>;
192+
def : WriteRes<WriteFCvtF64ToI64, [SMX60_IEU]>;
193+
def : WriteRes<WriteFCvtF64ToI32, [SMX60_IEU]>;
194+
def : WriteRes<WriteFCvtF16ToI64, [SMX60_IEU]>;
195+
}
196+
197+
let Latency = 4 in {
192198
def : WriteRes<WriteFCvtI32ToF16, [SMX60_IEU]>;
193199
def : WriteRes<WriteFCvtI32ToF32, [SMX60_IEU]>;
194200
def : WriteRes<WriteFCvtI32ToF64, [SMX60_IEU]>;
195201
def : WriteRes<WriteFCvtI64ToF16, [SMX60_IEU]>;
196202
def : WriteRes<WriteFCvtI64ToF32, [SMX60_IEU]>;
197203
def : WriteRes<WriteFCvtI64ToF64, [SMX60_IEU]>;
198-
def : WriteRes<WriteFCvtF16ToI32, [SMX60_IEU]>;
199-
def : WriteRes<WriteFCvtF16ToI64, [SMX60_IEU]>;
200204
def : WriteRes<WriteFCvtF16ToF32, [SMX60_FP]>;
201205
def : WriteRes<WriteFCvtF16ToF64, [SMX60_FP]>;
202-
def : WriteRes<WriteFCvtF32ToI32, [SMX60_IEU]>;
203-
def : WriteRes<WriteFCvtF32ToI64, [SMX60_IEU]>;
204206
def : WriteRes<WriteFCvtF32ToF16, [SMX60_FP]>;
205207
def : WriteRes<WriteFCvtF32ToF64, [SMX60_FP]>;
206-
def : WriteRes<WriteFCvtF64ToI32, [SMX60_IEU]>;
207-
def : WriteRes<WriteFCvtF64ToI64, [SMX60_IEU]>;
208208
def : WriteRes<WriteFCvtF64ToF16, [SMX60_FP]>;
209209
def : WriteRes<WriteFCvtF64ToF32, [SMX60_FP]>;
210210
}
211211

212-
let Latency = 2 in {
212+
let Latency = 6 in {
213213
def : WriteRes<WriteFClass16, [SMX60_FP]>;
214214
def : WriteRes<WriteFClass32, [SMX60_FP]>;
215215
def : WriteRes<WriteFClass64, [SMX60_FP]>;
216-
}
217216

218-
let Latency = 4 in {
219217
def : WriteRes<WriteFCmp16, [SMX60_FP]>;
220218
def : WriteRes<WriteFCmp32, [SMX60_FP]>;
221219
def : WriteRes<WriteFCmp64, [SMX60_FP]>;
220+
221+
def : WriteRes<WriteFMovF32ToI32, [SMX60_IEU]>;
222+
def : WriteRes<WriteFMovF16ToI16, [SMX60_IEU]>;
222223
}
223224

224-
let Latency = 2 in {
225+
let Latency = 4 in {
225226
def : WriteRes<WriteFMovI16ToF16, [SMX60_IEU]>;
226-
def : WriteRes<WriteFMovF16ToI16, [SMX60_IEU]>;
227-
def : WriteRes<WriteFMovI32ToF32, [SMX60_IEU]>;
228-
def : WriteRes<WriteFMovF32ToI32, [SMX60_IEU]>;
229-
def : WriteRes<WriteFMovI64ToF64, [SMX60_IEU]>;
230227
def : WriteRes<WriteFMovF64ToI64, [SMX60_IEU]>;
228+
def : WriteRes<WriteFMovI64ToF64, [SMX60_IEU]>;
229+
def : WriteRes<WriteFMovI32ToF32, [SMX60_IEU]>;
231230
}
232231

233232
// Others
@@ -334,6 +333,7 @@ def : ReadAdvance<ReadIMinMax, 0>;
334333
def : ReadAdvance<ReadREV8, 0>;
335334
def : ReadAdvance<ReadSHXADD, 0>;
336335
def : ReadAdvance<ReadSHXADD32, 0>;
336+
def : ReadAdvance<ReadCLMUL, 0>;
337337
// Single-bit instructions
338338
def : ReadAdvance<ReadSingleBit, 0>;
339339
def : ReadAdvance<ReadSingleBitImm, 0>;
@@ -343,7 +343,6 @@ def : ReadAdvance<ReadSingleBitImm, 0>;
343343
defm : UnsupportedSchedV;
344344
defm : UnsupportedSchedXsfvcp;
345345
defm : UnsupportedSchedZabha;
346-
defm : UnsupportedSchedZbc;
347346
defm : UnsupportedSchedZbkb;
348347
defm : UnsupportedSchedZbkx;
349348
defm : UnsupportedSchedZfa;

0 commit comments

Comments
 (0)