@@ -54,6 +54,7 @@ def : WriteRes<WriteJal, [SMX60_IEUA]>;
5454def : WriteRes<WriteJalr, [SMX60_IEUA]>;
5555
5656// Integer arithmetic and logic
57+ // ALU instructions have a latency of 1, except add.uw, which has 2.
5758def : WriteRes<WriteIALU32, [SMX60_IEU]>;
5859def : WriteRes<WriteIALU, [SMX60_IEU]>;
5960def : WriteRes<WriteShiftImm32, [SMX60_IEU]>;
@@ -62,14 +63,13 @@ def : WriteRes<WriteShiftReg32, [SMX60_IEU]>;
6263def : WriteRes<WriteShiftReg, [SMX60_IEU]>;
6364
6465// Integer multiplication
65- let Latency = 4 in {
66- def : WriteRes<WriteIMul, [SMX60_IEU]>;
67- def : WriteRes<WriteIMul32 , [SMX60_IEU]>;
68- }
66+ // mul has a latency of 5, while mulh, mulhsu, and mulhu have a latency of 6.
67+ // The worst-case latency (6) is used for WriteIMul.
68+ def : WriteRes<WriteIMul , [SMX60_IEU]> { let Latency = 6; }
69+ def : WriteRes<WriteIMul32, [SMX60_IEU]> { let Latency = 3; }
6970
7071// Integer division/remainder
71- // Worst case latency is used.
72- let Latency = 15, ReleaseAtCycles = [15] in {
72+ let Latency = 3, ReleaseAtCycles = [3] in {
7373 def : WriteRes<WriteIDiv32, [SMX60_IEUA]>;
7474 def : WriteRes<WriteIRem32, [SMX60_IEUA]>;
7575}
@@ -96,13 +96,14 @@ let Latency = 2 in {
9696}
9797
9898def : WriteRes<WriteORCB, [SMX60_IEU]>;
99-
10099def : WriteRes<WriteIMinMax, [SMX60_IEU]>;
101-
102100def : WriteRes<WriteREV8, [SMX60_IEU]>;
103101
104- def : WriteRes<WriteSHXADD, [SMX60_IEU]>;
105- def : WriteRes<WriteSHXADD32, [SMX60_IEU]>;
102+ let Latency = 2 in {
103+ def : WriteRes<WriteSHXADD, [SMX60_IEU]>;
104+ def : WriteRes<WriteSHXADD32, [SMX60_IEU]>;
105+ def : WriteRes<WriteCLMUL, [SMX60_IEU]>;
106+ }
106107
107108// Single-bit instructions
108109def : WriteRes<WriteSingleBit, [SMX60_IEU]>;
@@ -142,92 +143,90 @@ let Latency = 5 in {
142143}
143144
144145// Floating point units Half precision
145- let Latency = 3 in {
146+ let Latency = 4 in {
146147 def : WriteRes<WriteFAdd16, [SMX60_FP]>;
147148 def : WriteRes<WriteFMul16, [SMX60_FP]>;
148149 def : WriteRes<WriteFSGNJ16, [SMX60_FP]>;
149150 def : WriteRes<WriteFMinMax16, [SMX60_FP]>;
150151}
151- def : WriteRes<WriteFMA16, [SMX60_FP]> { let Latency = 4 ; }
152+ def : WriteRes<WriteFMA16, [SMX60_FP]> { let Latency = 5 ; }
152153
153- // Worst case latency is used
154- let Latency = 7, ReleaseAtCycles = [7] in {
154+ let Latency = 12, ReleaseAtCycles = [12] in {
155155 def : WriteRes<WriteFDiv16, [SMX60_FP]>;
156156 def : WriteRes<WriteFSqrt16, [SMX60_FP]>;
157157}
158158
159159// Single precision
160- let Latency = 3 in {
160+ let Latency = 4 in {
161161 def : WriteRes<WriteFAdd32, [SMX60_FP]>;
162+ def : WriteRes<WriteFMul32, [SMX60_FP]>;
162163 def : WriteRes<WriteFSGNJ32, [SMX60_FP]>;
163164 def : WriteRes<WriteFMinMax32, [SMX60_FP]>;
164165}
165- def : WriteRes<WriteFMul32, [SMX60_FP]> { let Latency = 4; }
166166def : WriteRes<WriteFMA32, [SMX60_FP]> { let Latency = 5; }
167167
168- // Worst case latency is used
169- let Latency = 10, ReleaseAtCycles = [10] in {
168+ let Latency = 15, ReleaseAtCycles = [15] in {
170169 def : WriteRes<WriteFDiv32, [SMX60_FP]>;
171170 def : WriteRes<WriteFSqrt32, [SMX60_FP]>;
172171}
173172
174173// Double precision
175- let Latency = 4 in {
174+ let Latency = 5 in {
176175 def : WriteRes<WriteFAdd64, [SMX60_FP]>;
177176 def : WriteRes<WriteFMul64, [SMX60_FP]>;
178- }
179- let Latency = 3 in {
180177 def : WriteRes<WriteFSGNJ64, [SMX60_FP]>;
181- def : WriteRes<WriteFMinMax64, [SMX60_FP]>;
182178}
183- def : WriteRes<WriteFMA64, [SMX60_FP]> { let Latency = 5; }
179+ def : WriteRes<WriteFMinMax64, [SMX60_FP]> { let Latency = 4; }
180+ def : WriteRes<WriteFMA64, [SMX60_FP]> { let Latency = 6; }
184181
185- let Latency = 10 , ReleaseAtCycles = [10 ] in {
182+ let Latency = 22 , ReleaseAtCycles = [22 ] in {
186183 def : WriteRes<WriteFDiv64, [SMX60_FP]>;
187184 def : WriteRes<WriteFSqrt64, [SMX60_FP]>;
188185}
189186
190187// Conversions
191- let Latency = 3 in {
188+ let Latency = 6 in {
189+ def : WriteRes<WriteFCvtF16ToI32, [SMX60_IEU]>;
190+ def : WriteRes<WriteFCvtF32ToI32, [SMX60_IEU]>;
191+ def : WriteRes<WriteFCvtF32ToI64, [SMX60_IEU]>;
192+ def : WriteRes<WriteFCvtF64ToI64, [SMX60_IEU]>;
193+ def : WriteRes<WriteFCvtF64ToI32, [SMX60_IEU]>;
194+ def : WriteRes<WriteFCvtF16ToI64, [SMX60_IEU]>;
195+ }
196+
197+ let Latency = 4 in {
192198 def : WriteRes<WriteFCvtI32ToF16, [SMX60_IEU]>;
193199 def : WriteRes<WriteFCvtI32ToF32, [SMX60_IEU]>;
194200 def : WriteRes<WriteFCvtI32ToF64, [SMX60_IEU]>;
195201 def : WriteRes<WriteFCvtI64ToF16, [SMX60_IEU]>;
196202 def : WriteRes<WriteFCvtI64ToF32, [SMX60_IEU]>;
197203 def : WriteRes<WriteFCvtI64ToF64, [SMX60_IEU]>;
198- def : WriteRes<WriteFCvtF16ToI32, [SMX60_IEU]>;
199- def : WriteRes<WriteFCvtF16ToI64, [SMX60_IEU]>;
200204 def : WriteRes<WriteFCvtF16ToF32, [SMX60_FP]>;
201205 def : WriteRes<WriteFCvtF16ToF64, [SMX60_FP]>;
202- def : WriteRes<WriteFCvtF32ToI32, [SMX60_IEU]>;
203- def : WriteRes<WriteFCvtF32ToI64, [SMX60_IEU]>;
204206 def : WriteRes<WriteFCvtF32ToF16, [SMX60_FP]>;
205207 def : WriteRes<WriteFCvtF32ToF64, [SMX60_FP]>;
206- def : WriteRes<WriteFCvtF64ToI32, [SMX60_IEU]>;
207- def : WriteRes<WriteFCvtF64ToI64, [SMX60_IEU]>;
208208 def : WriteRes<WriteFCvtF64ToF16, [SMX60_FP]>;
209209 def : WriteRes<WriteFCvtF64ToF32, [SMX60_FP]>;
210210}
211211
212- let Latency = 2 in {
212+ let Latency = 6 in {
213213 def : WriteRes<WriteFClass16, [SMX60_FP]>;
214214 def : WriteRes<WriteFClass32, [SMX60_FP]>;
215215 def : WriteRes<WriteFClass64, [SMX60_FP]>;
216- }
217216
218- let Latency = 4 in {
219217 def : WriteRes<WriteFCmp16, [SMX60_FP]>;
220218 def : WriteRes<WriteFCmp32, [SMX60_FP]>;
221219 def : WriteRes<WriteFCmp64, [SMX60_FP]>;
220+
221+ def : WriteRes<WriteFMovF32ToI32, [SMX60_IEU]>;
222+ def : WriteRes<WriteFMovF16ToI16, [SMX60_IEU]>;
222223}
223224
224- let Latency = 2 in {
225+ let Latency = 4 in {
225226 def : WriteRes<WriteFMovI16ToF16, [SMX60_IEU]>;
226- def : WriteRes<WriteFMovF16ToI16, [SMX60_IEU]>;
227- def : WriteRes<WriteFMovI32ToF32, [SMX60_IEU]>;
228- def : WriteRes<WriteFMovF32ToI32, [SMX60_IEU]>;
229- def : WriteRes<WriteFMovI64ToF64, [SMX60_IEU]>;
230227 def : WriteRes<WriteFMovF64ToI64, [SMX60_IEU]>;
228+ def : WriteRes<WriteFMovI64ToF64, [SMX60_IEU]>;
229+ def : WriteRes<WriteFMovI32ToF32, [SMX60_IEU]>;
231230}
232231
233232// Others
@@ -334,6 +333,7 @@ def : ReadAdvance<ReadIMinMax, 0>;
334333def : ReadAdvance<ReadREV8, 0>;
335334def : ReadAdvance<ReadSHXADD, 0>;
336335def : ReadAdvance<ReadSHXADD32, 0>;
336+ def : ReadAdvance<ReadCLMUL, 0>;
337337// Single-bit instructions
338338def : ReadAdvance<ReadSingleBit, 0>;
339339def : ReadAdvance<ReadSingleBitImm, 0>;
@@ -343,7 +343,6 @@ def : ReadAdvance<ReadSingleBitImm, 0>;
343343defm : UnsupportedSchedV;
344344defm : UnsupportedSchedXsfvcp;
345345defm : UnsupportedSchedZabha;
346- defm : UnsupportedSchedZbc;
347346defm : UnsupportedSchedZbkb;
348347defm : UnsupportedSchedZbkx;
349348defm : UnsupportedSchedZfa;
0 commit comments