Skip to content

Commit d214276

Browse files
authored
Fix precision problem in DSDOT
1 parent 2fbfc64 commit d214276

File tree

1 file changed

+159
-10
lines changed

1 file changed

+159
-10
lines changed

kernel/mips64/dot.S

Lines changed: 159 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,35 +103,83 @@
103103
.align 3
104104

105105
.L12:
106+
#ifdef DSDOT
107+
cvt.d.s a1, a1
108+
cvt.d.s b1, b1
109+
madd.d s1, s1, a1, b1
110+
#else
106111
MADD s1, s1, a1, b1
112+
#endif
107113
LD a1, 4 * SIZE(X)
108114
LD b1, 4 * SIZE(Y)
109115

116+
#ifdef DSDOT
117+
cvt.d.s a2, a2
118+
cvt.d.s b2, b2
119+
madd.d s2, s2, a2, b2
120+
#else
110121
MADD s2, s2, a2, b2
122+
#endif
111123
LD a2, 5 * SIZE(X)
112124
LD b2, 5 * SIZE(Y)
113125

126+
#ifdef DSDOT
127+
cvt.d.s a3, a3
128+
cvt.d.s b3, b3
129+
madd.d s1, s1, a3, b3
130+
#else
114131
MADD s1, s1, a3, b3
132+
#endif
115133
LD a3, 6 * SIZE(X)
116134
LD b3, 6 * SIZE(Y)
117135

136+
#ifdef DSDOT
137+
cvt.d.s a4, a4
138+
cvt.d.s b4, b4
139+
madd.d s2, s2, a4, b4
140+
#else
118141
MADD s2, s2, a4, b4
142+
#endif
119143
LD a4, 7 * SIZE(X)
120144
LD b4, 7 * SIZE(Y)
121145

146+
#ifdef DSDOT
147+
cvt.d.s a1, a1
148+
cvt.d.s b1, b1
149+
madd.d s1, s1, a1, b1
150+
#else
122151
MADD s1, s1, a1, b1
152+
#endif
123153
LD a1, 8 * SIZE(X)
124154
LD b1, 8 * SIZE(Y)
125155

156+
#ifdef DSDOT
157+
cvt.d.s a2, a2
158+
cvt.d.s b2, b2
159+
madd.d s2, s2, a2, b2
160+
#else
126161
MADD s2, s2, a2, b2
162+
#endif
127163
LD a2, 9 * SIZE(X)
128164
LD b2, 9 * SIZE(Y)
129165

166+
#ifdef DSDOT
167+
cvt.d.s a3, a3
168+
cvt.d.s b3, b3
169+
madd.d s1, s1, a3, b3
170+
#else
130171
MADD s1, s1, a3, b3
172+
#endif
131173
LD a3, 10 * SIZE(X)
132174
LD b3, 10 * SIZE(Y)
133175

176+
#ifdef DSDOT
177+
cvt.d.s a4, a4
178+
cvt.d.s b4, b4
179+
madd.d s2, s2, a4, b4
180+
#else
134181
MADD s2, s2, a4, b4
182+
#endif
135183
LD a4, 11 * SIZE(X)
136184
LD b4, 11 * SIZE(Y)
137185

@@ -143,29 +191,77 @@
143191
.align 3
144192

145193
.L13:
194+
#ifdef DSDOT
195+
cvt.d.s a1, a1
196+
cvt.d.s b1, b1
197+
madd.d s1, s1, a1, b1
198+
#else
146199
MADD s1, s1, a1, b1
200+
#endif
147201
LD a1, 4 * SIZE(X)
148202
LD b1, 4 * SIZE(Y)
149203

204+
#ifdef DSDOT
205+
cvt.d.s a2, a2
206+
cvt.d.s b2, b2
207+
madd.d s2, s2, a2, b2
208+
#else
150209
MADD s2, s2, a2, b2
210+
#endif
151211
LD a2, 5 * SIZE(X)
152212
LD b2, 5 * SIZE(Y)
153213

214+
#ifdef DSDOT
215+
cvt.d.s a3, a3
216+
cvt.d.s b3, b3
217+
madd.d s1, s1, a3, b3
218+
#else
154219
MADD s1, s1, a3, b3
220+
#endif
155221
LD a3, 6 * SIZE(X)
156222
LD b3, 6 * SIZE(Y)
157223

224+
#ifdef DSDOT
225+
cvt.d.s a4, a4
226+
cvt.d.s b4, b4
227+
madd.d s2, s2, a4, b4
228+
#else
158229
MADD s2, s2, a4, b4
230+
#endif
159231
LD a4, 7 * SIZE(X)
160232
LD b4, 7 * SIZE(Y)
161233

234+
#ifdef DSDOT
235+
cvt.d.s a1, a1
236+
cvt.d.s b1, b1
237+
madd.d s1, s1, a1, b1
238+
#else
162239
MADD s1, s1, a1, b1
240+
#endif
163241
daddiu X, X, 8 * SIZE
242+
#ifdef DSDOT
243+
cvt.d.s a2, a2
244+
cvt.d.s b2, b2
245+
madd.d s2, s2, a2, b2
246+
#else
164247
MADD s2, s2, a2, b2
248+
#endif
165249
daddiu Y, Y, 8 * SIZE
166250

251+
#ifdef DSDOT
252+
cvt.d.s a3, a3
253+
cvt.d.s b3, b3
254+
madd.d s1, s1, a3, b3
255+
#else
167256
MADD s1, s1, a3, b3
257+
#endif
258+
#ifdef DSDOT
259+
cvt.d.s a4, a4
260+
cvt.d.s b4, b4
261+
madd.d s2, s2, a4, b4
262+
#else
168263
MADD s2, s2, a4, b4
264+
#endif
169265
.align 3
170266

171267
.L15:
@@ -179,8 +275,13 @@
179275
LD a1, 0 * SIZE(X)
180276
LD b1, 0 * SIZE(Y)
181277

278+
#ifdef DSDOT
279+
cvt.d.s a1, a1
280+
cvt.d.s b1, b1
281+
madd.d s1, s1, a1, b1
282+
#else
182283
MADD s1, s1, a1, b1
183-
284+
#endif
184285
daddiu I, I, -1
185286

186287
daddiu X, X, SIZE
@@ -225,50 +326,85 @@
225326
LD b1, 0 * SIZE(Y)
226327
dadd Y, Y, INCY
227328

329+
#ifdef DSDOT
330+
cvt.d.s a1, a1
331+
cvt.d.s b1, b1
332+
madd.d s1, s1, a1, b1
333+
#else
228334
MADD s1, s1, a1, b1
229-
335+
#endif
230336
LD a1, 0 * SIZE(X)
231337
dadd X, X, INCX
232338
LD b1, 0 * SIZE(Y)
233339
dadd Y, Y, INCY
234340

341+
#ifdef DSDOT
342+
cvt.d.s a1, a1
343+
cvt.d.s b1, b1
344+
madd.d s2, s2, a1, b1
345+
#else
235346
MADD s2, s2, a1, b1
236-
347+
#endif
237348
LD a1, 0 * SIZE(X)
238349
dadd X, X, INCX
239350
LD b1, 0 * SIZE(Y)
240351
dadd Y, Y, INCY
241352

353+
#ifdef DSDOT
354+
cvt.d.s a1, a1
355+
cvt.d.s b1, b1
356+
madd.d s1, s1, a1, b1
357+
#else
242358
MADD s1, s1, a1, b1
243-
359+
#endif
244360
LD a1, 0 * SIZE(X)
245361
dadd X, X, INCX
246362
LD b1, 0 * SIZE(Y)
247363
dadd Y, Y, INCY
248364

365+
#ifdef DSDOT
366+
cvt.d.s a1, a1
367+
cvt.d.s b1, b1
368+
madd.d s2, s2, a1, b1
369+
#else
249370
MADD s2, s2, a1, b1
250-
371+
#endif
251372
LD a1, 0 * SIZE(X)
252373
dadd X, X, INCX
253374
LD b1, 0 * SIZE(Y)
254375
dadd Y, Y, INCY
255376

377+
#ifdef DSDOT
378+
cvt.d.s a1, a1
379+
cvt.d.s b1, b1
380+
madd.d s1, s1, a1, b1
381+
#else
256382
MADD s1, s1, a1, b1
257-
383+
#endif
258384
LD a1, 0 * SIZE(X)
259385
dadd X, X, INCX
260386
LD b1, 0 * SIZE(Y)
261387
dadd Y, Y, INCY
262388

389+
#ifdef DSDOT
390+
cvt.d.s a1, a1
391+
cvt.d.s b1, b1
392+
madd.d s2, s2, a1, b1
393+
#else
263394
MADD s2, s2, a1, b1
264-
395+
#endif
265396
LD a1, 0 * SIZE(X)
266397
dadd X, X, INCX
267398
LD b1, 0 * SIZE(Y)
268399
dadd Y, Y, INCY
269400

401+
#ifdef DSDOT
402+
cvt.d.s a1, a1
403+
cvt.d.s b1, b1
404+
madd.d s1, s1, a1, b1
405+
#else
270406
MADD s1, s1, a1, b1
271-
407+
#endif
272408
LD a1, 0 * SIZE(X)
273409
dadd X, X, INCX
274410
LD b1, 0 * SIZE(Y)
@@ -277,7 +413,13 @@
277413
daddiu I, I, -1
278414

279415
/*
 * Tail of the strided loop (.L23): accumulate the last product of the
 * iteration into s2 and branch back to .L23 while I > 0.
 *
 * Only the one instruction that follows bgtz occupies the branch delay
 * slot.  When the whole DSDOT sequence (two conversions plus madd.d)
 * is placed after the branch, a taken branch executes just the first
 * conversion, and the multiply-add is reached only on loop exit, so
 * the product is dropped on every iteration but the last.  The
 * single-to-double conversions are therefore issued before the branch
 * and only the multiply-add is left in the delay slot, mirroring the
 * non-DSDOT path where MADD alone sits in the slot.
 *
 * NOTE(review): this relies on the routine being assembled under
 * ".set noreorder" (the explicit NOP after "j $31" suggests so) --
 * confirm against the prologue macro.  If the assembler fills delay
 * slots itself, this ordering is still correct.
 */
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
bgtz I, .L23
madd.d s2, s2, a1, b1
#else
bgtz I, .L23
MADD s2, s2, a1, b1
#endif
281423
.align 3
282424

283425
.L25:
@@ -296,13 +438,20 @@
296438
daddiu I, I, -1
297439

298440
/*
 * Tail of the remainder loop (.L26): accumulate the current product
 * into s1 and branch back to .L26 while I > 0.
 *
 * Only the one instruction that follows bgtz occupies the branch delay
 * slot.  With the three-instruction DSDOT sequence placed after the
 * branch, a taken branch executes just "cvt.d.s a1, a1" and the
 * multiply-add runs only when the loop falls through, so every product
 * except the final one is lost.  The conversions are moved ahead of
 * the branch and madd.d alone is kept in the delay slot so that it
 * executes on every iteration, exactly as MADD does in the non-DSDOT
 * path.
 *
 * NOTE(review): this relies on the routine being assembled under
 * ".set noreorder" (the explicit NOP after "j $31" suggests so) --
 * confirm against the prologue macro.  If the assembler fills delay
 * slots itself, this ordering is still correct.
 */
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
bgtz I, .L26
madd.d s1, s1, a1, b1
#else
bgtz I, .L26
MADD s1, s1, a1, b1
#endif
300448
.align 3
301449

302450
.L999:
303-
ADD s1, s1, s2
304451
#ifdef DSDOT
305-
cvt.d.s s1, s1
452+
add.d s1, s1, s2
453+
#else
454+
ADD s1, s1, s2
306455
#endif
307456
j $31
308457
NOP

0 commit comments

Comments
 (0)