Skip to content

Commit 688fa92

Browse files
authored
Add MIPS64 implementation of ?sum
as trivial copy of ?asum with the fabs replaced by mov to preserve code structure
1 parent cdbe0f0 commit 688fa92

File tree

2 files changed

+536
-0
lines changed

2 files changed

+536
-0
lines changed

kernel/mips64/sum.S

Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
/* Defined before common.h is included; presumably selects the
   assembler-side definitions in that header (confirm in common.h). */
#define ASSEMBLER
40+
#include "common.h"
41+
42+
/* Arguments, held in integer registers: N is the element count, X is
   the address of the first element, INCX is the stride between
   consecutive elements (converted to a byte stride by the routine). */
#define N $4
43+
#define X $5
44+
#define INCX $6
45+
46+
/* Integer scratch: I is the loop counter, TEMP holds SIZE for the
   unit-stride comparison. */
#define I $2
47+
#define TEMP $3
48+
49+
/* a1..a8: floating-point registers that receive the eight elements
   loaded for one unrolled group. */
#define a1 $f2
50+
#define a2 $f3
51+
#define a3 $f4
52+
#define a4 $f5
53+
#define a5 $f6
54+
#define a6 $f7
55+
#define a7 $f8
56+
#define a8 $f9
57+
58+
/* t1..t4: staging registers; each element is copied here with MOV
   before it is added to an accumulator. */
#define t1 $f10
59+
#define t2 $f11
60+
#define t3 $f12
61+
#define t4 $f13
62+
63+
/* s1, s2: the two partial-sum accumulators.  They are added together
   into s1 ($f0) on exit. */
#define s1 $f0
64+
#define s2 $f1
65+
66+
/*
 * Sum kernel: adds up N elements of the vector at X, stepping INCX
 * elements between consecutive loads, and leaves the total in s1 ($f0).
 * Elements are added as loaded; the MOV into t1..t4 is a plain copy.
 *
 * Layout of the routine:
 *   - N <= 0          : jump straight to .L999; both accumulators are
 *                       zero, so the result is 0.
 *   - byte stride == SIZE : contiguous path (.L12/.L13 for groups of
 *                       eight, .L15/.L16 for the N & 7 leftovers).
 *   - any other stride : strided path (.L20/.L23/.L24 for groups of
 *                       eight, .L25/.L26 for the N & 7 leftovers).
 *
 * Two accumulators (s1, s2) are used alternately and combined at the
 * very end, so the order of additions differs from a simple left-to-
 * right loop.
 *
 * The code relies on branch delay slots: the instruction written after
 * every branch or jump is executed whether or not the branch is taken.
 * This presumably requires noreorder mode, set up by PROLOGUE or
 * common.h (confirm there).  LD, ADD, MOV, MTC, LDINT, NOP, SIZE and
 * BASE_SHIFT are not defined in this file; presumably they come from
 * common.h and select the element precision (confirm).
 *
 * NOTE(review): no test for INCX <= 0 is visible here.  Any stride that
 * is not exactly one element, including zero or a negative value, falls
 * into the strided path unchanged.  Confirm that callers guarantee a
 * positive increment.
 */
PROLOGUE
67+
68+
#ifdef F_INTERFACE
69+
/* With F_INTERFACE, N and INCX hold addresses on entry; each register
   is overwritten with the value loaded from that address. */
LDINT N, 0(N)
70+
LDINT INCX, 0(INCX)
71+
#endif
72+
73+
/* Clear both accumulators from the zero register $0. */
MTC $0, s1
74+
75+
MTC $0, s2
76+
/* Scale the stride by BASE_SHIFT so it can be added to X directly
   (assumes SIZE == 1 << BASE_SHIFT, TODO confirm in common.h). */
dsll INCX, INCX, BASE_SHIFT
77+
78+
/* Nothing to add when N <= 0. */
blez N, .L999
79+
/* Delay slot: TEMP = SIZE, the byte stride of a contiguous vector. */
li TEMP, SIZE
80+
81+
/* Any stride other than one element goes to the strided path. */
bne INCX, TEMP, .L20
82+
/* Delay slot, runs on both paths: I = N / 8, the number of full
   eight-element groups. */
dsra I, N, 3
83+
84+
/* Contiguous path.  Fewer than eight elements: only the tail loop. */
blez I, .L15
85+
NOP
86+
87+
/* Prime the pipeline: load the first group of eight and stage its
   first four elements in t1..t4. */
LD a1, 0 * SIZE(X)
88+
LD a2, 1 * SIZE(X)
89+
LD a3, 2 * SIZE(X)
90+
LD a4, 3 * SIZE(X)
91+
92+
LD a5, 4 * SIZE(X)
93+
MOV t1, a1
94+
LD a6, 5 * SIZE(X)
95+
MOV t2, a2
96+
LD a7, 6 * SIZE(X)
97+
MOV t3, a3
98+
99+
MOV t4, a4
100+
daddiu I, I, -1
101+
102+
/* Only one group in total: skip the steady-state loop and drain. */
blez I, .L13
103+
/* Delay slot: the eighth element is loaded on both paths. */
LD a8, 7 * SIZE(X)
104+
.align 3
105+
106+
/* Steady state: each pass adds the eight elements of the group that is
   already loaded while loading the eight elements of the next group. */
.L12:
107+
ADD s1, s1, t1
108+
/* Offsets 8..11 are the first half of the next group, relative to the
   not-yet-advanced X. */
LD a1, 8 * SIZE(X)
109+
110+
MOV t1, a5
111+
daddiu I, I, -1
112+
113+
ADD s2, s2, t2
114+
LD a2, 9 * SIZE(X)
115+
116+
MOV t2, a6
117+
NOP
118+
119+
ADD s1, s1, t3
120+
LD a3, 10 * SIZE(X)
121+
122+
MOV t3, a7
123+
NOP
124+
125+
ADD s2, s2, t4
126+
LD a4, 11 * SIZE(X)
127+
128+
MOV t4, a8
129+
/* X now points at the start of the next group, so its second half is
   at offsets 4..7 below. */
daddiu X, X, 8 * SIZE
130+
131+
ADD s1, s1, t1
132+
LD a5, 4 * SIZE(X)
133+
134+
MOV t1, a1
135+
NOP
136+
137+
ADD s2, s2, t2
138+
LD a6, 5 * SIZE(X)
139+
140+
MOV t2, a2
141+
NOP
142+
143+
ADD s1, s1, t3
144+
LD a7, 6 * SIZE(X)
145+
146+
MOV t3, a3
147+
NOP
148+
149+
ADD s2, s2, t4
150+
LD a8, 7 * SIZE(X)
151+
152+
bgtz I, .L12
153+
/* Delay slot: finish staging the first half of the newly loaded group. */
MOV t4, a4
154+
.align 3
155+
156+
/* Drain: add the last loaded group (t1..t4 staged, a5..a8 still in the
   load registers) without issuing further loads. */
.L13:
157+
ADD s1, s1, t1
158+
/* Step X past the final group so the tail loop starts at the first
   unprocessed element. */
daddiu X, X, 8 * SIZE
159+
160+
MOV t1, a5
161+
NOP
162+
163+
ADD s2, s2, t2
164+
MOV t2, a6
165+
166+
ADD s1, s1, t3
167+
MOV t3, a7
168+
169+
ADD s2, s2, t4
170+
MOV t4, a8
171+
172+
ADD s1, s1, t1
173+
ADD s2, s2, t2
174+
ADD s1, s1, t3
175+
ADD s2, s2, t4
176+
.align 3
177+
178+
/* Contiguous tail: I = N & 7 elements are left over. */
.L15:
179+
andi I, N, 7
180+
181+
blez I, .L999
182+
NOP
183+
.align 3
184+
185+
/* One element per pass, accumulated into s1 only. */
.L16:
186+
LD a1, 0 * SIZE(X)
187+
daddiu I, I, -1
188+
189+
MOV t1, a1
190+
191+
ADD s1, s1, t1
192+
193+
bgtz I, .L16
194+
/* Delay slot: advance to the next element. */
daddiu X, X, SIZE
195+
196+
j .L999
197+
NOP
198+
.align 3
199+
200+
/* Strided path.  I already holds N / 8 from the delay slot of the
   branch that jumped here. */
.L20:
201+
blez I, .L25
202+
NOP
203+
204+
/* Prime the pipeline: every load reads offset 0 and X is advanced by
   the byte stride after each one. */
LD a1, 0 * SIZE(X)
205+
daddu X, X, INCX
206+
207+
LD a2, 0 * SIZE(X)
208+
daddu X, X, INCX
209+
210+
LD a3, 0 * SIZE(X)
211+
daddu X, X, INCX
212+
213+
LD a4, 0 * SIZE(X)
214+
daddu X, X, INCX
215+
216+
LD a5, 0 * SIZE(X)
217+
daddu X, X, INCX
218+
219+
LD a6, 0 * SIZE(X)
220+
daddu X, X, INCX
221+
222+
MOV t1, a1
223+
LD a7, 0 * SIZE(X)
224+
225+
MOV t2, a2
226+
daddu X, X, INCX
227+
228+
MOV t3, a3
229+
LD a8, 0 * SIZE(X)
230+
231+
MOV t4, a4
232+
daddiu I, I, -1
233+
234+
/* Only one group in total: skip the steady-state loop and drain. */
blez I, .L24
235+
/* Delay slot: step past the eighth element on both paths. */
daddu X, X, INCX
236+
.align 3
237+
238+
/* Steady state, same schedule as .L12, with a pointer bump after each
   load in place of fixed offsets. */
.L23:
239+
ADD s1, s1, t1
240+
LD a1, 0 * SIZE(X)
241+
242+
MOV t1, a5
243+
daddu X, X, INCX
244+
245+
ADD s2, s2, t2
246+
LD a2, 0 * SIZE(X)
247+
248+
MOV t2, a6
249+
daddu X, X, INCX
250+
251+
ADD s1, s1, t3
252+
LD a3, 0 * SIZE(X)
253+
254+
MOV t3, a7
255+
daddu X, X, INCX
256+
257+
ADD s2, s2, t4
258+
LD a4, 0 * SIZE(X)
259+
260+
MOV t4, a8
261+
daddu X, X, INCX
262+
263+
ADD s1, s1, t1
264+
LD a5, 0 * SIZE(X)
265+
266+
MOV t1, a1
267+
daddu X, X, INCX
268+
269+
ADD s2, s2, t2
270+
LD a6, 0 * SIZE(X)
271+
272+
MOV t2, a2
273+
daddu X, X, INCX
274+
275+
ADD s1, s1, t3
276+
LD a7, 0 * SIZE(X)
277+
278+
MOV t3, a3
279+
daddu X, X, INCX
280+
281+
ADD s2, s2, t4
282+
LD a8, 0 * SIZE(X)
283+
284+
MOV t4, a4
285+
daddiu I, I, -1
286+
287+
bgtz I, .L23
288+
/* Delay slot: step past the eighth element just loaded. */
daddu X, X, INCX
289+
.align 3
290+
291+
/* Drain: add the last loaded group; X already points past it. */
.L24:
292+
ADD s1, s1, t1
293+
MOV t1, a5
294+
295+
ADD s2, s2, t2
296+
MOV t2, a6
297+
298+
ADD s1, s1, t3
299+
MOV t3, a7
300+
301+
ADD s2, s2, t4
302+
MOV t4, a8
303+
304+
ADD s1, s1, t1
305+
ADD s2, s2, t2
306+
ADD s1, s1, t3
307+
ADD s2, s2, t4
308+
.align 3
309+
310+
/* Strided tail: I = N & 7 elements are left over. */
.L25:
311+
andi I, N, 7
312+
313+
blez I, .L999
314+
NOP
315+
.align 3
316+
317+
/* One element per pass, accumulated into s1 only. */
.L26:
318+
LD a1, 0 * SIZE(X)
319+
daddiu I, I, -1
320+
321+
MOV t1, a1
322+
daddu X, X, INCX
323+
324+
bgtz I, .L26
325+
/* Delay slot: the accumulate for this pass happens here. */
ADD s1, s1, t1
326+
.align 3
327+
328+
/* Common exit: return through $31. */
.L999:
329+
j $31
330+
/* Delay slot: fold the second accumulator into s1 ($f0), which then
   holds the final total. */
ADD s1, s1, s2
331+
332+
EPILOGUE

0 commit comments

Comments
 (0)