Skip to content

Commit c3cfc69

Browse files
authored
Add implementations of ssum/dsum and csum/zsum
as trivial copies of asum/zasum, with the fabs calls replaced by fmov to preserve the code structure
1 parent b9f4943 commit c3cfc69

File tree

2 files changed

+414
-0
lines changed

2 files changed

+414
-0
lines changed

kernel/alpha/sum.S

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
#define ASSEMBLER
40+
#include "common.h"
41+
#include "version.h"
42+
43+
/* Prefetch look-ahead; the main loop uses it as PREFETCHSIZE * 2 * SIZE
   bytes past the current X pointer. */
#define PREFETCHSIZE 88
44+
45+
/* Integer registers.  N is tested as the element count, X is the base
   address of every load, INCX is the step fed to SXADDQ after each load,
   and I is the scratch loop counter. */
#define N $16
46+
#define X $17
47+
#define INCX $18
48+
#define I $19
49+
50+
/* s0..s3: four independent partial sums; s0 holds the final result. */
#define s0 $f0
51+
#define s1 $f1
52+
#define s2 $f10
53+
#define s3 $f11
54+
55+
/* a0..a7: destinations of the LD loads (one block of eight elements). */
#define a0 $f12
56+
#define a1 $f13
57+
#define a2 $f14
58+
#define a3 $f15
59+
#define a4 $f16
60+
#define a5 $f17
61+
#define a6 $f18
62+
#define a7 $f19
63+
64+
/* t0..t3: staging registers; an element is copied here with fmov and is
   added into the matching s register by a later ADD. */
#define t0 $f20
65+
#define t1 $f21
66+
#define t2 $f22
67+
#define t3 $f23
68+
69+
/*
 * Sum kernel: adds up N floating-point elements read through X and leaves
 * the total in s0 ($f0).  Values are copied with fmov (no fabs), so the
 * signed values themselves are summed.
 *
 * Layout:
 *   - N <= 0            -> $L999, result 0.
 *   - N >> 3 blocks of eight elements go through the software-pipelined
 *     loop $L12 and its drain $L13, using four partial sums s0..s3.
 *   - N & 7 leftover elements go through the scalar loop $L17.
 *
 * Pipeline invariant: t0..t3 hold values that have been loaded but not yet
 * accumulated; every "ADD sK, tK, sK" retires the previous contents of tK
 * just before the following fmov refills it.
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, ADD, SXADDQ and SIZE are macros that are
 * not defined in this file (presumably from common.h).
 * NOTE(review): SXADDQ INCX, X, X is assumed to advance X by INCX elements
 * (X += INCX * SIZE) -- confirm against common.h.
 */
PROLOGUE
70+
PROFCODE
71+
72+
/* Clear the result and the pending slot so the early exit returns 0. */
fclr s0
73+
/* unop: no-op, presumably kept for instruction-slot alignment. */
unop
74+
fclr t0
75+
/* Nothing to sum when N <= 0. */
ble N, $L999
76+
77+
/* I = N / 8 = number of full eight-element blocks. */
sra N, 3, I
78+
fclr s1
79+
fclr s2
80+
/* Fewer than eight elements: go straight to the remainder handling.
   Only s0, s2 and t0 are read on that path, and all three are cleared. */
ble I, $L15
81+
82+
/* Pipeline warm-up: preload a0..a5 of the first block and clear the
   remaining staging/accumulator registers between the loads. */
LD a0, 0 * SIZE(X)
83+
fclr t1
84+
SXADDQ INCX, X, X
85+
fclr t2
86+
87+
LD a1, 0 * SIZE(X)
88+
fclr t3
89+
SXADDQ INCX, X, X
90+
fclr s3
91+
92+
LD a2, 0 * SIZE(X)
93+
SXADDQ INCX, X, X
94+
LD a3, 0 * SIZE(X)
95+
SXADDQ INCX, X, X
96+
97+
LD a4, 0 * SIZE(X)
98+
SXADDQ INCX, X, X
99+
LD a5, 0 * SIZE(X)
100+
SXADDQ INCX, X, X
101+
102+
/* I = I - 1: the last block is handled by the drain code at $L13, so the
   loop below runs once per block except the last. */
lda I, -1(I)
103+
ble I, $L13
104+
.align 4
105+
106+
/* Main loop.  Each iteration retires the eight elements of the current
   block (a0..a5 already loaded, a6/a7 loaded here) and preloads a0..a5 of
   the next block.  On the first pass t0..t3 are zero, so the first four
   ADDs add nothing. */
$L12:
107+
ADD s0, t0, s0
108+
/* Load into $31 (the Alpha zero register): the value is discarded, so
   this only touches memory ahead of X as a prefetch. */
ldl $31, PREFETCHSIZE * 2 * SIZE(X)
109+
fmov a0, t0
110+
lda I, -1(I)
111+
112+
ADD s1, t1, s1
113+
LD a6, 0 * SIZE(X)
114+
fmov a1, t1
115+
SXADDQ INCX, X, X
116+
117+
ADD s2, t2, s2
118+
LD a7, 0 * SIZE(X)
119+
fmov a2, t2
120+
SXADDQ INCX, X, X
121+
122+
/* From here on the loads refill a0..a5 with the next block; each aK has
   already been copied to a t register before it is overwritten. */
ADD s3, t3, s3
123+
LD a0, 0 * SIZE(X)
124+
fmov a3, t3
125+
SXADDQ INCX, X, X
126+
127+
ADD s0, t0, s0
128+
LD a1, 0 * SIZE(X)
129+
fmov a4, t0
130+
SXADDQ INCX, X, X
131+
132+
ADD s1, t1, s1
133+
LD a2, 0 * SIZE(X)
134+
fmov a5, t1
135+
SXADDQ INCX, X, X
136+
137+
ADD s2, t2, s2
138+
LD a3, 0 * SIZE(X)
139+
fmov a6, t2
140+
SXADDQ INCX, X, X
141+
142+
ADD s3, t3, s3
143+
LD a4, 0 * SIZE(X)
144+
fmov a7, t3
145+
SXADDQ INCX, X, X
146+
147+
LD a5, 0 * SIZE(X)
148+
unop
149+
SXADDQ INCX, X, X
150+
/* I was decremented at the top of the iteration; loop while blocks remain. */
bne I, $L12
151+
.align 4
152+
153+
/* Drain: finish the last block.  Only a6 and a7 still need loading; no
   further block is preloaded. */
$L13:
154+
ADD s0, t0, s0
155+
LD a6, 0 * SIZE(X)
156+
fmov a0, t0
157+
SXADDQ INCX, X, X
158+
159+
ADD s1, t1, s1
160+
LD a7, 0 * SIZE(X)
161+
fmov a1, t1
162+
SXADDQ INCX, X, X
163+
164+
ADD s2, t2, s2
165+
fmov a2, t2
166+
ADD s3, t3, s3
167+
fmov a3, t3
168+
169+
ADD s0, t0, s0
170+
fmov a4, t0
171+
ADD s1, t1, s1
172+
fmov a5, t1
173+
ADD s2, t2, s2
174+
fmov a6, t2
175+
ADD s3, t3, s3
176+
fmov a7, t3
177+
178+
/* Retire t1..t3 (a5, a6, a7).  t0 still holds a4 and is deliberately left
   pending: it is added by the first ADD at $L17 or by the ADD at $L999. */
ADD s1, t1, s1
179+
ADD s2, t2, s2
180+
ADD s3, t3, s3
181+
182+
/* Fold the partial sums pairwise; s0 + s2 is completed at $L15. */
ADD s0, s1, s0
183+
ADD s2, s3, s2
184+
.align 4
185+
186+
/* Remainder: I = N mod 8 elements still to be read. */
$L15:
187+
and N, 7, I
188+
/* Completes the reduction after the block path; on the short path s2 is 0. */
ADD s0, s2, s0
189+
unop
190+
ble I, $L999
191+
.align 4
192+
193+
/* Scalar tail loop, one element per iteration: retire the pending t0,
   load the next element, advance X, and stage the element in t0. */
$L17:
194+
ADD s0, t0, s0
195+
LD a0, 0 * SIZE(X)
196+
SXADDQ INCX, X, X
197+
fmov a0, t0
198+
199+
lda I, -1(I)
200+
bne I, $L17
201+
.align 4
202+
203+
/* Common exit: add the last pending value (0 when N <= 0) and return with
   the total in s0 ($f0). */
$L999:
204+
ADD s0, t0, s0
205+
ret
206+
EPILOGUE

0 commit comments

Comments
 (0)