Skip to content

Commit b111829

Browse files
authored
[ZARCH] Update max/min functions
1 parent b815a04 commit b111829

24 files changed: +805 / -1117 lines changed

kernel/zarch/camax.c

Lines changed: 74 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ static FLOAT camax_kernel_32(BLASLONG n, FLOAT *x)
5555
"srlg %%r0,%1,5 \n\t"
5656
"xgr %%r1,%%r1 \n\t"
5757
"0: \n\t"
58-
"pfd 1, 1024(%2) \n\t"
58+
"pfd 1, 1024(%%r1,%2) \n\t"
5959

6060
"vlef %%v16,0(%%r1,%2),0 \n\t"
6161
"vlef %%v17,4(%%r1,%2),0 \n\t"
@@ -93,100 +93,88 @@ static FLOAT camax_kernel_32(BLASLONG n, FLOAT *x)
9393
"vlef %%v22,120(%%r1,%2),3 \n\t"
9494
"vlef %%v23,124(%%r1,%2),3 \n\t"
9595

96-
"vflpsb %%v16, %%v16 \n\t"
97-
"vflpsb %%v17, %%v17 \n\t"
98-
"vflpsb %%v18, %%v18 \n\t"
99-
"vflpsb %%v19, %%v19 \n\t"
100-
"vflpsb %%v20, %%v20 \n\t"
101-
"vflpsb %%v21, %%v21 \n\t"
102-
"vflpsb %%v22, %%v22 \n\t"
103-
"vflpsb %%v23, %%v23 \n\t"
104-
"vfasb %%v16,%%v16,%%v17 \n\t"
105-
"vfasb %%v17,%%v18,%%v19 \n\t"
106-
"vfasb %%v18,%%v20,%%v21 \n\t"
107-
"vfasb %%v19,%%v22,%%v23 \n\t"
108-
109-
"vfchsb %%v24,%%v16,%%v17 \n\t"
110-
"vfchsb %%v25,%%v18,%%v19 \n\t"
111-
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
112-
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
113-
114-
"vfchsb %%v26,%%v24,%%v25 \n\t"
115-
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
116-
117-
"vfchsb %%v27,%%v26,%%v0 \n\t"
118-
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
119-
120-
"vlef %%v16,128(%%r1,%2),0 \n\t"
121-
"vlef %%v17,132(%%r1,%2),0 \n\t"
122-
"vlef %%v16,136(%%r1,%2),1 \n\t"
123-
"vlef %%v17,140(%%r1,%2),1 \n\t"
124-
"vlef %%v16,144(%%r1,%2),2 \n\t"
125-
"vlef %%v17,148(%%r1,%2),2 \n\t"
126-
"vlef %%v16,152(%%r1,%2),3 \n\t"
127-
"vlef %%v17,156(%%r1,%2),3 \n\t"
128-
129-
"vlef %%v18,160(%%r1,%2),0 \n\t"
130-
"vlef %%v19,164(%%r1,%2),0 \n\t"
131-
"vlef %%v18,168(%%r1,%2),1 \n\t"
132-
"vlef %%v19,172(%%r1,%2),1 \n\t"
133-
"vlef %%v18,176(%%r1,%2),2 \n\t"
134-
"vlef %%v19,180(%%r1,%2),2 \n\t"
135-
"vlef %%v18,184(%%r1,%2),3 \n\t"
136-
"vlef %%v19,188(%%r1,%2),3 \n\t"
137-
138-
"vlef %%v20,192(%%r1,%2),0 \n\t"
139-
"vlef %%v21,196(%%r1,%2),0 \n\t"
140-
"vlef %%v20,200(%%r1,%2),1 \n\t"
141-
"vlef %%v21,204(%%r1,%2),1 \n\t"
142-
"vlef %%v20,208(%%r1,%2),2 \n\t"
143-
"vlef %%v21,212(%%r1,%2),2 \n\t"
144-
"vlef %%v20,216(%%r1,%2),3 \n\t"
145-
"vlef %%v21,220(%%r1,%2),3 \n\t"
146-
147-
"vlef %%v22,224(%%r1,%2),0 \n\t"
148-
"vlef %%v23,228(%%r1,%2),0 \n\t"
149-
"vlef %%v22,232(%%r1,%2),1 \n\t"
150-
"vlef %%v23,236(%%r1,%2),1 \n\t"
151-
"vlef %%v22,240(%%r1,%2),2 \n\t"
152-
"vlef %%v23,244(%%r1,%2),2 \n\t"
153-
"vlef %%v22,248(%%r1,%2),3 \n\t"
154-
"vlef %%v23,252(%%r1,%2),3 \n\t"
155-
156-
"vflpsb %%v16, %%v16 \n\t"
157-
"vflpsb %%v17, %%v17 \n\t"
158-
"vflpsb %%v18, %%v18 \n\t"
159-
"vflpsb %%v19, %%v19 \n\t"
160-
"vflpsb %%v20, %%v20 \n\t"
161-
"vflpsb %%v21, %%v21 \n\t"
162-
"vflpsb %%v22, %%v22 \n\t"
163-
"vflpsb %%v23, %%v23 \n\t"
96+
"vlef %%v24,128(%%r1,%2),0 \n\t"
97+
"vlef %%v25,132(%%r1,%2),0 \n\t"
98+
"vlef %%v24,136(%%r1,%2),1 \n\t"
99+
"vlef %%v25,140(%%r1,%2),1 \n\t"
100+
"vlef %%v24,144(%%r1,%2),2 \n\t"
101+
"vlef %%v25,148(%%r1,%2),2 \n\t"
102+
"vlef %%v24,152(%%r1,%2),3 \n\t"
103+
"vlef %%v25,156(%%r1,%2),3 \n\t"
104+
105+
"vlef %%v26,160(%%r1,%2),0 \n\t"
106+
"vlef %%v27,164(%%r1,%2),0 \n\t"
107+
"vlef %%v26,168(%%r1,%2),1 \n\t"
108+
"vlef %%v27,172(%%r1,%2),1 \n\t"
109+
"vlef %%v26,176(%%r1,%2),2 \n\t"
110+
"vlef %%v27,180(%%r1,%2),2 \n\t"
111+
"vlef %%v26,184(%%r1,%2),3 \n\t"
112+
"vlef %%v27,188(%%r1,%2),3 \n\t"
113+
114+
"vlef %%v28,192(%%r1,%2),0 \n\t"
115+
"vlef %%v29,196(%%r1,%2),0 \n\t"
116+
"vlef %%v28,200(%%r1,%2),1 \n\t"
117+
"vlef %%v29,204(%%r1,%2),1 \n\t"
118+
"vlef %%v28,208(%%r1,%2),2 \n\t"
119+
"vlef %%v29,212(%%r1,%2),2 \n\t"
120+
"vlef %%v28,216(%%r1,%2),3 \n\t"
121+
"vlef %%v29,220(%%r1,%2),3 \n\t"
122+
123+
"vlef %%v30,224(%%r1,%2),0 \n\t"
124+
"vlef %%v31,228(%%r1,%2),0 \n\t"
125+
"vlef %%v30,232(%%r1,%2),1 \n\t"
126+
"vlef %%v31,236(%%r1,%2),1 \n\t"
127+
"vlef %%v30,240(%%r1,%2),2 \n\t"
128+
"vlef %%v31,244(%%r1,%2),2 \n\t"
129+
"vlef %%v30,248(%%r1,%2),3 \n\t"
130+
"vlef %%v31,252(%%r1,%2),3 \n\t"
131+
132+
"vflpsb %%v16,%%v16 \n\t"
133+
"vflpsb %%v17,%%v17 \n\t"
134+
"vflpsb %%v18,%%v18 \n\t"
135+
"vflpsb %%v19,%%v19 \n\t"
136+
"vflpsb %%v20,%%v20 \n\t"
137+
"vflpsb %%v21,%%v21 \n\t"
138+
"vflpsb %%v22,%%v22 \n\t"
139+
"vflpsb %%v23,%%v23 \n\t"
140+
"vflpsb %%v24,%%v24 \n\t"
141+
"vflpsb %%v25,%%v25 \n\t"
142+
"vflpsb %%v26,%%v26 \n\t"
143+
"vflpsb %%v27,%%v27 \n\t"
144+
"vflpsb %%v28,%%v28 \n\t"
145+
"vflpsb %%v29,%%v29 \n\t"
146+
"vflpsb %%v30,%%v30 \n\t"
147+
"vflpsb %%v31,%%v31 \n\t"
148+
164149
"vfasb %%v16,%%v16,%%v17 \n\t"
165-
"vfasb %%v17,%%v18,%%v19 \n\t"
166-
"vfasb %%v18,%%v20,%%v21 \n\t"
167-
"vfasb %%v19,%%v22,%%v23 \n\t"
150+
"vfasb %%v18,%%v18,%%v19 \n\t"
151+
"vfasb %%v20,%%v20,%%v21 \n\t"
152+
"vfasb %%v22,%%v22,%%v23 \n\t"
153+
"vfasb %%v24,%%v24,%%v25 \n\t"
154+
"vfasb %%v26,%%v26,%%v27 \n\t"
155+
"vfasb %%v28,%%v28,%%v29 \n\t"
156+
"vfasb %%v30,%%v30,%%v31 \n\t"
168157

169-
"vfchsb %%v24,%%v16,%%v17 \n\t"
170-
"vfchsb %%v25,%%v18,%%v19 \n\t"
171-
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
172-
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
158+
"vfmaxsb %%v16,%%v16,%%v24,0 \n\t"
159+
"vfmaxsb %%v18,%%v18,%%v26,0 \n\t"
160+
"vfmaxsb %%v20,%%v20,%%v28,0 \n\t"
161+
"vfmaxsb %%v22,%%v22,%%v30,0 \n\t"
162+
163+
"vfmaxsb %%v16,%%v16,%%v20,0 \n\t"
164+
"vfmaxsb %%v18,%%v18,%%v22,0 \n\t"
173165

174-
"vfchsb %%v26,%%v24,%%v25 \n\t"
175-
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
166+
"vfmaxsb %%v16,%%v16,%%v18,0 \n\t"
176167

177-
"vfchsb %%v27,%%v26,%%v0 \n\t"
178-
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
168+
"vfmaxsb %%v0,%%v0,%%v16,0 \n\t"
179169

180170
"agfi %%r1, 256 \n\t"
181171
"brctg %%r0, 0b \n\t"
182172

183173
"veslg %%v16,%%v0,32 \n\t"
184-
"vfchsb %%v17,%%v16,%%v0 \n\t"
185-
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
174+
"vfmaxsb %%v0,%%v0,%%v16,0 \n\t"
186175

187176
"vrepf %%v16,%%v0,2 \n\t"
188-
"wfchsb %%v17,%%v16,%%v0 \n\t"
189-
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
177+
"wfmaxsb %%v0,%%v0,%%v16,0 \n\t"
190178
"ler %0,%%f0 "
191179
:"=f"(amax)
192180
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@@ -233,11 +221,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
233221

234222
maxf=CABS1(x,0);
235223
inc_x2 = 2 * inc_x;
236-
ix += inc_x2;
237-
i++;
238224

239-
BLASLONG n1 = (n - 1) & -4;
240-
while ((i - 1) < n1) {
225+
BLASLONG n1 = n & -4;
226+
while (i < n1) {
241227

242228
if (CABS1(x,ix) > maxf) {
243229
maxf = CABS1(x,ix);

0 commit comments

Comments
 (0)