@@ -58,6 +58,7 @@ extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n
58
58
#define CUR_MAXINV "d8"
59
59
#define CUR_MAXINV_V "v8.2d"
60
60
#define CUR_MAX_V "v8.2d"
61
/* REGINF: scalar register that the kernel loads with the +Infinity bit
   pattern (via x6) and compares against CUR_MAX, so that an infinite
   running maximum branches straight to the "store infinity" exit. */
+ #define REGINF "d9"
61
62
62
63
static void nrm2_compute (BLASLONG n , FLOAT * x , BLASLONG inc_x ,
63
64
double * ssq , double * scale )
@@ -79,8 +80,10 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
79
80
" ble 9f //nrm2_kernel_L999 \n"
80
81
81
82
"1: //nrm2_kernel_F_BEGIN: \n"
83
+ " mov x6, #0x7FF0000000000000 //+Infinity \n"
82
84
" fmov " REGZERO ", xzr \n"
83
85
" fmov " REGONE ", #1.0 \n"
86
+ " fmov " REGINF ", x6 \n"
84
87
" lsl " INC_X ", " INC_X ", #" INC_SHIFT " \n"
85
88
" mov " J ", " N " \n"
86
89
" cmp " J ", xzr \n"
@@ -104,6 +107,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
104
107
" ldr d4, [" X "] \n"
105
108
" fabs d4, d4 \n"
106
109
" fmax " CUR_MAX ", " SCALE ", d4 \n"
110
+ " fcmp " CUR_MAX ", " REGINF " \n"
111
+ " beq 10f \n"
107
112
" fdiv " SCALE ", " SCALE ", " CUR_MAX " \n"
108
113
" fmul " SCALE ", " SCALE ", " SCALE " \n"
109
114
" fmul " SSQ ", " SSQ ", " SCALE " \n"
@@ -116,6 +121,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
116
121
" ldr d3, [" X ", #8] \n"
117
122
" fabs d3, d3 \n"
118
123
" fmax " CUR_MAX ", " SCALE ", d3 \n"
124
+ " fcmp " CUR_MAX ", " REGINF " \n"
125
+ " beq 10f \n"
119
126
" fdiv " SCALE ", " SCALE ", " CUR_MAX " \n"
120
127
" fmul " SCALE ", " SCALE ", " SCALE " \n"
121
128
" fmul " SSQ ", " SSQ ", " SCALE " \n"
@@ -158,6 +165,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
158
165
" fmaxp v24.2d, v24.2d, v26.2d \n"
159
166
" fmaxp v24.2d, v24.2d, v24.2d \n"
160
167
" fmax " CUR_MAX ", " SCALE ", d24 \n"
168
+ " fcmp " CUR_MAX ", " REGINF " \n"
169
+ " beq 10f \n"
161
170
" fdiv " CUR_MAXINV ", " REGONE ", " CUR_MAX " \n"
162
171
" //dup " CUR_MAX_V ", v7.d[0] \n"
163
172
" fdiv " SCALE ", " SCALE ", " CUR_MAX " \n"
@@ -217,6 +226,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
217
226
" fmaxp v24.2d, v24.2d, v26.2d \n"
218
227
" fmaxp v24.2d, v24.2d, v24.2d \n"
219
228
" fmax " CUR_MAX ", " SCALE ", d24 \n"
229
+ " fcmp " CUR_MAX ", " REGINF " \n"
230
+ " beq 10f \n"
220
231
" fdiv " CUR_MAXINV ", " REGONE ", " CUR_MAX " \n"
221
232
" //dup " CUR_MAX_V ", v7.d[0] \n"
222
233
" fdiv " SCALE ", " SCALE ", " CUR_MAX " \n"
@@ -265,6 +276,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
265
276
" ldr d4, [" X "] \n"
266
277
" fabs d4, d4 \n"
267
278
" fmax " CUR_MAX ", " SCALE ", d4 \n"
279
+ " fcmp " CUR_MAX ", " REGINF " \n"
280
+ " beq 10f \n"
268
281
" fdiv " SCALE ", " SCALE ", " CUR_MAX " \n"
269
282
" fmul " SCALE ", " SCALE ", " SCALE " \n"
270
283
" fmul " SSQ ", " SSQ ", " SCALE " \n"
@@ -276,6 +289,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
276
289
" ldr d3, [" X ", #8] \n"
277
290
" fabs d3, d3 \n"
278
291
" fmax " CUR_MAX ", " SCALE ", d3 \n"
292
+ " fcmp " CUR_MAX ", " REGINF " \n"
293
+ " beq 10f \n"
279
294
" fdiv " SCALE ", " SCALE ", " CUR_MAX " \n"
280
295
" fmul " SCALE ", " SCALE ", " SCALE " \n"
281
296
" fmul " SSQ ", " SSQ ", " SCALE " \n"
@@ -291,6 +306,11 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
291
306
"9: //nrm2_kernel_L999: \n"
292
307
" str " SSQ ", [%[SSQ_]] \n"
293
308
" str " SCALE ", [%[SCALE_]] \n"
309
+ " b 11f \n"
310
+ "10: \n"
311
+ " str " REGINF ", [%[SSQ_]] \n"
312
+ " str " REGINF ", [%[SCALE_]] \n"
313
+ "11: \n"
294
314
295
315
:
296
316
: [SSQ_ ] "r" (ssq ), //%0
@@ -300,7 +320,7 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
300
320
[INCX_ ] "r" (inc_x ) //%4
301
321
: "cc" ,
302
322
"memory" ,
303
- "x0" , "x1" , "x2" , "x3" , "x4" , "x5" ,
323
+ "x0" , "x1" , "x2" , "x3" , "x4" , "x5" , "x6" ,
304
324
"d0" , "d1" , "d2" , "d3" , "d4" , "d5" , "d6" , "d7" , "d8" ,
"d9" /* REGINF: written by the asm (fmov REGINF, x6), so it must be declared clobbered */
305
325
);
306
326
@@ -359,6 +379,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
359
379
cur_ssq = * ptr ;
360
380
cur_scale = * (ptr + 1 );
361
381
382
+ if (cur_ssq == INFINITY ) {
383
+ ssq = INFINITY ;
384
+ scale = INFINITY ;
385
+ break ;
386
+ }
387
+
362
388
if (cur_scale != 0 ) {
363
389
if (cur_scale > scale ) {
364
390
scale = (scale / cur_scale );
0 commit comments