|
22 | 22 |
|
23 | 23 | var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' ); |
24 | 24 | var floor = require( '@stdlib/math/base/special/floor' ); |
25 | | -var abs = require( '@stdlib/math/base/special/abs' ); |
| 25 | +var absf = require( '@stdlib/math/base/special/absf' ); |
26 | 26 |
|
27 | 27 |
|
28 | 28 | // VARIABLES // |
@@ -78,47 +78,47 @@ function sasumpw( N, x, strideX, offsetX ) { |
78 | 78 | } |
79 | 79 | ix = offsetX; |
80 | 80 | if ( strideX === 0 ) { |
81 | | - return float64ToFloat32( N * abs( x[ ix ] ) ); |
| 81 | + return float64ToFloat32( N * absf( x[ ix ] ) ); |
82 | 82 | } |
83 | 83 | if ( N < 8 ) { |
84 | 84 | // Use simple summation... |
85 | 85 | s = 0.0; |
86 | 86 | for ( i = 0; i < N; i++ ) { |
87 | | - s = float64ToFloat32( s + abs( x[ ix ] ) ); |
| 87 | + s = float64ToFloat32( s + absf( x[ ix ] ) ); |
88 | 88 | ix += strideX; |
89 | 89 | } |
90 | 90 | return s; |
91 | 91 | } |
92 | 92 | if ( N <= BLOCKSIZE ) { |
93 | 93 | // Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)... |
94 | | - s0 = abs( x[ ix ] ); |
95 | | - s1 = abs( x[ ix+strideX ] ); |
96 | | - s2 = abs( x[ ix+(2*strideX) ] ); |
97 | | - s3 = abs( x[ ix+(3*strideX) ] ); |
98 | | - s4 = abs( x[ ix+(4*strideX) ] ); |
99 | | - s5 = abs( x[ ix+(5*strideX) ] ); |
100 | | - s6 = abs( x[ ix+(6*strideX) ] ); |
101 | | - s7 = abs( x[ ix+(7*strideX) ] ); |
| 94 | + s0 = absf( x[ ix ] ); |
| 95 | + s1 = absf( x[ ix+strideX ] ); |
| 96 | + s2 = absf( x[ ix+(2*strideX) ] ); |
| 97 | + s3 = absf( x[ ix+(3*strideX) ] ); |
| 98 | + s4 = absf( x[ ix+(4*strideX) ] ); |
| 99 | + s5 = absf( x[ ix+(5*strideX) ] ); |
| 100 | + s6 = absf( x[ ix+(6*strideX) ] ); |
| 101 | + s7 = absf( x[ ix+(7*strideX) ] ); |
102 | 102 | ix += 8 * strideX; |
103 | 103 |
|
104 | 104 | M = N % 8; |
105 | 105 | for ( i = 8; i < N-M; i += 8 ) { |
106 | | - s0 = float64ToFloat32( s0 + abs( x[ ix ] ) ); |
107 | | - s1 = float64ToFloat32( s1 + abs( x[ ix+strideX ] ) ); |
108 | | - s2 = float64ToFloat32( s2 + abs( x[ ix+(2*strideX) ] ) ); |
109 | | - s3 = float64ToFloat32( s3 + abs( x[ ix+(3*strideX) ] ) ); |
110 | | - s4 = float64ToFloat32( s4 + abs( x[ ix+(4*strideX) ] ) ); |
111 | | - s5 = float64ToFloat32( s5 + abs( x[ ix+(5*strideX) ] ) ); |
112 | | - s6 = float64ToFloat32( s6 + abs( x[ ix+(6*strideX) ] ) ); |
113 | | - s7 = float64ToFloat32( s7 + abs( x[ ix+(7*strideX) ] ) ); |
| 106 | + s0 = float64ToFloat32( s0 + absf( x[ ix ] ) ); |
| 107 | + s1 = float64ToFloat32( s1 + absf( x[ ix+strideX ] ) ); |
| 108 | + s2 = float64ToFloat32( s2 + absf( x[ ix+(2*strideX) ] ) ); |
| 109 | + s3 = float64ToFloat32( s3 + absf( x[ ix+(3*strideX) ] ) ); |
| 110 | + s4 = float64ToFloat32( s4 + absf( x[ ix+(4*strideX) ] ) ); |
| 111 | + s5 = float64ToFloat32( s5 + absf( x[ ix+(5*strideX) ] ) ); |
| 112 | + s6 = float64ToFloat32( s6 + absf( x[ ix+(6*strideX) ] ) ); |
| 113 | + s7 = float64ToFloat32( s7 + absf( x[ ix+(7*strideX) ] ) ); |
114 | 114 | ix += 8 * strideX; |
115 | 115 | } |
116 | 116 | // Pairwise sum the accumulators: |
117 | 117 | s = float64ToFloat32( float64ToFloat32( float64ToFloat32(s0+s1) + float64ToFloat32(s2+s3) ) + float64ToFloat32( float64ToFloat32(s4+s5) + float64ToFloat32(s6+s7) ) ); // eslint-disable-line max-len |
118 | 118 |
|
119 | 119 | // Clean-up loop... |
120 | 120 | for ( i; i < N; i++ ) { |
121 | | - s = float64ToFloat32( s + abs( x[ ix ] ) ); |
| 121 | + s = float64ToFloat32( s + absf( x[ ix ] ) ); |
122 | 122 | ix += strideX; |
123 | 123 | } |
124 | 124 | return s; |
|
0 commit comments