20
20
21
21
// MODULES //
22
22
23
- var float64ToFloat32 = require ( '@stdlib/number/float64/base/to-float32' ) ;
23
+ var f32 = require ( '@stdlib/number/float64/base/to-float32' ) ;
24
24
var floor = require ( '@stdlib/math/base/special/floor' ) ;
25
25
26
26
@@ -81,9 +81,10 @@ function ssumpw( N, x, strideX, offsetX ) {
81
81
}
82
82
if ( N < 8 ) {
83
83
// Use simple summation...
84
- s = 0.0 ;
85
- for ( i = 0 ; i < N ; i ++ ) {
86
- s = float64ToFloat32 ( s + x [ ix ] ) ;
84
+ s = x [ ix ] ;
85
+ ix += strideX ;
86
+ for ( i = 1 ; i < N ; i ++ ) {
87
+ s = f32 ( s + x [ ix ] ) ;
87
88
ix += strideX ;
88
89
}
89
90
return s ;
@@ -102,30 +103,30 @@ function ssumpw( N, x, strideX, offsetX ) {
102
103
103
104
M = N % 8 ;
104
105
for ( i = 8 ; i < N - M ; i += 8 ) {
105
- s0 = float64ToFloat32 ( s0 + x [ ix ] ) ;
106
- s1 = float64ToFloat32 ( s1 + x [ ix + strideX ] ) ;
107
- s2 = float64ToFloat32 ( s2 + x [ ix + ( 2 * strideX ) ] ) ;
108
- s3 = float64ToFloat32 ( s3 + x [ ix + ( 3 * strideX ) ] ) ;
109
- s4 = float64ToFloat32 ( s4 + x [ ix + ( 4 * strideX ) ] ) ;
110
- s5 = float64ToFloat32 ( s5 + x [ ix + ( 5 * strideX ) ] ) ;
111
- s6 = float64ToFloat32 ( s6 + x [ ix + ( 6 * strideX ) ] ) ;
112
- s7 = float64ToFloat32 ( s7 + x [ ix + ( 7 * strideX ) ] ) ;
106
+ s0 = f32 ( s0 + x [ ix ] ) ;
107
+ s1 = f32 ( s1 + x [ ix + strideX ] ) ;
108
+ s2 = f32 ( s2 + x [ ix + ( 2 * strideX ) ] ) ;
109
+ s3 = f32 ( s3 + x [ ix + ( 3 * strideX ) ] ) ;
110
+ s4 = f32 ( s4 + x [ ix + ( 4 * strideX ) ] ) ;
111
+ s5 = f32 ( s5 + x [ ix + ( 5 * strideX ) ] ) ;
112
+ s6 = f32 ( s6 + x [ ix + ( 6 * strideX ) ] ) ;
113
+ s7 = f32 ( s7 + x [ ix + ( 7 * strideX ) ] ) ;
113
114
ix += 8 * strideX ;
114
115
}
115
116
// Pairwise sum the accumulators:
116
- s = float64ToFloat32 ( float64ToFloat32 ( float64ToFloat32 ( s0 + s1 ) + float64ToFloat32 ( s2 + s3 ) ) + float64ToFloat32 ( float64ToFloat32 ( s4 + s5 ) + float64ToFloat32 ( s6 + s7 ) ) ) ; // eslint-disable-line max-len
117
+ s = f32 ( f32 ( f32 ( s0 + s1 ) + f32 ( s2 + s3 ) ) + f32 ( f32 ( s4 + s5 ) + f32 ( s6 + s7 ) ) ) ; // eslint-disable-line max-len
117
118
118
119
// Clean-up loop...
119
120
for ( i ; i < N ; i ++ ) {
120
- s = float64ToFloat32 ( s + x [ ix ] ) ;
121
+ s = f32 ( s + x [ ix ] ) ;
121
122
ix += strideX ;
122
123
}
123
124
return s ;
124
125
}
125
126
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
126
127
n = floor ( N / 2 ) ;
127
128
n -= n % 8 ;
128
- return float64ToFloat32 ( ssumpw ( n , x , strideX , ix ) + ssumpw ( N - n , x , strideX , ix + ( n * strideX ) ) ) ; // eslint-disable-line max-len
129
+ return f32 ( ssumpw ( n , x , strideX , ix ) + ssumpw ( N - n , x , strideX , ix + ( n * strideX ) ) ) ; // eslint-disable-line max-len
129
130
}
130
131
131
132
0 commit comments