Skip to content

Commit 4f0b1ca

Browse files
committed
fix: update implementation to preserve signed zeros
---
type: pre_commit_static_analysis_report
description: Results of running static analysis checks when committing changes.
report:
  - task: lint_filenames
    status: passed
  - task: lint_editorconfig
    status: passed
  - task: lint_markdown
    status: na
  - task: lint_package_json
    status: na
  - task: lint_repl_help
    status: na
  - task: lint_javascript_src
    status: passed
  - task: lint_javascript_cli
    status: na
  - task: lint_javascript_examples
    status: na
  - task: lint_javascript_tests
    status: na
  - task: lint_javascript_benchmarks
    status: na
  - task: lint_python
    status: na
  - task: lint_r
    status: na
  - task: lint_c_src
    status: passed
  - task: lint_c_examples
    status: na
  - task: lint_c_benchmarks
    status: na
  - task: lint_c_tests_fixtures
    status: na
  - task: lint_shell
    status: na
  - task: lint_typescript_declarations
    status: na
  - task: lint_typescript_tests
    status: na
  - task: lint_license_headers
    status: passed
---
1 parent 8976032 commit 4f0b1ca

File tree

3 files changed

+12
-188
lines changed

3 files changed

+12
-188
lines changed

lib/node_modules/@stdlib/blas/ext/base/dnansumpw/lib/ndarray.js

Lines changed: 5 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,13 @@
2020

2121
// MODULES //
2222

23-
var isnan = require( '@stdlib/math/base/assert/is-nan' );
24-
var floor = require( '@stdlib/math/base/special/floor' );
23+
var Float64Array = require( '@stdlib/array/float64' );
24+
var dnannsumpw = require( '@stdlib/blas/ext/base/dnannsumpw' ).ndarray;
2525

2626

2727
// VARIABLES //
2828

29-
// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
30-
var BLOCKSIZE = 128;
29+
var WORKSPACE = new Float64Array( 2 );
3130

3231

3332
// MAIN //
@@ -58,95 +57,8 @@ var BLOCKSIZE = 128;
5857
* // returns 5.0
5958
*/
6059
function dnansumpw( N, x, strideX, offsetX ) {
61-
var ix;
62-
var s0;
63-
var s1;
64-
var s2;
65-
var s3;
66-
var s4;
67-
var s5;
68-
var s6;
69-
var s7;
70-
var M;
71-
var s;
72-
var n;
73-
var i;
74-
75-
if ( N <= 0 ) {
76-
return 0.0;
77-
}
78-
if ( strideX === 0 ) {
79-
if ( isnan( x[ offsetX ] ) ) {
80-
return 0.0;
81-
}
82-
return x[ offsetX ] * N;
83-
}
84-
ix = offsetX;
85-
if ( N < 8 ) {
86-
// Use simple summation...
87-
s = 0.0;
88-
for ( i = 0; i < N; i++ ) {
89-
if ( isnan( x[ ix ] ) === false ) {
90-
s += x[ ix ];
91-
}
92-
ix += strideX;
93-
}
94-
return s;
95-
}
96-
if ( N <= BLOCKSIZE ) {
97-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
98-
s0 = ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
99-
ix += strideX;
100-
s1 = ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
101-
ix += strideX;
102-
s2 = ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
103-
ix += strideX;
104-
s3 = ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
105-
ix += strideX;
106-
s4 = ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
107-
ix += strideX;
108-
s5 = ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
109-
ix += strideX;
110-
s6 = ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
111-
ix += strideX;
112-
s7 = ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
113-
ix += strideX;
114-
115-
M = N % 8;
116-
for ( i = 8; i < N-M; i += 8 ) {
117-
s0 += ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
118-
ix += strideX;
119-
s1 += ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
120-
ix += strideX;
121-
s2 += ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
122-
ix += strideX;
123-
s3 += ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
124-
ix += strideX;
125-
s4 += ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
126-
ix += strideX;
127-
s5 += ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
128-
ix += strideX;
129-
s6 += ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
130-
ix += strideX;
131-
s7 += ( isnan( x[ ix ] ) ) ? 0.0 : x[ ix ];
132-
ix += strideX;
133-
}
134-
// Pairwise sum the accumulators:
135-
s = ( (s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7) );
136-
137-
// Clean-up loop...
138-
for ( i; i < N; i++ ) {
139-
if ( isnan( x[ ix ] ) === false ) {
140-
s += x[ ix ];
141-
}
142-
ix += strideX;
143-
}
144-
return s;
145-
}
146-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
147-
n = floor( N/2 );
148-
n -= n % 8;
149-
return dnansumpw( n, x, strideX, ix ) + dnansumpw( N-n, x, strideX, ix+(n*strideX) ); // eslint-disable-line max-len
60+
dnannsumpw( N, x, strideX, offsetX, WORKSPACE, 1, 0 );
61+
return WORKSPACE[ 0 ];
15062
}
15163

15264

lib/node_modules/@stdlib/blas/ext/base/dnansumpw/manifest.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
"libraries": [],
3939
"libpath": [],
4040
"dependencies": [
41-
"@stdlib/math/base/assert/is-nan",
41+
"@stdlib/blas/ext/base/dnannsumpw",
4242
"@stdlib/napi/export",
4343
"@stdlib/napi/argv",
4444
"@stdlib/napi/argv-int64",
@@ -60,7 +60,7 @@
6060
"libraries": [],
6161
"libpath": [],
6262
"dependencies": [
63-
"@stdlib/math/base/assert/is-nan",
63+
"@stdlib/blas/ext/base/dnannsumpw",
6464
"@stdlib/blas/base/shared",
6565
"@stdlib/strided/base/stride2offset"
6666
]
@@ -77,7 +77,7 @@
7777
"libraries": [],
7878
"libpath": [],
7979
"dependencies": [
80-
"@stdlib/math/base/assert/is-nan",
80+
"@stdlib/blas/ext/base/dnannsumpw",
8181
"@stdlib/blas/base/shared",
8282
"@stdlib/strided/base/stride2offset"
8383
]
@@ -94,7 +94,7 @@
9494
"libraries": [],
9595
"libpath": [],
9696
"dependencies": [
97-
"@stdlib/math/base/assert/is-nan",
97+
"@stdlib/blas/ext/base/dnannsumpw",
9898
"@stdlib/blas/base/shared",
9999
"@stdlib/strided/base/stride2offset"
100100
]

lib/node_modules/@stdlib/blas/ext/base/dnansumpw/src/main.c

Lines changed: 3 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
*/
1818

1919
#include "stdlib/blas/ext/base/dnansumpw.h"
20-
#include "stdlib/math/base/assert/is_nan.h"
20+
#include "stdlib/blas/ext/base/dnannsumpw.h"
2121
#include "stdlib/strided/base/stride2offset.h"
2222
#include "stdlib/blas/base/shared.h"
2323

@@ -60,94 +60,6 @@ double API_SUFFIX(stdlib_strided_dnansumpw)( const CBLAS_INT N, const double *X,
6060
* @return output value
6161
*/
6262
double API_SUFFIX(stdlib_strided_dnansumpw_ndarray)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX ) {
63-
CBLAS_INT ix;
64-
CBLAS_INT M;
65-
CBLAS_INT n;
66-
CBLAS_INT i;
67-
double sum;
68-
double s0;
69-
double s1;
70-
double s2;
71-
double s3;
72-
double s4;
73-
double s5;
74-
double s6;
75-
double s7;
76-
77-
if ( N <= 0 ) {
78-
return 0.0;
79-
}
80-
if ( strideX == 0 ) {
81-
if ( stdlib_base_is_nan( X[ 0 ] ) ) {
82-
return 0.0;
83-
}
84-
return X[ 0 ] * N;
85-
}
86-
ix = offsetX;
87-
if ( N < 8 ) {
88-
// Use simple summation...
89-
sum = 0.0;
90-
for ( i = 0; i < N; i++ ) {
91-
if ( !stdlib_base_is_nan( X[ ix ] ) ) {
92-
sum += X[ ix ];
93-
}
94-
ix += strideX;
95-
}
96-
return sum;
97-
}
98-
// Blocksize for pairwise summation: 128 (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.)
99-
if ( N <= 128 ) {
100-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
101-
s0 = ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
102-
ix += strideX;
103-
s1 = ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
104-
ix += strideX;
105-
s2 = ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
106-
ix += strideX;
107-
s3 = ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
108-
ix += strideX;
109-
s4 = ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
110-
ix += strideX;
111-
s5 = ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
112-
ix += strideX;
113-
s6 = ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
114-
ix += strideX;
115-
s7 = ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
116-
ix += strideX;
117-
118-
M = N % 8;
119-
for ( i = 8; i < N-M; i += 8 ) {
120-
s0 += ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
121-
ix += strideX;
122-
s1 += ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
123-
ix += strideX;
124-
s2 += ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
125-
ix += strideX;
126-
s3 += ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
127-
ix += strideX;
128-
s4 += ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
129-
ix += strideX;
130-
s5 += ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
131-
ix += strideX;
132-
s6 += ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
133-
ix += strideX;
134-
s7 += ( stdlib_base_is_nan( X[ ix ] ) ) ? 0.0 : X[ ix ];
135-
ix += strideX;
136-
}
137-
// Pairwise sum the accumulators:
138-
sum = ( (s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7) );
139-
140-
// Clean-up loop...
141-
for (; i < N; i++ ) {
142-
if ( !stdlib_base_is_nan( X[ ix ] ) ) {
143-
sum += X[ ix ];
144-
}
145-
ix += strideX;
146-
}
147-
return sum;
148-
}
149-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
150-
n = N / 2;
151-
n -= n % 8;
152-
return API_SUFFIX(stdlib_strided_dnansumpw_ndarray)( n, X, strideX, ix ) + API_SUFFIX(stdlib_strided_dnansumpw_ndarray)( N-n, X, strideX, ix+(n*strideX) );
63+
CBLAS_INT n = 0;
64+
return API_SUFFIX(stdlib_strided_dnannsumpw_ndarray)( N, X, strideX, offsetX, &n );
15365
}

0 commit comments

Comments
 (0)