Skip to content

Commit ed17b71

Browse files
committed
feat: complete implementation
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent 52e9ed3 commit ed17b71

File tree

2 files changed

+78
-2
lines changed

2 files changed

+78
-2
lines changed

lib/node_modules/@stdlib/blas/ext/base/gsumpw/lib/accessors.js

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,17 @@
1818

1919
'use strict';
2020

21+
// MODULES //
22+
23+
var floor = require( '@stdlib/math/base/special/floor' );
24+
25+
26+
// VARIABLES //
27+
28+
// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
29+
var BLOCKSIZE = 128;
30+
31+
2132
// MAIN //
2233

2334
/**
@@ -52,22 +63,80 @@ function gsumpw( N, x, strideX, offsetX ) {
5263
var xbuf;
5364
var get;
5465
var ix;
66+
var s0;
67+
var s1;
68+
var s2;
69+
var s3;
70+
var s4;
71+
var s5;
72+
var s6;
73+
var s7;
74+
var M;
75+
var n;
5576
var s;
5677
var i;
5778

79+
if ( N <= 0 ) {
80+
return 0.0;
81+
}
82+
5883
// Cache reference to array data:
5984
xbuf = x.data;
6085

6186
// Cache a reference to the element accessor:
6287
get = x.accessors[ 0 ];
88+
6389
ix = offsetX;
6490
if ( strideX === 0 ) {
6591
return N * get( xbuf, ix );
6692
}
6793
s = 0.0;
6894

69-
// TODO --- for ( i = 0; i < N; i++ ) {}
70-
return s;
95+
if ( N <= 8 ) {
96+
s = 0.0;
97+
for ( i = 0; i < N; i++ ) {
98+
s += get( xbuf, ix );
99+
ix += strideX;
100+
}
101+
return s;
102+
}
103+
if ( N <= BLOCKSIZE ) {
104+
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
105+
s0 = get( xbuf, ix );
106+
s1 = get( xbuf, ix+strideX );
107+
s2 = get( xbuf, ix+(2*strideX) );
108+
s3 = get( xbuf, ix+(3*strideX) );
109+
s4 = get( xbuf, ix+(4*strideX) );
110+
s5 = get( xbuf, ix+(5*strideX) );
111+
s6 = get( xbuf, ix+(6*strideX) );
112+
s7 = get( xbuf, ix+(7*strideX) );
113+
114+
M = N % 8;
115+
for ( i = 8; i < N-M; i += 8 ) {
116+
s0 += get( xbuf, ix );
117+
s1 += get( xbuf, ix+strideX );
118+
s2 += get( xbuf, ix+(2*strideX) );
119+
s3 += get( xbuf, ix+(3*strideX) );
120+
s4 += get( xbuf, ix+(4*strideX) );
121+
s5 += get( xbuf, ix+(5*strideX) );
122+
s6 += get( xbuf, ix+(6*strideX) );
123+
s7 += get( xbuf, ix+(7*strideX) );
124+
ix += 8 * strideX;
125+
}
126+
// Pairwise sum the accumulators:
127+
s = ( (s0+s1) + (s2+s3) ) + ( (s4+s5) + (s6+s7) );
128+
129+
// Clean-up loop...
130+
for ( i; i < N; i++ ) {
131+
s += x[ ix ];
132+
ix += strideX;
133+
}
134+
return s;
135+
}
136+
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
137+
n = floor( N/2 );
138+
n -= n % 8;
139+
return gsumpw( n, x, strideX, ix ) + gsumpw( N-n, x, strideX, ix+(n*strideX) ); // eslint-disable-line max-len
71140
}
72141

73142

lib/node_modules/@stdlib/blas/ext/base/gsumpw/lib/ndarray.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
// MODULES //
2222

2323
var floor = require( '@stdlib/math/base/special/floor' );
24+
var arraylike2object = require( '@stdlib/array/base/arraylike2object' );
25+
var accessors = require( './accessors.js' );
2426

2527

2628
// VARIABLES //
@@ -66,12 +68,17 @@ function gsumpw( N, x, strideX, offsetX ) {
6668
var s7;
6769
var M;
6870
var s;
71+
var o;
6972
var n;
7073
var i;
7174

7275
if ( N <= 0 ) {
7376
return 0.0;
7477
}
78+
o = arraylike2object( x );
79+
if ( o.accessorProtocol ) {
80+
return accessors( N, o, strideX, offsetX );
81+
}
7582
ix = offsetX;
7683
if ( strideX === 0 ) {
7784
return N * x[ ix ];

0 commit comments

Comments
 (0)