Skip to content

Commit b853deb

Browse files
aman-095kgryte
andauthored
feat: refactor JavaScript implementation and add C ndarray implementation for blas/base/dnrm2
PR-URL: #2941 Ref: #2039 Co-authored-by: Athan Reines <[email protected]> Reviewed-by: Athan Reines <[email protected]> Signed-off-by: Athan Reines <[email protected]>
1 parent b21bf16 commit b853deb

File tree

20 files changed

+524
-168
lines changed

20 files changed

+524
-168
lines changed

lib/node_modules/@stdlib/blas/base/dnrm2/README.md

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ var z = dnrm2( 4, x1, 2 );
9191
// returns 5.0
9292
```
9393

94-
If either `N` or `stride` is less than or equal to `0`, the function returns `0`.
94+
If `N` is less than or equal to `0`, the function returns `0`.
9595

9696
#### dnrm2.ndarray( N, x, stride, offset )
9797

@@ -207,6 +207,28 @@ The function accepts the following arguments:
207207
double c_dnrm2( const CBLAS_INT N, const double *X, const CBLAS_INT stride );
208208
```
209209

210+
#### c_dnrm2_ndarray( N, \*X, stride, offset )
211+
212+
Computes the L2-norm of a double-precision floating-point vector using alternative indexing semantics.
213+
214+
```c
215+
const double x[] = { 1.0, -2.0, 2.0 };
216+
217+
double v = c_dnrm2( 3, x, -1, 2 );
218+
// returns 3.0
219+
```
220+
221+
The function accepts the following arguments:
222+
223+
- **N**: `[in] CBLAS_INT` number of indexed elements.
224+
- **X**: `[in] double*` input array.
225+
- **stride**: `[in] CBLAS_INT` index increment for `X`.
226+
- **offset**: `[in] CBLAS_INT` starting index for `X`.
227+
228+
```c
229+
double c_dnrm2_ndarray( const CBLAS_INT N, const double *X, const CBLAS_INT stride, const CBLAS_INT offset );
230+
```
231+
210232
</section>
211233

212234
<!-- /.usage -->
@@ -244,6 +266,12 @@ int main( void ) {
244266

245267
// Print the result:
246268
printf( "L2-norm: %lf\n", l2 );
269+
270+
// Compute the L2-norm:
271+
l2 = c_dnrm2_ndarray( N, x, -strideX, N-1 );
272+
273+
// Print the result:
274+
printf( "L2-norm: %lf\n", l2 );
247275
}
248276
```
249277

lib/node_modules/@stdlib/blas/base/dnrm2/benchmark/c/benchmark.length.c

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ static double rand_double( void ) {
9494
* @param len array length
9595
* @return elapsed time in seconds
9696
*/
97-
static double benchmark( int iterations, int len ) {
97+
static double benchmark1( int iterations, int len ) {
9898
double elapsed;
9999
double x[ len ];
100100
double z;
@@ -120,6 +120,39 @@ static double benchmark( int iterations, int len ) {
120120
return elapsed;
121121
}
122122

123+
/**
124+
* Runs a benchmark.
125+
*
126+
* @param iterations number of iterations
127+
* @param len array length
128+
* @return elapsed time in seconds
129+
*/
130+
static double benchmark2( int iterations, int len ) {
131+
double elapsed;
132+
double x[ len ];
133+
double z;
134+
double t;
135+
int i;
136+
137+
for ( i = 0; i < len; i++ ) {
138+
x[ i ] = ( rand_double() * 20000.0 ) - 10000.0;
139+
}
140+
z = 0.0;
141+
t = tic();
142+
for ( i = 0; i < iterations; i++ ) {
143+
z = c_dnrm2_ndarray( len, x, 1, 0 );
144+
if ( z != z ) {
145+
printf( "should not return NaN\n" );
146+
break;
147+
}
148+
}
149+
elapsed = tic() - t;
150+
if ( z != z ) {
151+
printf( "should not return NaN\n" );
152+
}
153+
return elapsed;
154+
}
155+
123156
/**
124157
* Main execution sequence.
125158
*/
@@ -142,7 +175,14 @@ int main( void ) {
142175
for ( j = 0; j < REPEATS; j++ ) {
143176
count += 1;
144177
printf( "# c::%s:len=%d\n", NAME, len );
145-
elapsed = benchmark( iter, len );
178+
elapsed = benchmark1( iter, len );
179+
print_results( iter, elapsed );
180+
printf( "ok %d benchmark finished\n", count );
181+
}
182+
for ( j = 0; j < REPEATS; j++ ) {
183+
count += 1;
184+
printf( "# c::%s:ndarray:len=%d\n", NAME, len );
185+
elapsed = benchmark2( iter, len );
146186
print_results( iter, elapsed );
147187
printf( "ok %d benchmark finished\n", count );
148188
}

lib/node_modules/@stdlib/blas/base/dnrm2/docs/repl.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
Indexing is relative to the first index. To introduce an offset, use a typed
99
array view.
1010

11-
If `N <= 0` or `stride <= 0`, the function returns `0`.
11+
If `N <= 0`, the function returns `0`.
1212

1313
Parameters
1414
----------

lib/node_modules/@stdlib/blas/base/dnrm2/examples/c/example.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,10 @@ int main( void ) {
3434

3535
// Print the result:
3636
printf( "L2-norm: %lf\n", l2 );
37+
38+
// Compute the L2-norm:
39+
l2 = c_dnrm2_ndarray( N, x, -strideX, N-1 );
40+
41+
// Print the result:
42+
printf( "L2-norm: %lf\n", l2 );
3743
}

lib/node_modules/@stdlib/blas/base/dnrm2/include/stdlib/blas/base/dnrm2.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ extern "C" {
3636
*/
3737
double API_SUFFIX(c_dnrm2)( const CBLAS_INT N, const double *X, const CBLAS_INT stride );
3838

39+
/**
40+
* Computes the L2-norm of a double-precision floating-point vector using alternative indexing semantics.
41+
*/
42+
double API_SUFFIX(c_dnrm2_ndarray)( const CBLAS_INT N, const double *X, const CBLAS_INT stride, const CBLAS_INT offset );
43+
3944
#ifdef __cplusplus
4045
}
4146
#endif

lib/node_modules/@stdlib/blas/base/dnrm2/lib/dnrm2.js

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,8 @@
2020

2121
// MODULES //
2222

23-
var sqrt = require( '@stdlib/math/base/special/sqrt' );
24-
var abs = require( '@stdlib/math/base/special/abs' );
25-
var pow = require( '@stdlib/math/base/special/pow' );
23+
var stride2offset = require( '@stdlib/strided/base/stride2offset' );
24+
var ndarray = require( './ndarray.js' );
2625

2726

2827
// MAIN //
@@ -32,7 +31,7 @@ var pow = require( '@stdlib/math/base/special/pow' );
3231
*
3332
* @param {PositiveInteger} N - number of indexed elements
3433
* @param {Float64Array} x - input array
35-
* @param {PositiveInteger} stride - stride length
34+
* @param {integer} stride - stride length
3635
* @returns {number} L2-norm
3736
*
3837
* @example
@@ -44,32 +43,8 @@ var pow = require( '@stdlib/math/base/special/pow' );
4443
* // returns 3.0
4544
*/
4645
function dnrm2( N, x, stride ) {
47-
var scale;
48-
var ssq;
49-
var ax;
50-
var i;
51-
52-
if ( N <= 0 || stride <= 0 ) {
53-
return 0.0;
54-
}
55-
if ( N === 1 ) {
56-
return abs( x[ 0 ] );
57-
}
58-
scale = 0.0;
59-
ssq = 1.0;
60-
N *= stride;
61-
for ( i = 0; i < N; i += stride ) {
62-
if ( x[ i ] !== 0.0 ) {
63-
ax = abs( x[ i ] );
64-
if ( scale < ax ) {
65-
ssq = 1.0 + ( ssq * pow( scale/ax, 2 ) );
66-
scale = ax;
67-
} else {
68-
ssq += pow( ax/scale, 2 );
69-
}
70-
}
71-
}
72-
return scale * sqrt( ssq );
46+
var ox = stride2offset( N, stride );
47+
return ndarray( N, x, stride, ox );
7348
}
7449

7550

lib/node_modules/@stdlib/blas/base/dnrm2/lib/dnrm2.native.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ var addon = require( './../src/addon.node' );
3030
*
3131
* @param {PositiveInteger} N - number of indexed elements
3232
* @param {Float64Array} x - input array
33-
* @param {PositiveInteger} stride - stride length
33+
* @param {integer} stride - stride length
3434
* @returns {number} L2-norm
3535
*
3636
* @example

lib/node_modules/@stdlib/blas/base/dnrm2/lib/ndarray.js

Lines changed: 74 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* @license Apache-2.0
33
*
4-
* Copyright (c) 2020 The Stdlib Authors.
4+
* Copyright (c) 2023 The Stdlib Authors.
55
*
66
* Licensed under the Apache License, Version 2.0 (the "License");
77
* you may not use this file except in compliance with the License.
@@ -20,9 +20,19 @@
2020

2121
// MODULES //
2222

23-
var sqrt = require( '@stdlib/math/base/special/sqrt' );
23+
var FLOAT64_MAX = require( '@stdlib/constants/float64/max' );
2424
var abs = require( '@stdlib/math/base/special/abs' );
25-
var pow = require( '@stdlib/math/base/special/pow' );
25+
var abs2 = require( '@stdlib/math/base/special/abs2' );
26+
var sqrt = require( '@stdlib/math/base/special/sqrt' );
27+
28+
29+
// VARIABLES //
30+
31+
// Blue's scaling constants:
32+
var tsml = 1.4916681462400413E-154;
33+
var tbig = 1.9979190722022350E+146;
34+
var ssml = 4.4989137945431964E+161;
35+
var sbig = 1.1113793747425387E-162;
2636

2737

2838
// MAIN //
@@ -41,38 +51,83 @@ var pow = require( '@stdlib/math/base/special/pow' );
4151
*
4252
* var x = new Float64Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0 ] );
4353
*
44-
* var out = dnrm2( 4, x, 2, 1 );
54+
* var z = dnrm2( 4, x, 2, 1 );
4555
* // returns 5.0
4656
*/
4757
function dnrm2( N, x, stride, offset ) {
48-
var scale;
49-
var ssq;
58+
var notbig;
59+
var sumsq;
60+
var abig;
61+
var amed;
62+
var asml;
63+
var ymax;
64+
var ymin;
65+
var scl;
5066
var ax;
5167
var ix;
5268
var i;
5369

5470
if ( N <= 0 ) {
5571
return 0.0;
5672
}
57-
if ( N === 1 ) {
58-
return abs( x[ offset ] );
59-
}
6073
ix = offset;
61-
scale = 0.0;
62-
ssq = 1.0;
74+
75+
// Initialize loop values for accumulation:
76+
notbig = true;
77+
78+
sumsq = 0.0;
79+
abig = 0.0;
80+
amed = 0.0;
81+
asml = 0.0;
82+
scl = 1.0;
83+
84+
// Compute the sum of squares using 3 accumulators--`abig` (sum of squares scaled down to avoid overflow), `asml` (sum of squares scaled up to avoid underflow), `amed` (sum of squares that do not require scaling)--and thresholds and multipliers--`tbig` (values bigger than this are scaled down by `sbig`) and `tsml` (values smaller than this are scaled up by `ssml`)...
6385
for ( i = 0; i < N; i++ ) {
64-
if ( x[ ix ] !== 0.0 ) {
65-
ax = abs( x[ ix ] );
66-
if ( scale < ax ) {
67-
ssq = 1.0 + ( ssq * pow( scale/ax, 2 ) );
68-
scale = ax;
69-
} else {
70-
ssq += pow( ax/scale, 2 );
86+
ax = abs( x[ ix ] );
87+
if ( ax > tbig ) {
88+
abig += abs2( ax * sbig );
89+
notbig = false;
90+
} else if ( ax < tsml ) {
91+
if ( notbig ) {
92+
asml += abs2( ax * ssml );
7193
}
94+
} else {
95+
amed += ( ax * ax );
7296
}
7397
ix += stride;
7498
}
75-
return scale * sqrt( ssq );
99+
// Combine `abig` and `amed` or `amed` and `asml` if more than one accumulator was used...
100+
if ( abig > 0.0 ) {
101+
// Combine `abig` and `amed` if `abig` > 0...
102+
if ( amed > 0.0 || ( amed > FLOAT64_MAX ) || ( amed !== amed ) ) {
103+
abig += ( ( amed * sbig ) * sbig );
104+
}
105+
scl = 1.0 / sbig;
106+
sumsq = abig;
107+
} else if ( asml > 0.0 ) {
108+
// Combine `amed` and `asml` if `asml` > 0...
109+
if ( amed > 0.0 || amed > FLOAT64_MAX || ( amed !== amed ) ) {
110+
amed = sqrt( amed );
111+
asml = sqrt( asml ) / ssml;
112+
if ( asml > amed ) {
113+
ymin = amed;
114+
ymax = asml;
115+
} else {
116+
ymin = asml;
117+
ymax = amed;
118+
}
119+
scl = 1.0;
120+
sumsq = ( ymax * ymax ) * ( 1.0 + abs2( ymin / ymax ) );
121+
} else {
122+
scl = 1.0 / ssml;
123+
sumsq = asml;
124+
}
125+
} else {
126+
// All values are mid-range...
127+
scl = 1.0;
128+
sumsq = amed;
129+
}
130+
return sqrt( sumsq ) * scl;
76131
}
77132

78133

lib/node_modules/@stdlib/blas/base/dnrm2/lib/ndarray.native.js

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@
2020

2121
// MODULES //
2222

23-
var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' );
24-
var offsetView = require( '@stdlib/strided/base/offset-view' );
25-
var addon = require( './dnrm2.native.js' );
23+
var addon = require( './../src/addon.node' );
2624

2725

2826
// MAIN //
@@ -45,13 +43,7 @@ var addon = require( './dnrm2.native.js' );
4543
* // returns 5.0
4644
*/
4745
function dnrm2( N, x, stride, offset ) {
48-
var view;
49-
offset = minViewBufferIndex( N, stride, offset );
50-
if ( stride < 0 ) {
51-
stride *= -1;
52-
}
53-
view = offsetView( x, offset );
54-
return addon( N, view, stride );
46+
return addon.ndarray( N, x, stride, offset );
5547
}
5648

5749

0 commit comments

Comments
 (0)