Skip to content

Commit 1e1ea6f

Browse files
authored
feat: update JavaScript implementation and add C ndarray API for blas/base/snrm2
PR-URL: #2924 Ref: #2039 Reviewed-by: Athan Reines <[email protected]>
1 parent 57d03ad commit 1e1ea6f

File tree

21 files changed

+825
-187
lines changed

21 files changed

+825
-187
lines changed

lib/node_modules/@stdlib/blas/base/snrm2/README.md

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ var z = snrm2( 4, x1, 2 );
9191
// returns 5.0
9292
```
9393

94-
If either `N` or `stride` is less than or equal to `0`, the function returns `0`.
94+
If `N` is less than or equal to `0`, the function returns `0`.
9595

9696
#### snrm2.ndarray( N, x, stride, offset )
9797

@@ -160,6 +160,129 @@ console.log( out );
160160

161161
<!-- /.examples -->
162162

163+
<!-- C interface documentation. -->
164+
165+
* * *
166+
167+
<section class="c">
168+
169+
## C APIs
170+
171+
<!-- Section to include introductory text. Make sure to keep an empty line after the intro `section` element and another before the `/section` close. -->
172+
173+
<section class="intro">
174+
175+
</section>
176+
177+
<!-- /.intro -->
178+
179+
<!-- C usage documentation. -->
180+
181+
<section class="usage">
182+
183+
### Usage
184+
185+
```c
186+
#include "stdlib/blas/base/snrm2.h"
187+
```
188+
189+
#### c_snrm2( N, \*X, stride )
190+
191+
Computes the L2-norm of a complex single-precision floating-point vector.
192+
193+
```c
194+
const float x[] = { 1.0f, 2.0f, 2.0f, -7.0f, -2.0f, 3.0f, 4.0f, 2.0f };
195+
196+
float norm = c_snrm2( 4, x, 2 );
197+
// returns 5.0f
198+
```
199+
200+
The function accepts the following arguments:
201+
202+
- **N**: `[in] CBLAS_INT` number of indexed elements.
203+
- **X**: `[in] float*` input array.
204+
- **stride**: `[in] CBLAS_INT` index increment for `X`.
205+
206+
```c
207+
float c_snrm2( const CBLAS_INT N, const float *X, const CBLAS_INT stride );
208+
```
209+
210+
#### c_snrm2_ndarray( N, \*X, stride, offset )
211+
212+
Computes the L2-norm of a complex single-precision floating-point vector using alternative indexing semantics.
213+
214+
```c
215+
const float x[] = { 1.0f, 2.0f, 2.0f, -7.0f, -2.0f, 3.0f, 4.0f, 2.0f };
216+
217+
float norm = c_snrm2_ndarray( 4, x, 2, 0 );
218+
// returns 5.0f
219+
```
220+
221+
The function accepts the following arguments:
222+
223+
- **N**: `[in] CBLAS_INT` number of indexed elements.
224+
- **X**: `[in] float*` input array.
225+
- **stride**: `[in] CBLAS_INT` index increment for `X`.
226+
- **offset**: `[in] CBLAS_INT` starting index for `X`.
227+
228+
```c
229+
float c_snrm2_ndarray( const CBLAS_INT N, const float *X, const CBLAS_INT stride, const CBLAS_INT offset );
230+
```
231+
232+
</section>
233+
234+
<!-- /.usage -->
235+
236+
<!-- C API usage notes. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
237+
238+
<section class="notes">
239+
240+
</section>
241+
242+
<!-- /.notes -->
243+
244+
<!-- C API usage examples. -->
245+
246+
<section class="examples">
247+
248+
### Examples
249+
250+
```c
251+
#include "stdlib/blas/base/snrm2.h"
252+
#include <stdio.h>
253+
254+
int main( void ) {
255+
// Create a strided array:
256+
const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
257+
258+
// Specify the number of indexed elements:
259+
const int N = 8;
260+
261+
// Specify a stride:
262+
const int strideX = 1;
263+
264+
// Compute the L2-norm:
265+
float l2 = c_snrm2( N, x, strideX );
266+
267+
// Print the result:
268+
printf( "L2-norm: %f\n", l2 );
269+
270+
// Compute the L2-norm:
271+
l2 = c_snrm2_ndarray( N, x, -strideX, 7 );
272+
273+
// Print the result:
274+
printf( "L2-norm: %f\n", l2 );
275+
}
276+
```
277+
278+
</section>
279+
280+
<!-- /.examples -->
281+
282+
</section>
283+
284+
<!-- /.c -->
285+
163286
<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
164287
165288
<section class="related">

lib/node_modules/@stdlib/blas/base/snrm2/benchmark/c/benchmark.length.c

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ static float rand_float( void ) {
9494
* @param len array length
9595
* @return elapsed time in seconds
9696
*/
97-
static double benchmark( int iterations, int len ) {
97+
static double benchmark1( int iterations, int len ) {
9898
double elapsed;
9999
float x[ len ];
100100
float z;
@@ -120,6 +120,39 @@ static double benchmark( int iterations, int len ) {
120120
return elapsed;
121121
}
122122

123+
/**
124+
* Runs a benchmark.
125+
*
126+
* @param iterations number of iterations
127+
* @param len array length
128+
* @return elapsed time in seconds
129+
*/
130+
static double benchmark2( int iterations, int len ) {
131+
double elapsed;
132+
float x[ len ];
133+
float z;
134+
double t;
135+
int i;
136+
137+
for ( i = 0; i < len; i++ ) {
138+
x[ i ] = ( rand_float() * 20000.0f ) - 10000.0f;
139+
}
140+
z = 0.0f;
141+
t = tic();
142+
for ( i = 0; i < iterations; i++ ) {
143+
z = c_snrm2_ndarray( len, x, 1, 0 );
144+
if ( z != z ) {
145+
printf( "should not return NaN\n" );
146+
break;
147+
}
148+
}
149+
elapsed = tic() - t;
150+
if ( z != z ) {
151+
printf( "should not return NaN\n" );
152+
}
153+
return elapsed;
154+
}
155+
123156
/**
124157
* Main execution sequence.
125158
*/
@@ -142,7 +175,14 @@ int main( void ) {
142175
for ( j = 0; j < REPEATS; j++ ) {
143176
count += 1;
144177
printf( "# c::%s:len=%d\n", NAME, len );
145-
elapsed = benchmark( iter, len );
178+
elapsed = benchmark1( iter, len );
179+
print_results( iter, elapsed );
180+
printf( "ok %d benchmark finished\n", count );
181+
}
182+
for ( j = 0; j < REPEATS; j++ ) {
183+
count += 1;
184+
printf( "# c::%s:ndarray:len=%d\n", NAME, len );
185+
elapsed = benchmark2( iter, len );
146186
print_results( iter, elapsed );
147187
printf( "ok %d benchmark finished\n", count );
148188
}

lib/node_modules/@stdlib/blas/base/snrm2/docs/repl.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
Indexing is relative to the first index. To introduce an offset, use a typed
99
array view.
1010

11-
If `N <= 0` or `stride <= 0`, the function returns `0`.
11+
If `N <= 0` the function returns `0`.
1212

1313
Parameters
1414
----------

lib/node_modules/@stdlib/blas/base/snrm2/examples/c/example.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,10 @@ int main( void ) {
3434

3535
// Print the result:
3636
printf( "L2-norm: %f\n", l2 );
37+
38+
// Compute the L2-norm:
39+
l2 = c_snrm2_ndarray( N, x, -strideX, 7 );
40+
41+
// Print the result:
42+
printf( "L2-norm: %f\n", l2 );
3743
}

lib/node_modules/@stdlib/blas/base/snrm2/include/stdlib/blas/base/snrm2.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#ifndef SNRM2_H
2323
#define SNRM2_H
2424

25+
#include "stdlib/blas/base/shared.h"
26+
2527
/*
2628
* If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler.
2729
*/
@@ -32,7 +34,12 @@ extern "C" {
3234
/**
3335
* Computes the L2-norm of a single-precision floating-point vector.
3436
*/
35-
float c_snrm2( const int N, const float *X, const int stride );
37+
float API_SUFFIX(c_snrm2)( const CBLAS_INT N, const float *X, const CBLAS_INT stride );
38+
39+
/**
40+
* Computes the L2-norm of a single-precision floating-point vector using alternative indexing semantics.
41+
*/
42+
float API_SUFFIX(c_snrm2_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT stride, const CBLAS_INT offset );
3643

3744
#ifdef __cplusplus
3845
}

lib/node_modules/@stdlib/blas/base/snrm2/include/stdlib/blas/base/snrm2_cblas.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#ifndef SNRM2_CBLAS_H
2323
#define SNRM2_CBLAS_H
2424

25+
#include "stdlib/blas/base/shared.h"
26+
2527
/*
2628
* If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler.
2729
*/
@@ -32,7 +34,7 @@ extern "C" {
3234
/**
3335
* Computes the L2-norm of a single-precision floating-point vector.
3436
*/
35-
float cblas_snrm2( const int N, const float *X, const int stride );
37+
float API_SUFFIX(cblas_snrm2)( const CBLAS_INT N, const float *X, const CBLAS_INT stride );
3638

3739
#ifdef __cplusplus
3840
}

lib/node_modules/@stdlib/blas/base/snrm2/lib/ndarray.js

Lines changed: 73 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,20 @@
2020

2121
// MODULES //
2222

23-
var sqrtf = require( '@stdlib/math/base/special/sqrtf' );
23+
var FLOAT32_MAX = require( '@stdlib/constants/float32/max' );
24+
var f32 = require( '@stdlib/number/float64/base/to-float32' );
2425
var absf = require( '@stdlib/math/base/special/absf' );
25-
var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
26+
var abs2f = require( '@stdlib/math/base/special/abs2f' );
27+
var sqrtf = require( '@stdlib/math/base/special/sqrtf' );
28+
29+
30+
// VARIABLES //
31+
32+
// Blue's scaling constants:
33+
var tsml = 1.08420217E-19;
34+
var tbig = 4.50359963E+15;
35+
var ssml = 3.77789319E+22;
36+
var sbig = 1.32348898E-23;
2637

2738

2839
// MAIN //
@@ -45,37 +56,79 @@ var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
4556
* // returns 5.0
4657
*/
4758
function snrm2( N, x, stride, offset ) {
48-
var scale;
49-
var ssq;
59+
var notbig;
60+
var sumsq;
61+
var abig;
62+
var amed;
63+
var asml;
64+
var ymax;
65+
var ymin;
66+
var scl;
5067
var ax;
5168
var ix;
52-
var v;
5369
var i;
5470

5571
if ( N <= 0 ) {
5672
return 0.0;
5773
}
58-
if ( N === 1 ) {
59-
return absf( x[ offset ] );
60-
}
6174
ix = offset;
62-
scale = 0.0;
63-
ssq = 1.0;
75+
76+
// Initialize loop values for accumulation:
77+
notbig = true;
78+
79+
sumsq = 0.0;
80+
abig = 0.0;
81+
amed = 0.0;
82+
asml = 0.0;
83+
scl = 1.0;
84+
85+
// Compute the sum of squares using 3 accumulators--`abig` (sum of squares scaled down to avoid overflow), `asml` (sum of squares scaled up to avoid underflow), `amed` (sum of squares that do not require scaling)--and thresholds and multipliers--`tbig` (values bigger than this are scaled down by `sbig`) and `tsml` (values smaller than this are scaled up by `ssml`)...
6486
for ( i = 0; i < N; i++ ) {
65-
if ( x[ ix ] !== 0.0 ) {
66-
ax = absf( x[ ix ] );
67-
if ( scale < ax ) {
68-
v = float64ToFloat32( scale/ax );
69-
ssq = float64ToFloat32( 1.0 + float64ToFloat32( ssq * float64ToFloat32( v*v ) ) ); // eslint-disable-line max-len
70-
scale = ax;
71-
} else {
72-
v = float64ToFloat32( ax/scale );
73-
ssq = float64ToFloat32( ssq + float64ToFloat32( v*v ) );
87+
ax = absf( x[ ix ] );
88+
if ( ax > tbig ) {
89+
abig = f32( abig + abs2f( ax * sbig ) );
90+
notbig = false;
91+
} else if ( ax < tsml ) {
92+
if ( notbig ) {
93+
asml = f32( asml + abs2f( ax * ssml ) );
7494
}
95+
} else {
96+
amed = f32( amed + f32( ax * ax ) );
7597
}
7698
ix += stride;
7799
}
78-
return float64ToFloat32( scale * sqrtf( ssq ) );
100+
// Combine `abig` and `amed` or `amed` and `asml` if more than one accumulator was used...
101+
if ( abig > 0.0 ) {
102+
// Combine `abig` and `amed` if `abig` > 0...
103+
if ( amed > 0.0 || ( amed > FLOAT32_MAX ) || ( amed !== amed ) ) {
104+
abig = f32( abig + f32( f32( amed * sbig ) * sbig ) );
105+
}
106+
scl = f32( 1.0 / sbig );
107+
sumsq = abig;
108+
} else if ( asml > 0.0 ) {
109+
// Combine `amed` and `asml` if `asml` > 0...
110+
if ( amed > 0.0 || amed > FLOAT32_MAX || ( amed !== amed ) ) {
111+
amed = sqrtf( amed );
112+
asml = f32( sqrtf( asml ) / ssml );
113+
if ( asml > amed ) {
114+
ymin = amed;
115+
ymax = asml;
116+
} else {
117+
ymin = asml;
118+
ymax = amed;
119+
}
120+
scl = 1.0;
121+
sumsq = f32( f32( ymax * ymax ) * f32( 1.0 + abs2f( ymin / ymax ) ) ); // eslint-disable-line max-len
122+
} else {
123+
scl = f32( 1.0 / ssml );
124+
sumsq = asml;
125+
}
126+
} else {
127+
// All values are mid-range...
128+
scl = 1.0;
129+
sumsq = amed;
130+
}
131+
return f32( sqrtf( sumsq ) * scl );
79132
}
80133

81134

0 commit comments

Comments
 (0)