Skip to content

Commit f7edbe0

Browse files
aman-095 and kgryte authored
feat: add C ndarray implementation for blas/base/sdsdot
PR-URL: #2921 Ref: #2039 Co-authored-by: Athan Reines <[email protected]> Reviewed-by: Athan Reines <[email protected]> Signed-off-by: Athan Reines <[email protected]>
1 parent 2bedae9 commit f7edbe0

File tree

12 files changed

+448
-94
lines changed

12 files changed

+448
-94
lines changed

lib/node_modules/@stdlib/blas/base/sdsdot/README.md

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,140 @@ console.log( out );
178178

179179
<!-- /.examples -->
180180

181+
<!-- C interface documentation. -->
182+
183+
* * *
184+
185+
<section class="c">
186+
187+
## C APIs
188+
189+
<!-- Section to include introductory text. Make sure to keep an empty line after the intro `section` element and another before the `/section` close. -->
190+
191+
<section class="intro">
192+
193+
</section>
194+
195+
<!-- /.intro -->
196+
197+
<!-- C usage documentation. -->
198+
199+
<section class="usage">
200+
201+
### Usage
202+
203+
```c
204+
#include "stdlib/blas/base/sdsdot.h"
205+
```
206+
207+
#### c_sdsdot( N, scalar, \*X, strideX, \*Y, strideY )
208+
209+
Calculates the dot product of vectors `x` and `y` with extended accumulation.
210+
211+
```c
212+
const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
213+
const float y[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
214+
215+
float v = c_sdsdot( 5, 0.0f, x, 1, y, -1 );
216+
// returns 35.0f
217+
```
218+
219+
The function accepts the following arguments:
220+
221+
- **N**: `[in] CBLAS_INT` number of indexed elements.
222+
- **scalar**: `[in] float` scalar constant to add to dot product.
223+
- **X**: `[in] float*` first input array.
224+
- **strideX**: `[in] CBLAS_INT` index increment for `X`.
225+
- **Y**: `[in] float*` second input array.
226+
- **strideY**: `[in] CBLAS_INT` index increment for `Y`.
227+
228+
```c
229+
float c_sdsdot( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY );
230+
```
231+
232+
#### c_sdsdot_ndarray( N, scalar, \*X, strideX, offsetX, \*Y, strideY, offsetY )
233+
234+
Calculates the dot product of vectors `x` and `y` with extended accumulation using alternative indexing semantics.
235+
236+
```c
237+
const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
238+
const float y[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
239+
240+
float v = c_sdsdot_ndarray( 5, 0.0f, x, 1, 0, y, -1, 7 );
241+
// returns -80.0f
242+
```
243+
244+
The function accepts the following arguments:
245+
246+
- **N**: `[in] CBLAS_INT` number of indexed elements.
247+
- **scalar**: `[in] float` scalar constant to add to dot product.
248+
- **X**: `[in] float*` first input array.
249+
- **strideX**: `[in] CBLAS_INT` index increment for `X`.
250+
- **offsetX**: `[in] CBLAS_INT` starting index for `X`.
251+
- **Y**: `[in] float*` second input array.
252+
- **strideY**: `[in] CBLAS_INT` index increment for `Y`.
253+
- **offsetY**: `[in] CBLAS_INT` starting index for `Y`.
254+
255+
```c
256+
float c_sdsdot_ndarray( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY );
257+
```
258+
259+
</section>
260+
261+
<!-- /.usage -->
262+
263+
<!-- C API usage notes. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
264+
265+
<section class="notes">
266+
267+
</section>
268+
269+
<!-- /.notes -->
270+
271+
<!-- C API usage examples. -->
272+
273+
<section class="examples">
274+
275+
### Examples
276+
277+
```c
278+
#include "stdlib/blas/base/sdsdot.h"
279+
#include <stdio.h>
280+
281+
int main( void ) {
282+
// Create strided arrays:
283+
const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
284+
const float y[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
285+
286+
// Specify the number of indexed elements:
287+
const int N = 8;
288+
289+
// Specify strides:
290+
const int strideX = 1;
291+
const int strideY = -1;
292+
293+
// Compute the dot product:
294+
float d = c_sdsdot( N, 0.0f, x, strideX, y, strideY );
295+
296+
// Print the result:
297+
printf( "dot product: %f\n", d );
298+
299+
// Compute the dot product using alternative indexing semantics:
300+
d = c_sdsdot_ndarray( N, 0.0f, x, strideX, 0, y, strideY, 7 );
301+
302+
// Print the result:
303+
printf( "dot product: %f\n", d );
304+
}
305+
```
306+
307+
</section>
308+
309+
<!-- /.examples -->
310+
311+
</section>
312+
313+
<!-- /.c -->
314+
181315
* * *
182316
183317
<section class="references">

lib/node_modules/@stdlib/blas/base/sdsdot/benchmark/c/benchmark.length.c

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ static float rand_float( void ) {
9494
* @param len array length
9595
* @return elapsed time in seconds
9696
*/
97-
static double benchmark( int iterations, int len ) {
97+
static double benchmark1( int iterations, int len ) {
9898
double elapsed;
9999
float x[ len ];
100100
float y[ len ];
@@ -122,6 +122,41 @@ static double benchmark( int iterations, int len ) {
122122
return elapsed;
123123
}
124124

125+
/**
126+
* Runs a benchmark of the ndarray interface (`c_sdsdot_ndarray`).
127+
*
128+
* @param iterations number of iterations
129+
* @param len array length
130+
* @return elapsed time in seconds
131+
*/
132+
static double benchmark2( int iterations, int len ) {
133+
double elapsed;
134+
float x[ len ];
135+
float y[ len ];
136+
float z;
137+
double t;
138+
int i;
139+
140+
for ( i = 0; i < len; i++ ) {
141+
x[ i ] = ( rand_float()*20000.0f ) - 10000.0f;
142+
y[ i ] = ( rand_float()*20000.0f ) - 10000.0f;
143+
}
144+
z = 0.0f;
145+
t = tic();
146+
for ( i = 0; i < iterations; i++ ) {
147+
z = c_sdsdot_ndarray( len, 0.0f, x, 1, 0, y, 1, 0 );
148+
if ( z != z ) {
149+
printf( "should not return NaN\n" );
150+
break;
151+
}
152+
}
153+
elapsed = tic() - t;
154+
if ( z != z ) {
155+
printf( "should not return NaN\n" );
156+
}
157+
return elapsed;
158+
}
159+
125160
/**
126161
* Main execution sequence.
127162
*/
@@ -144,7 +179,14 @@ int main( void ) {
144179
for ( j = 0; j < REPEATS; j++ ) {
145180
count += 1;
146181
printf( "# c::%s:len=%d\n", NAME, len );
147-
elapsed = benchmark( iter, len );
182+
elapsed = benchmark1( iter, len );
183+
print_results( iter, elapsed );
184+
printf( "ok %d benchmark finished\n", count );
185+
}
186+
for ( j = 0; j < REPEATS; j++ ) {
187+
count += 1;
188+
printf( "# c::%s:ndarray:len=%d\n", NAME, len );
189+
elapsed = benchmark2( iter, len );
148190
print_results( iter, elapsed );
149191
printf( "ok %d benchmark finished\n", count );
150192
}

lib/node_modules/@stdlib/blas/base/sdsdot/examples/c/example.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,10 @@ int main( void ) {
3636

3737
// Print the result:
3838
printf( "dot product: %f\n", d );
39+
40+
// Compute the dot product using alternative indexing semantics:
41+
d = c_sdsdot_ndarray( N, 0.0f, x, strideX, 0, y, strideY, 7 );
42+
43+
// Print the result:
44+
printf( "dot product: %f\n", d );
3945
}

lib/node_modules/@stdlib/blas/base/sdsdot/include/stdlib/blas/base/sdsdot.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#ifndef SDSDOT_H
2323
#define SDSDOT_H
2424

25+
#include "stdlib/blas/base/shared.h"
26+
2527
/*
2628
* If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler.
2729
*/
@@ -32,7 +34,12 @@ extern "C" {
3234
/**
3335
* Computes the dot product of two single-precision floating-point vectors with extended accumulation.
3436
*/
35-
float c_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY );
37+
float API_SUFFIX(c_sdsdot)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY );
38+
39+
/**
40+
* Computes the dot product of two single-precision floating-point vectors with extended accumulation using alternative indexing semantics.
41+
*/
42+
float API_SUFFIX(c_sdsdot_ndarray)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY );
3643

3744
#ifdef __cplusplus
3845
}

lib/node_modules/@stdlib/blas/base/sdsdot/include/stdlib/blas/base/sdsdot_cblas.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#ifndef SDSDOT_CBLAS_H
2323
#define SDSDOT_CBLAS_H
2424

25+
#include "stdlib/blas/base/shared.h"
26+
2527
/*
2628
* If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler.
2729
*/
@@ -32,7 +34,7 @@ extern "C" {
3234
/**
3335
* Computes the dot product of two single-precision floating-point vectors with extended accumulation.
3436
*/
35-
float cblas_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY );
37+
float API_SUFFIX(cblas_sdsdot)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY );
3638

3739
#ifdef __cplusplus
3840
}

lib/node_modules/@stdlib/blas/base/sdsdot/lib/ndarray.native.js

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@
2020

2121
// MODULES //
2222

23-
var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' );
24-
var offsetView = require( '@stdlib/strided/base/offset-view' );
25-
var addon = require( './sdsdot.native.js' );
23+
var addon = require( './../src/addon.node' );
2624

2725

2826
// MAIN //
@@ -50,16 +48,7 @@ var addon = require( './sdsdot.native.js' );
5048
* // returns -5.0
5149
*/
5250
function sdsdot( N, scalar, x, strideX, offsetX, y, strideY, offsetY ) {
53-
var viewX;
54-
var viewY;
55-
56-
offsetX = minViewBufferIndex( N, strideX, offsetX );
57-
offsetY = minViewBufferIndex( N, strideY, offsetY );
58-
59-
viewX = offsetView( x, offsetX );
60-
viewY = offsetView( y, offsetY );
61-
62-
return addon( N, scalar, viewX, strideX, viewY, strideY );
51+
return addon.ndarray( N, scalar, x, strideX, offsetX, y, strideY, offsetY );
6352
}
6453

6554

0 commit comments

Comments
 (0)