Skip to content

Commit ada1567

Browse files
aman-095 and kgryte authored
feat: add C ndarray implementation for blas/base/zswap
PR-URL: #3080 Ref: #2039 Co-authored-by: Athan Reines <[email protected]> Reviewed-by: Athan Reines <[email protected]> Signed-off-by: Athan Reines <[email protected]>
1 parent 020e09e commit ada1567

File tree

13 files changed

+452
-146
lines changed

13 files changed

+452
-146
lines changed

lib/node_modules/@stdlib/blas/base/zswap/README.md

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,142 @@ console.log( y.get( y.length-1 ).toString() );
265265

266266
<!-- /.examples -->
267267

268+
<!-- C interface documentation. -->
269+
270+
* * *
271+
272+
<section class="c">
273+
274+
## C APIs
275+
276+
<!-- Section to include introductory text. Make sure to keep an empty line after the intro `section` element and another before the `/section` close. -->
277+
278+
<section class="intro">
279+
280+
</section>
281+
282+
<!-- /.intro -->
283+
284+
<!-- C usage documentation. -->
285+
286+
<section class="usage">
287+
288+
### Usage
289+
290+
```c
291+
#include "stdlib/blas/base/zswap.h"
292+
```
293+
294+
#### c_zswap( N, \*X, strideX, \*Y, strideY )
295+
296+
Interchanges two complex double-precision floating-point vectors.
297+
298+
```c
299+
double x[] = { 1.0, 2.0, 3.0, 4.0 }; // interleaved real and imaginary components
300+
double y[] = { 5.0, 6.0, 7.0, 8.0 };
301+
302+
c_zswap( 2, (void *)x, 1, (void *)y, 1 );
303+
```
304+
305+
The function accepts the following arguments:
306+
307+
- **N**: `[in] CBLAS_INT` number of indexed elements.
308+
- **X**: `[inout] void*` first input array.
309+
- **strideX**: `[in] CBLAS_INT` index increment for `X`.
310+
- **Y**: `[inout] void*` second input array.
311+
- **strideY**: `[in] CBLAS_INT` index increment for `Y`.
312+
313+
```c
314+
void c_zswap( const CBLAS_INT N, void *X, const CBLAS_INT strideX, void *Y, const CBLAS_INT strideY );
315+
```
316+
317+
#### c_zswap_ndarray( N, \*X, strideX, offsetX, \*Y, strideY, offsetY )
318+
319+
Interchanges two complex double-precision floating-point vectors using alternative indexing semantics.
320+
321+
```c
322+
double x[] = { 1.0, 2.0, 3.0, 4.0 }; // interleaved real and imaginary components
323+
double y[] = { 5.0, 6.0, 7.0, 8.0 };
324+
325+
c_zswap_ndarray( 2, (void *)x, 1, 0, (void *)y, 1, 0 );
326+
```
327+
328+
The function accepts the following arguments:
329+
330+
- **N**: `[in] CBLAS_INT` number of indexed elements.
331+
- **X**: `[inout] void*` first input array.
332+
- **strideX**: `[in] CBLAS_INT` index increment for `X`.
333+
- **offsetX**: `[in] CBLAS_INT` starting index for `X`.
334+
- **Y**: `[inout] void*` second input array.
335+
- **strideY**: `[in] CBLAS_INT` index increment for `Y`.
336+
- **offsetY**: `[in] CBLAS_INT` starting index for `Y`.
337+
338+
```c
339+
void c_zswap_ndarray( const CBLAS_INT N, void *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, void *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY );
340+
```
341+
342+
</section>
343+
344+
<!-- /.usage -->
345+
346+
<!-- C API usage notes. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
347+
348+
<section class="notes">
349+
350+
</section>
351+
352+
<!-- /.notes -->
353+
354+
<!-- C API usage examples. -->
355+
356+
<section class="examples">
357+
358+
### Examples
359+
360+
```c
361+
#include "stdlib/blas/base/zswap.h"
362+
#include <stdio.h>
363+
364+
int main( void ) {
365+
// Create strided arrays:
366+
double x[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 };
367+
double y[] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
368+
369+
// Specify the number of elements:
370+
const int N = 4;
371+
372+
// Specify stride lengths:
373+
const int strideX = 1;
374+
const int strideY = -1;
375+
376+
// Swap elements:
377+
c_zswap( N, (void *)x, strideX, (void *)y, strideY );
378+
379+
// Print the result:
380+
for ( int i = 0; i < N; i++ ) {
381+
printf( "x[ %i ] = %lf + %lfj\n", i, x[ i*2 ], x[ (i*2)+1 ] );
382+
printf( "y[ %i ] = %lf + %lfj\n", i, y[ i*2 ], y[ (i*2)+1 ] );
383+
}
384+
385+
// Swap elements using alternative indexing semantics:
386+
c_zswap_ndarray( N, (void *)x, -strideX, N-1, (void *)y, strideY, N-1 );
387+
388+
// Print the result:
389+
for ( int i = 0; i < N; i++ ) {
390+
printf( "x[ %i ] = %lf + %lfj\n", i, x[ i*2 ], x[ (i*2)+1 ] );
391+
printf( "y[ %i ] = %lf + %lfj\n", i, y[ i*2 ], y[ (i*2)+1 ] );
392+
}
393+
}
394+
```
395+
396+
</section>
397+
398+
<!-- /.examples -->
399+
400+
</section>
401+
402+
<!-- /.c -->
403+
268404
<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
269405
270406
<section class="related">

lib/node_modules/@stdlib/blas/base/zswap/benchmark/c/benchmark.length.c

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ static double rand_double( void ) {
9494
* @param len array length
9595
* @return elapsed time in seconds
9696
*/
97-
static double benchmark( int iterations, int len ) {
97+
static double benchmark1( int iterations, int len ) {
9898
double elapsed;
9999
double *x;
100100
double *y;
@@ -127,6 +127,46 @@ static double benchmark( int iterations, int len ) {
127127
return elapsed;
128128
}
129129

130+
/**
* Runs a benchmark for the ndarray interface (`c_zswap_ndarray`).
*
* @param iterations   number of iterations
* @param len          array length (number of complex elements)
* @return             elapsed time in seconds
*/
static double benchmark2( int iterations, int len ) {
	double elapsed;
	double *x;
	double *y;
	double t;
	int i;

	// Each complex element is stored as two doubles (interleaved real and imaginary components), hence `len*2`:
	x = (double *) malloc( len*2 * sizeof( double ) );
	y = (double *) malloc( len*2 * sizeof( double ) );

	// Walk the buffers one complex element (two doubles) at a time so that every allocated value is initialized before being swapped:
	for ( i = 0; i < len*2; i += 2 ) {
		x[ i ] = ( rand_double()*10000.0 ) - 5000.0;
		x[ i+1 ] = ( rand_double()*10000.0 ) - 5000.0;
		y[ i ] = 0.0;
		y[ i+1 ] = 0.0;
	}
	t = tic();
	for ( i = 0; i < iterations; i++ ) {
		c_zswap_ndarray( len, (void *)x, 1, 0, (void *)y, 1, 0 );

		// A value is NaN if, and only if, it is not equal to itself:
		if ( y[ 0 ] != y[ 0 ] ) {
			printf( "should not return NaN\n" );
			break;
		}
	}
	elapsed = tic() - t;
	if ( y[ 0 ] != y[ 0 ] ) {
		printf( "should not return NaN\n" );
	}
	free( x );
	free( y );

	return elapsed;
}
169+
130170
/**
131171
* Main execution sequence.
132172
*/
@@ -149,7 +189,14 @@ int main( void ) {
149189
for ( j = 0; j < REPEATS; j++ ) {
150190
count += 1;
151191
printf( "# c::%s:len=%d\n", NAME, len );
152-
elapsed = benchmark( iter, len );
192+
elapsed = benchmark1( iter, len );
193+
print_results( iter, elapsed );
194+
printf( "ok %d benchmark finished\n", count );
195+
}
196+
for ( j = 0; j < REPEATS; j++ ) {
197+
count += 1;
198+
printf( "# c::%s:ndarray:len=%d\n", NAME, len );
199+
elapsed = benchmark2( iter, len );
153200
print_results( iter, elapsed );
154201
printf( "ok %d benchmark finished\n", count );
155202
}

lib/node_modules/@stdlib/blas/base/zswap/examples/c/example.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,13 @@ int main( void ) {
3232
printf( "x[ %i ] = %f + %fj\n", i, x[ i*2 ], x[ ( i*2 )+1 ] );
3333
printf( "y[ %i ] = %f + %fj\n", i, y[ i*2 ], y[ ( i*2 )+1 ] );
3434
}
35+
36+
// Swap elements using alternative indexing semantics:
37+
c_zswap_ndarray( 4, (void *)x, -1, 3, (void *)y, -1, 3 );
38+
39+
// Print the result:
40+
for ( int i = 0; i < 4; i++ ) {
41+
printf( "x[ %i ] = %f + %fj\n", i, x[ i*2 ], x[ ( i*2 )+1 ] );
42+
printf( "y[ %i ] = %f + %fj\n", i, y[ i*2 ], y[ ( i*2 )+1 ] );
43+
}
3544
}

lib/node_modules/@stdlib/blas/base/zswap/include/stdlib/blas/base/zswap.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#ifndef ZSWAP_H
2323
#define ZSWAP_H
2424

25+
#include "stdlib/blas/base/shared.h"
26+
2527
/*
2628
* If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler.
2729
*/
@@ -32,7 +34,12 @@ extern "C" {
3234
/**
3335
* Interchanges two complex double-precision floating-point vectors.
3436
*/
35-
void c_zswap( const int N, void *X, const int strideX, void *Y, const int strideY );
37+
void API_SUFFIX(c_zswap)( const CBLAS_INT N, void *X, const CBLAS_INT strideX, void *Y, const CBLAS_INT strideY );
38+
39+
/**
40+
* Interchanges two complex double-precision floating-point vectors using alternative indexing semantics.
41+
*/
42+
void API_SUFFIX(c_zswap_ndarray)( const CBLAS_INT N, void *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, void *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY );
3643

3744
#ifdef __cplusplus
3845
}

lib/node_modules/@stdlib/blas/base/zswap/include/stdlib/blas/base/zswap_cblas.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#ifndef ZSWAP_CBLAS_H
2323
#define ZSWAP_CBLAS_H
2424

25+
#include "stdlib/blas/base/shared.h"
26+
2527
/*
2628
* If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler.
2729
*/
@@ -32,7 +34,7 @@ extern "C" {
3234
/**
3335
* Interchanges two complex double-precision floating-point vectors.
3436
*/
35-
void cblas_zswap( const int N, void *X, const int strideX, void *Y, const int strideY );
37+
void API_SUFFIX(cblas_zswap)( const CBLAS_INT N, void *X, const CBLAS_INT strideX, void *Y, const CBLAS_INT strideY );
3638

3739
#ifdef __cplusplus
3840
}

lib/node_modules/@stdlib/blas/base/zswap/lib/ndarray.native.js

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
// MODULES //
2222

2323
var reinterpret = require( '@stdlib/strided/base/reinterpret-complex128' );
24-
var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' );
2524
var addon = require( './../src/addon.node' );
2625

2726

@@ -68,16 +67,10 @@ var addon = require( './../src/addon.node' );
6867
* // returns 8.0
6968
*/
7069
function zswap( N, x, strideX, offsetX, y, strideY, offsetY ) {
71-
var viewX;
72-
var viewY;
70+
var viewX = reinterpret( x, 0 );
71+
var viewY = reinterpret( y, 0 );
7372

74-
offsetX = minViewBufferIndex( N, strideX, offsetX );
75-
offsetY = minViewBufferIndex( N, strideY, offsetY );
76-
77-
viewX = reinterpret( x, offsetX );
78-
viewY = reinterpret( y, offsetY );
79-
80-
addon( N, viewX, strideX, viewY, strideY );
73+
addon.ndarray( N, viewX, strideX, offsetX, viewY, strideY, offsetY );
8174
return y;
8275
}
8376

lib/node_modules/@stdlib/blas/base/zswap/lib/zswap.js

Lines changed: 5 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020

2121
// MODULES //
2222

23-
var reinterpret = require( '@stdlib/strided/base/reinterpret-complex128' );
23+
var stride2offset = require( '@stdlib/strided/base/stride2offset' );
24+
var ndarray = require( './ndarray.js' );
2425

2526

2627
// MAIN //
@@ -64,59 +65,9 @@ var reinterpret = require( '@stdlib/strided/base/reinterpret-complex128' );
6465
* // returns 8.0
6566
*/
6667
function zswap( N, x, strideX, y, strideY ) {
67-
var viewX;
68-
var viewY;
69-
var tmp;
70-
var sx;
71-
var sy;
72-
var ix;
73-
var iy;
74-
var i;
75-
var j;
76-
77-
if ( N <= 0 ) {
78-
return y;
79-
}
80-
viewX = reinterpret( x, 0 );
81-
viewY = reinterpret( y, 0 );
82-
if ( strideX === 1 && strideY === 1 ) {
83-
for ( i = 0; i < N*2; i += 2 ) {
84-
tmp = viewX[ i ];
85-
viewX[ i ] = viewY[ i ];
86-
viewY[ i ] = tmp;
87-
88-
j = i + 1;
89-
tmp = viewX[ j ];
90-
viewX[ j ] = viewY[ j ];
91-
viewY[ j ] = tmp;
92-
}
93-
return y;
94-
}
95-
if ( strideX < 0 ) {
96-
ix = 2 * ( 1-N ) * strideX;
97-
} else {
98-
ix = 0;
99-
}
100-
if ( strideY < 0 ) {
101-
iy = 2 * ( 1-N ) * strideY;
102-
} else {
103-
iy = 0;
104-
}
105-
sx = strideX * 2;
106-
sy = strideY * 2;
107-
for ( i = 0; i < N; i++ ) {
108-
tmp = viewX[ ix ];
109-
viewX[ ix ] = viewY[ iy ];
110-
viewY[ iy ] = tmp;
111-
112-
tmp = viewX[ ix+1 ];
113-
viewX[ ix+1 ] = viewY[ iy+1 ];
114-
viewY[ iy+1 ] = tmp;
115-
116-
ix += sx;
117-
iy += sy;
118-
}
119-
return y;
68+
var ox = stride2offset( N, strideX );
69+
var oy = stride2offset( N, strideY );
70+
return ndarray( N, x, strideX, ox, y, strideY, oy );
12071
}
12172

12273

0 commit comments

Comments
 (0)