Skip to content

Commit 4f94e60

Browse files
ShabiShett07kgryte
andauthored
feat: add C implementation to blas/base/ssyr
PR-URL: #7127 Ref: #2039 Co-authored-by: Athan Reines <[email protected]> Reviewed-by: Athan Reines <[email protected]>
1 parent 4cf9054 commit 4f94e60

File tree

21 files changed

+921
-293
lines changed

21 files changed

+921
-293
lines changed

lib/node_modules/@stdlib/blas/base/ssyr/README.md

Lines changed: 70 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
@license Apache-2.0
44
5-
Copyright (c) 2025 The Stdlib Authors.
5+
Copyright (c) 2024 The Stdlib Authors.
66
77
Licensed under the Apache License, Version 2.0 (the "License");
88
you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ limitations under the License.
2020

2121
# ssyr
2222

23-
> Perform the symmetric rank 1 operation `A = α*x*x**T + A`.
23+
> Perform the symmetric rank 1 operation `A = α*x*x^T + A`.
2424
2525
<section class="usage">
2626

@@ -32,16 +32,16 @@ var ssyr = require( '@stdlib/blas/base/ssyr' );
3232

3333
#### ssyr( order, uplo, N, α, x, sx, A, LDA )
3434

35-
Performs the symmetric rank 1 operation `A = α*x*x**T + A` where `α` is a scalar, `x` is an `N` element vector, and `A` is an `N` by `N` symmetric matrix.
35+
Performs the symmetric rank 1 operation `A = α*x*x^T + A` where `α` is a scalar, `x` is an `N` element vector, and `A` is an `N` by `N` symmetric matrix.
3636

3737
```javascript
3838
var Float32Array = require( '@stdlib/array/float32' );
3939

40-
var A = new Float32Array( [ 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0 ] );
40+
var A = new Float32Array( [ 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0 ] );
4141
var x = new Float32Array( [ 1.0, 2.0, 3.0 ] );
4242

4343
ssyr( 'row-major', 'upper', 3, 1.0, x, 1, A, 3 );
44-
// A => <Float32Array>[ 2.0, 4.0, 6.0, 0.0, 5.0, 8.0, 0.0, 0.0, 10.0 ]
44+
// A => <Float32Array>[ 2.0, 4.0, 6.0, 2.0, 5.0, 8.0, 3.0, 2.0, 10.0 ]
4545
```
4646

4747
The function has the following parameters:
@@ -51,20 +51,20 @@ The function has the following parameters:
5151
- **N**: number of elements along each dimension of `A`.
5252
- **α**: scalar constant.
5353
- **x**: input [`Float32Array`][mdn-float32array].
54-
- **sx**: index increment for `x`.
54+
- **sx**: stride length for `x`.
5555
- **A**: input matrix stored in linear memory as a [`Float32Array`][mdn-float32array].
56-
- **lda**: stride of the first dimension of `A` (a.k.a., leading dimension of the matrix `A`).
56+
- **LDA**: stride of the first dimension of `A` (a.k.a., leading dimension of the matrix `A`).
5757

58-
The stride parameters determine how elements in the input arrays are accessed at runtime. For example, to iterate over every other element of `x` in reverse order,
58+
The stride parameters determine how elements in the input arrays are accessed at runtime. For example, to iterate over the elements of `x` in reverse order,
5959

6060
```javascript
6161
var Float32Array = require( '@stdlib/array/float32' );
6262

63-
var A = new Float32Array( [ 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0 ] );
64-
var x = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0 ] );
63+
var A = new Float32Array( [ 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0 ] );
64+
var x = new Float32Array( [ 3.0, 2.0, 1.0 ] );
6565

66-
ssyr( 'row-major', 'upper', 3, 1.0, x, -2, A, 3 );
67-
// A => <Float32Array>[ 26.0, 17.0, 8.0, 0.0, 10.0, 5.0, 0.0, 0.0, 2.0 ]
66+
ssyr( 'row-major', 'upper', 3, 1.0, x, -1, A, 3 );
67+
// A => <Float32Array>[ 2.0, 4.0, 6.0, 2.0, 5.0, 8.0, 3.0, 2.0, 10.0 ]
6868
```
6969

7070
Note that indexing is relative to the first index. To introduce an offset, use [`typed array`][mdn-typed-array] views.
@@ -75,14 +75,14 @@ Note that indexing is relative to the first index. To introduce an offset, use [
7575
var Float32Array = require( '@stdlib/array/float32' );
7676

7777
// Initial arrays...
78-
var x0 = new Float32Array( [ 1.0, 1.0, 1.0, 1.0 ] );
79-
var A = new Float32Array( [ 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0 ] );
78+
var x0 = new Float32Array( [ 0.0, 3.0, 2.0, 1.0 ] );
79+
var A = new Float32Array( [ 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0 ] );
8080

8181
// Create offset views...
8282
var x1 = new Float32Array( x0.buffer, x0.BYTES_PER_ELEMENT*1 ); // start at 2nd element
8383

8484
ssyr( 'row-major', 'upper', 3, 1.0, x1, -1, A, 3 );
85-
// A => <Float32Array>[ 2.0, 3.0, 4.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0 ]
85+
// A => <Float32Array>[ 2.0, 4.0, 6.0, 2.0, 5.0, 8.0, 3.0, 2.0, 10.0 ]
8686
```
8787

8888
#### ssyr.ndarray( uplo, N, α, x, sx, ox, A, sa1, sa2, oa )
@@ -92,11 +92,11 @@ Performs the symmetric rank 1 operation `A = α*x*x^T + A`, using alternative in
9292
```javascript
9393
var Float32Array = require( '@stdlib/array/float32' );
9494

95-
var A = new Float32Array( [ 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0 ] );
95+
var A = new Float32Array( [ 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0 ] );
9696
var x = new Float32Array( [ 1.0, 2.0, 3.0 ] );
9797

9898
ssyr.ndarray( 'upper', 3, 1.0, x, 1, 0, A, 3, 1, 0 );
99-
// A => <Float32Array>[ 2.0, 4.0, 6.0, 0.0, 5.0, 8.0, 0.0, 0.0, 10.0 ]
99+
// A => <Float32Array>[ 2.0, 4.0, 6.0, 2.0, 5.0, 8.0, 3.0, 2.0, 10.0 ]
100100
```
101101

102102
The function has the following additional parameters:
@@ -111,11 +111,11 @@ While [`typed array`][mdn-typed-array] views mandate a view offset based on the
111111
```javascript
112112
var Float32Array = require( '@stdlib/array/float32' );
113113

114-
var A = new Float32Array( [ 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0 ] );
114+
var A = new Float32Array( [ 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0 ] );
115115
var x = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0 ] );
116116

117117
ssyr.ndarray( 'upper', 3, 1.0, x, -2, 4, A, 3, 1, 0 );
118-
// A => <Float32Array>[ 26.0, 17.0, 8.0, 0.0, 10.0, 5.0, 0.0, 0.0, 2.0 ]
118+
// A => <Float32Array>[ 26.0, 17.0, 8.0, 2.0, 10.0, 5.0, 3.0, 2.0, 2.0 ]
119119
```
120120

121121
</section>
@@ -149,11 +149,18 @@ var opts = {
149149

150150
var N = 3;
151151

152-
var A = ones( N*N, opts.dtype );
152+
// Create N-by-N symmetric matrices:
153+
var A1 = ones( N*N, opts.dtype );
154+
var A2 = ones( N*N, opts.dtype );
155+
156+
// Create a random vector:
153157
var x = discreteUniform( N, -10.0, 10.0, opts );
154158

155-
ssyr( 'row-major', 'upper', 3, 1.0, x, 1, A, 3 );
156-
console.log( A );
159+
ssyr( 'row-major', 'upper', 3, 1.0, x, 1, A1, 3 );
160+
console.log( A1 );
161+
162+
ssyr.ndarray( 'upper', 3, 1.0, x, 1, 0, A2, 3, 1, 0 );
163+
console.log( A2 );
157164
```
158165

159166
</section>
@@ -186,62 +193,62 @@ console.log( A );
186193
#include "stdlib/blas/base/ssyr.h"
187194
```
188195

189-
#### c_ssyr( order, uplo, N, alpha, \*X, strideX, \*A, LDA )
196+
#### c_ssyr( layout, uplo, N, alpha, \*X, sx, \*A, LDA )
190197

191-
Performs the symmetric rank 1 operation `A = α*x*x^T + A`.
198+
Performs the symmetric rank 1 operation `A = α*x*x^T + A` where `α` is a scalar, `x` is an `N` element vector, and `A` is an `N` by `N` symmetric matrix.
192199

193200
```c
194201
#include "stdlib/blas/base/shared.h"
195202

196-
float A[] = { 1.0f, 0.0f, 0.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f };
197-
const float x[] = { 1.0f, 2.0f, 3.0f };
203+
float A[] = { 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0 };
204+
const float x[] = { 1.0, 2.0, 3.0 };
198205

199-
c_ssyr( CblasColMajor, CblasUpper, 3, 1.0f, x, 1, A, 3 );
206+
c_ssyr( CblasColMajor, CblasUpper, 3, 1.0, x, 1, A, 3 );
200207
```
201208
202209
The function accepts the following arguments:
203210
204-
- **order**: `[in] CBLAS_LAYOUT` storage layout.
211+
- **layout**: `[in] CBLAS_LAYOUT` storage layout.
205212
- **uplo**: `[in] CBLAS_UPLO` specifies whether the upper or lower triangular part of the symmetric matrix `A` should be referenced.
206213
- **N**: `[in] CBLAS_INT` number of elements along each dimension of `A`.
207-
- **alpha**: `[in] float` scalar.
214+
- **alpha**: `[in] float` scalar constant.
208215
- **X**: `[in] float*` input array.
209-
- **strideX**: `[in] CBLAS_INT` index increment for `X`.
216+
- **sx**: `[in] CBLAS_INT` stride length for `X`.
210217
- **A**: `[inout] float*` input matrix.
211218
- **LDA**: `[in] CBLAS_INT` stride of the first dimension of `A` (a.k.a., leading dimension of the matrix `A`).
212219
213220
```c
214-
void c_ssyr( const CBLAS_LAYOUT order, const CBLAS_UPLO uplo, const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, float *A, const CBLAS_INT LDA )
221+
void c_ssyr( const CBLAS_LAYOUT layout, const CBLAS_UPLO uplo, const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, float *A, const CBLAS_INT LDA )
215222
```
216223

217-
#### c_ssyr_ndarray( uplo, N, alpha, \*X, strideX, offsetX, \*A, sa1, sa2, oa )
224+
#### c_ssyr_ndarray( uplo, N, alpha, \*X, sx, ox, \*A, sa1, sa2, oa )
218225

219-
Performs the symmetric rank 1 operation `A = α*x*x^T + A` using alternative indexing semantics.
226+
Performs the symmetric rank 1 operation `A = α*x*x^T + A`, using alternative indexing semantics and where `α` is a scalar, `x` is an `N` element vector, and `A` is an `N` by `N` symmetric matrix.
220227

221228
```c
222229
#include "stdlib/blas/base/shared.h"
223230

224-
float A[] = { 1.0f, 2.0f, 3.0f, 0.0f, 1.0f, 2.0f, 0.0f, 0.0f, 1.0f };
225-
const float x[] = { 1.0f, 2.0f, 3.0f };
231+
float A[] = { 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0 };
232+
const float x[] = { 1.0, 2.0, 3.0 };
226233

227-
c_ssyr_ndarray( CblasUpper, 3, 1.0f, x, 1, 0, A, 3, 1, 0 );
234+
c_ssyr_ndarray( CblasUpper, 3, 1.0, x, 1, 0, A, 3, 1, 0 );
228235
```
229236
230237
The function accepts the following arguments:
231238
232239
- **uplo**: `[in] CBLAS_UPLO` specifies whether the upper or lower triangular part of the symmetric matrix `A` should be referenced.
233240
- **N**: `[in] CBLAS_INT` number of elements along each dimension of `A`.
234-
- **alpha**: `[in] float` scalar.
241+
- **alpha**: `[in] float` scalar constant.
235242
- **X**: `[in] float*` input array.
236-
- **strideX**: `[in] CBLAS_INT` index increment for `X`.
237-
- **offsetX**: `[in] CBLAS_INT` starting index for `X`.
243+
- **sx**: `[in] CBLAS_INT` stride length for `X`.
244+
- **ox**: `[in] CBLAS_INT` starting index for `X`.
238245
- **A**: `[inout] float*` input matrix.
239246
- **sa1**: `[in] CBLAS_INT` stride of the first dimension of `A`.
240247
- **sa2**: `[in] CBLAS_INT` stride of the second dimension of `A`.
241248
- **oa**: `[in] CBLAS_INT` starting index for `A`.
242249
243250
```c
244-
void c_ssyr_ndarray( const CBLAS_UPLO uplo, const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, float *A, const CBLAS_INT sa1, const CBLAS_INT sa2, const CBLAS_INT oa )
251+
void c_ssyr_ndarray( const CBLAS_UPLO uplo, const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, float *A, const CBLAS_INT strideA1, const CBLAS_INT strideA2, const CBLAS_INT offsetA )
245252
```
246253

247254
</section>
@@ -268,27 +275,39 @@ void c_ssyr_ndarray( const CBLAS_UPLO uplo, const CBLAS_INT N, const float alpha
268275
#include <stdio.h>
269276

270277
int main( void ) {
271-
// Create strided arrays:
272-
float A[] = { 1.0f, 0.0f, 0.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f };
273-
const float x[] = { 1.0f, 2.0f, 3.0f };
274-
275-
// Specify the number of elements along each dimension of `A`:
278+
// Define 3x3 symmetric matrices stored in row-major layout:
279+
float A1[ 3*3 ] = {
280+
1.0f, 2.0f, 3.0f,
281+
2.0f, 1.0f, 2.0f,
282+
3.0f, 2.0f, 1.0f
283+
};
284+
285+
float A2[ 3*3 ] = {
286+
1.0f, 2.0f, 3.0f,
287+
2.0f, 1.0f, 2.0f,
288+
3.0f, 2.0f, 1.0f
289+
};
290+
291+
// Define a vector:
292+
const float x[ 3 ] = { 1.0f, 2.0f, 3.0f };
293+
294+
// Specify the number of elements along each dimension of `A1` and `A2`:
276295
const int N = 3;
277296

278297
// Perform the symmetric rank 1 operation `A = α*x*x^T + A`:
279-
c_ssyr( CblasColMajor, CblasUpper, N, 1.0f, x, 1, A, N );
298+
c_ssyr( CblasColMajor, CblasUpper, N, 1.0f, x, 1, A1, N );
280299

281300
// Print the result:
282301
for ( int i = 0; i < N*N; i++ ) {
283-
printf( "A[ %i ] = %f\n", i, A[ i ] );
302+
printf( "A1[ %i ] = %f\n", i, A1[ i ] );
284303
}
285304

286-
// Perform the symmetric rank 1 operation `A = α*x*x^T + A`:
287-
c_ssyr_ndarray( CblasUpper, N, 1.0f, x, 1, 0, A, N, 1, 0 );
305+
// Perform the symmetric rank 1 operation `A = α*x*x^T + A` using alternative indexing semantics:
306+
c_ssyr_ndarray( CblasUpper, N, 1.0, x, 1, 0, A2, N, 1, 0 );
288307

289308
// Print the result:
290309
for ( int i = 0; i < N*N; i++ ) {
291-
printf( "A[ %i ] = %f\n", i, A[ i ] );
310+
printf( "A2[ %i ] = %f\n", i, A[ i ] );
292311
}
293312
}
294313
```
@@ -315,7 +334,7 @@ int main( void ) {
315334
316335
[blas]: http://www.netlib.org/blas
317336
318-
[blas-ssyr]: https://www.netlib.org/lapack/explore-html/dc/d82/group__her_gad7585662770cdd3001ed08c7a864cd21.html#gad7585662770cdd3001ed08c7a864cd21
337+
[blas-ssyr]: https://www.netlib.org/lapack/explore-html-3.6.1/d6/d30/group__single__blas__level2_ga7b8a99048765ed2bf7c1e770bff0b622.html
319338
320339
[mdn-float32array]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Float32Array
321340

lib/node_modules/@stdlib/blas/base/ssyr/benchmark/c/benchmark.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ static double tic( void ) {
8383
* Runs a benchmark.
8484
*
8585
* @param iterations number of iterations
86-
* @param N number of elements along each dimension
86+
* @param N array dimension size
8787
* @return elapsed time in seconds
8888
*/
8989
static double benchmark1( int iterations, int N ) {
@@ -93,18 +93,18 @@ static double benchmark1( int iterations, int N ) {
9393
double t;
9494
int i;
9595

96-
stdlib_strided_sfill( N, 0.5f, x, 1 );
96+
stdlib_strided_sfill( N, 1.0f, x, 1 );
9797
stdlib_strided_sfill( N*N, 1.0f, A, 1 );
9898
t = tic();
9999
for ( i = 0; i < iterations; i++ ) {
100-
c_ssyr( CblasRowMajor, CblasUpper, N, 1.0, x, 1, A, N );
101-
if ( A[ 0 ] != A[ 0 ] ) {
100+
c_ssyr( CblasRowMajor, CblasUpper, N, 1.0f, x, 1, A, N );
101+
if ( A[ i%(N*2) ] != A[ i%(N*2) ] ) {
102102
printf( "should not return NaN\n" );
103103
break;
104104
}
105105
}
106106
elapsed = tic() - t;
107-
if ( A[ 0 ] != A[ 0 ] ) {
107+
if ( A[ i%(N*2) ] != A[ i%(N*2) ] ) {
108108
printf( "should not return NaN\n" );
109109
}
110110
return elapsed;
@@ -114,7 +114,7 @@ static double benchmark1( int iterations, int N ) {
114114
* Runs a benchmark.
115115
*
116116
* @param iterations number of iterations
117-
* @param N number of elements along each dimension
117+
* @param N array dimension size
118118
* @return elapsed time in seconds
119119
*/
120120
static double benchmark2( int iterations, int N ) {
@@ -124,18 +124,18 @@ static double benchmark2( int iterations, int N ) {
124124
double t;
125125
int i;
126126

127-
stdlib_strided_sfill( N, 0.5f, x, 1 );
127+
stdlib_strided_sfill( N, 1.0f, x, 1 );
128128
stdlib_strided_sfill( N*N, 1.0f, A, 1 );
129129
t = tic();
130130
for ( i = 0; i < iterations; i++ ) {
131-
c_ssyr_ndarray( CblasUpper, N, 1.0, x, 1, 0, A, N, 1, 0 );
132-
if ( A[ 0 ] != A[ 0 ] ) {
131+
c_ssyr_ndarray( CblasUpper, N, 1.0f, x, 1, 0, A, N, 1, 0 );
132+
if ( A[ i%(N*2) ] != A[ i%(N*2) ] ) {
133133
printf( "should not return NaN\n" );
134134
break;
135135
}
136136
}
137137
elapsed = tic() - t;
138-
if ( A[ 0 ] != A[ 0 ] ) {
138+
if ( A[ i%(N*2) ] != A[ i%(N*2) ] ) {
139139
printf( "should not return NaN\n" );
140140
}
141141
return elapsed;

lib/node_modules/@stdlib/blas/base/ssyr/docs/repl.txt

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,24 @@
4646

4747
Examples
4848
--------
49+
// Standard usage:
4950
> var x = new {{alias:@stdlib/array/float32}}( [ 1.0, 1.0 ] );
50-
> var A = new {{alias:@stdlib/array/float32}}( [ 1.0, 2.0, 0.0, 2.0 ] );
51+
> var A = new {{alias:@stdlib/array/float32}}( [ 1.0, 2.0, 2.0, 1.0 ] );
5152
> {{alias}}( 'row-major', 'upper', 2, 1.0, x, 1, A, 2 )
52-
<Float32Array>[ 2.0, 3.0, 0.0, 3.0 ]
53+
<Float32Array>[ 2.0, 3.0, 2.0, 2.0 ]
54+
55+
// Advanced indexing:
56+
> var x = new {{alias:@stdlib/array/float32}}( [ 1.0, 1.0 ] );
57+
> var A = new {{alias:@stdlib/array/float32}}( [ 1.0, 2.0, 2.0, 1.0 ] );
58+
> {{alias}}( 'row-major', 'upper', 2, 1.0, x, -1, A, 2 )
59+
<Float32Array>[ 2.0, 3.0, 2.0, 2.0 ]
60+
61+
// Using typed array views:
62+
> var x0 = new {{alias:@stdlib/array/float32}}( [ 0.0, 1.0, 1.0 ] );
63+
> A = new {{alias:@stdlib/array/float32}}( [ 1.0, 2.0, 2.0, 1.0 ] );
64+
> var x1 = new {{alias:@stdlib/array/float32}}( x0.buffer, x0.BYTES_PER_ELEMENT*1 );
65+
> {{alias}}( 'row-major', 'upper', 2, 1.0, x, 1, A, 2 )
66+
<Float32Array>[ 2.0, 3.0, 2.0, 2.0 ]
5367

5468

5569
{{alias}}.ndarray( uplo, N, α, x, sx, ox, A, sa1, sa2, oa )
@@ -102,9 +116,9 @@
102116
Examples
103117
--------
104118
> var x = new {{alias:@stdlib/array/float32}}( [ 1.0, 1.0 ] );
105-
> var A = new {{alias:@stdlib/array/float32}}( [ 1.0, 2.0, 0.0, 2.0 ] );
119+
> var A = new {{alias:@stdlib/array/float32}}( [ 1.0, 2.0, 2.0, 1.0 ] );
106120
> {{alias}}.ndarray( 'upper', 2, 1.0, x, 1, 0, A, 2, 1, 0 )
107-
<Float32Array>[ 2.0, 3.0, 0.0, 3.0 ]
121+
<Float32Array>[ 2.0, 3.0, 2.0, 2.0 ]
108122

109123
See Also
110124
--------

0 commit comments

Comments
 (0)