Skip to content

Commit f1e8702

Browse files
aman-095kgrytestdlib-botShabiShett07
authored
feat: add blas/base/sger
PR-URL: #2767 Ref: #2039 Co-authored-by: Athan Reines <[email protected]> Reviewed-by: Athan Reines <[email protected]> Co-authored-by: stdlib-bot <[email protected]> Co-authored-by: Shabareesh Shetty <[email protected]> Reviewed-by: Shabareesh Shetty <[email protected]>
1 parent a2f0378 commit f1e8702

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+9076
-0
lines changed
Lines changed: 379 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,379 @@
1+
<!--
2+
3+
@license Apache-2.0
4+
5+
Copyright (c) 2025 The Stdlib Authors.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
19+
-->
20+
21+
# sger
22+
23+
> Perform the rank 1 operation `A = α*x*y^T + A`.
24+
25+
<section class="intro">
26+
27+
</section>
28+
29+
<!-- /.intro -->
30+
31+
<section class="usage">
32+
33+
## Usage
34+
35+
```javascript
36+
var sger = require( '@stdlib/blas/base/sger' );
37+
```
38+
39+
#### sger( order, M, N, α, x, sx, y, sy, A, lda )
40+
41+
Performs the rank 1 operation `A = α*x*y^T + A`, where `α` is a scalar, `x` is an `M` element vector, `y` is an `N` element vector, and `A` is an `M` by `N` matrix.
42+
43+
```javascript
44+
var Float32Array = require( '@stdlib/array/float32' );
45+
46+
var A = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] );
47+
var x = new Float32Array( [ 1.0, 1.0 ] );
48+
var y = new Float32Array( [ 1.0, 1.0, 1.0 ] );
49+
50+
sger( 'row-major', 2, 3, 1.0, x, 1, y, 1, A, 3 );
51+
// A => <Float32Array>[ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 ]
52+
```
53+
54+
The function has the following parameters:
55+
56+
- **order**: storage layout.
57+
- **M**: number of rows in the matrix `A`.
58+
- **N**: number of columns in the matrix `A`.
59+
- **α**: scalar constant.
60+
- **x**: an `M` element [`Float32Array`][mdn-float32array].
61+
- **sx**: stride length for `x`.
62+
- **y**: an `N` element [`Float32Array`][mdn-float32array].
63+
- **sy**: stride length for `y`.
64+
- **A**: input matrix stored in linear memory as a [`Float32Array`][mdn-float32array].
65+
- **lda**: stride of the first dimension of `A` (leading dimension of `A`).
66+
67+
The stride parameters determine which elements in the strided arrays are accessed at runtime. For example, to iterate over every other element in `x` and `y`,
68+
69+
```javascript
70+
var Float32Array = require( '@stdlib/array/float32' );
71+
72+
var A = new Float32Array( [ 1.0, 4.0, 2.0, 5.0, 3.0, 6.0 ] );
73+
var x = new Float32Array( [ 1.0, 0.0, 1.0, 0.0 ] );
74+
var y = new Float32Array( [ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0 ] );
75+
76+
sger( 'column-major', 2, 3, 1.0, x, 2, y, 2, A, 2 );
77+
// A => <Float32Array>[ 2.0, 5.0, 3.0, 6.0, 4.0, 7.0 ]
78+
```
79+
80+
Note that indexing is relative to the first index. To introduce an offset, use [`typed array`][mdn-typed-array] views.
81+
82+
<!-- eslint-disable stdlib/capitalized-comments -->
83+
84+
```javascript
85+
var Float32Array = require( '@stdlib/array/float32' );
86+
87+
// Initial arrays...
88+
var x0 = new Float32Array( [ 0.0, 1.0, 1.0 ] );
89+
var y0 = new Float32Array( [ 0.0, 1.0, 1.0, 1.0 ] );
90+
var A = new Float32Array( [ 1.0, 4.0, 2.0, 5.0, 3.0, 6.0 ] );
91+
92+
// Create offset views...
93+
var x1 = new Float32Array( x0.buffer, x0.BYTES_PER_ELEMENT*1 ); // start at 2nd element
94+
var y1 = new Float32Array( y0.buffer, y0.BYTES_PER_ELEMENT*1 ); // start at 2nd element
95+
96+
sger( 'column-major', 2, 3, 1.0, x1, -1, y1, -1, A, 2 );
97+
// A => <Float32Array>[ 2.0, 5.0, 3.0, 6.0, 4.0, 7.0 ]
98+
```
99+
100+
#### sger.ndarray( M, N, α, x, sx, ox, y, sy, oy, A, sa1, sa2, oa )
101+
102+
Performs the rank 1 operation `A = α*x*y^T + A`, using alternative indexing semantics and where `α` is a scalar, `x` is an `M` element vector, `y` is an `N` element vector, and `A` is an `M` by `N` matrix.
103+
104+
```javascript
105+
var Float32Array = require( '@stdlib/array/float32' );
106+
107+
var A = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] );
108+
var x = new Float32Array( [ 1.0, 1.0 ] );
109+
var y = new Float32Array( [ 1.0, 1.0, 1.0 ] );
110+
111+
sger.ndarray( 2, 3, 1.0, x, 1, 0, y, 1, 0, A, 3, 1, 0 );
112+
// A => <Float32Array>[ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 ]
113+
```
114+
115+
The function has the following additional parameters:
116+
117+
- **sa1**: stride of the first dimension of `A`.
118+
- **sa2**: stride of the second dimension of `A`.
119+
- **oa**: starting index for `A`.
120+
- **ox**: starting index for `x`.
121+
- **oy**: starting index for `y`.
122+
123+
While [`typed array`][mdn-typed-array] views mandate a view offset based on the underlying buffer, the offset parameters support indexing semantics based on starting indices. For example,
124+
125+
```javascript
126+
var Float32Array = require( '@stdlib/array/float32' );
127+
128+
var A = new Float32Array( [ 0.0, 0.0, 1.0, 4.0, 2.0, 5.0, 3.0, 6.0 ] );
129+
var x = new Float32Array( [ 0.0, 1.0, 0.0, 1.0, 0.0 ] );
130+
var y = new Float32Array( [ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0 ] );
131+
132+
sger.ndarray( 2, 3, 1.0, x, 2, 1, y, 2, 1, A, 1, 2, 2 );
133+
// A => <Float32Array>[ 0.0, 0.0, 2.0, 5.0, 3.0, 6.0, 4.0, 7.0 ]
134+
```
135+
136+
</section>
137+
138+
<!-- /.usage -->
139+
140+
<section class="notes">
141+
142+
## Notes
143+
144+
- `sger()` corresponds to the [BLAS][blas] level 2 function [`sger`][blas-sger].
145+
146+
</section>
147+
148+
<!-- /.notes -->
149+
150+
<section class="examples">
151+
152+
## Examples
153+
154+
<!-- eslint no-undef: "error" -->
155+
156+
```javascript
157+
var discreteUniform = require( '@stdlib/random/array/discrete-uniform' );
158+
var sger = require( '@stdlib/blas/base/sger' );
159+
160+
var opts = {
161+
'dtype': 'float32'
162+
};
163+
164+
var M = 3;
165+
var N = 5;
166+
167+
var A = discreteUniform( M*N, 0, 255, opts );
168+
var x = discreteUniform( M, 0, 255, opts );
169+
var y = discreteUniform( N, 0, 255, opts );
170+
171+
sger( 'row-major', M, N, 1.0, x, 1, y, 1, A, N );
172+
console.log( A );
173+
174+
sger.ndarray( M, N, 1.0, x, 1, 0, y, 1, 0, A, 1, M, 0 );
175+
console.log( A );
176+
177+
```
178+
179+
</section>
180+
181+
<!-- /.examples -->
182+
183+
<!-- C interface documentation. -->
184+
185+
* * *
186+
187+
<section class="c">
188+
189+
## C APIs
190+
191+
<!-- Section to include introductory text. Make sure to keep an empty line after the intro `section` element and another before the `/section` close. -->
192+
193+
<section class="intro">
194+
195+
</section>
196+
197+
<!-- /.intro -->
198+
199+
<!-- C usage documentation. -->
200+
201+
<section class="usage">
202+
203+
### Usage
204+
205+
```c
206+
#include "stdlib/blas/base/sger.h"
207+
```
208+
209+
#### c_sger( layout, M, N, alpha, \*X, strideX, \*Y, strideY, \*A, LDA )
210+
211+
Performs the rank 1 operation `A = alpha*x*y^T + A`, where `alpha` is a scalar, `X` is an `M` element vector, `Y` is an `N` element vector, and `A` is an `M`-by-`N` matrix.
212+
213+
```c
214+
#include "stdlib/blas/base/shared.h"
215+
216+
float A[ 3*4 ] = {
217+
0.0f, 0.0f, 0.0f, 0.0f,
218+
0.0f, 0.0f, 0.0f, 0.0f,
219+
0.0f, 0.0f, 0.0f, 0.0f
220+
};
221+
222+
const float x[ 3 ] = { 1.0f, 4.0f, 0.0f };
223+
const float y[ 4 ] = { 0.0f, 1.0f, 2.0f, 3.0f };
224+
225+
c_sger( CblasRowMajor, 3, 4, 1.0f, x, 1, y, 1, A, 4 );
226+
```
227+
228+
The function accepts the following arguments:
229+
230+
- **layout**: `[in] CBLAS_LAYOUT` storage layout.
231+
- **M**: `[in] CBLAS_INT` number of rows in the matrix `A`.
232+
- **N**: `[in] CBLAS_INT` number of columns in the matrix `A`.
233+
- **alpha**: `[in] float` scalar constant.
234+
- **X**: `[in] float*` an `M` element vector.
235+
- **strideX**: `[in] CBLAS_INT` stride length for `X`.
236+
- **Y**: `[in] float*` an `N` element vector.
237+
- **strideY**: `[in] CBLAS_INT` stride length for `Y`.
238+
- **A**: `[inout] float*` input matrix.
239+
- **LDA**: `[in] CBLAS_INT` stride of the first dimension of `A` (a.k.a., leading dimension of the matrix `A`).
240+
241+
```c
242+
void c_sger( const CBLAS_LAYOUT layout, const CBLAS_INT M, const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY, float *A, const CBLAS_INT LDA );
243+
```
244+
245+
#### c_sger_ndarray( M, N, alpha, \*X, sx, ox, \*Y, sy, oy, \*A, sa1, sa2, oa )
246+
247+
Performs the rank 1 operation `A = alpha*x*y^T + A`, using alternative indexing semantics and where `alpha` is a scalar, `X` is an `M` element vector, `Y` is an `N` element vector, and `A` is an `M`-by-`N` matrix.
248+
249+
```c
250+
#include "stdlib/blas/base/shared.h"
251+
252+
float A[ 3*4 ] = {
253+
0.0f, 0.0f, 0.0f, 0.0f,
254+
0.0f, 0.0f, 0.0f, 0.0f,
255+
0.0f, 0.0f, 0.0f, 0.0f
256+
};
257+
258+
const float x[ 3 ] = { 1.0f, 4.0f, 0.0f };
259+
const float y[ 4 ] = { 0.0f, 1.0f, 2.0f, 3.0f };
260+
261+
c_sger_ndarray( 3, 4, 1.0f, x, 1, 0, y, 1, 0, A, 4, 1, 0 );
262+
```
263+
264+
The function accepts the following arguments:
265+
266+
- **M**: `[in] CBLAS_INT` number of rows in the matrix `A`.
268+
- **N**: `[in] CBLAS_INT` number of columns in the matrix `A`.
269+
- **alpha**: `[in] float` scalar constant.
270+
- **X**: `[in] float*` an `M` element vector.
271+
- **sx**: `[in] CBLAS_INT` stride length for `X`.
272+
- **ox**: `[in] CBLAS_INT` starting index for `X`.
273+
- **Y**: `[in] float*` an `N` element vector.
274+
- **sy**: `[in] CBLAS_INT` stride length for `Y`.
275+
- **oy**: `[in] CBLAS_INT` starting index for `Y`.
276+
- **A**: `[inout] float*` input matrix.
277+
- **sa1**: `[in] CBLAS_INT` stride of the first dimension of `A`.
278+
- **sa2**: `[in] CBLAS_INT` stride of the second dimension of `A`.
279+
- **oa**: `[in] CBLAS_INT` starting index for `A`.
280+
281+
```c
282+
void c_sger_ndarray( const CBLAS_INT M, const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY, float *A, const CBLAS_INT strideA1, const CBLAS_INT strideA2, const CBLAS_INT offsetA );
283+
```
284+
285+
</section>
286+
287+
<!-- /.usage -->
288+
289+
<!-- C API usage notes. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
290+
291+
<section class="notes">
292+
293+
</section>
294+
295+
<!-- /.notes -->
296+
297+
<!-- C API usage examples. -->
298+
299+
<section class="examples">
300+
301+
### Examples
302+
303+
```c
304+
#include "stdlib/blas/base/sger.h"
305+
#include "stdlib/blas/base/shared.h"
306+
#include <stdio.h>
307+
308+
int main( void ) {
309+
// Define a 3x4 matrix stored in row-major order:
310+
float A[ 3*4 ] = {
311+
0.0f, 0.0f, 0.0f, 0.0f,
312+
0.0f, 0.0f, 0.0f, 0.0f,
313+
0.0f, 0.0f, 0.0f, 0.0f
314+
};
315+
// Define `x` and `y^T` vectors:
316+
const float x[ 3 ] = { 1.0f, 4.0f, 0.0f }; // M
317+
const float y[ 4 ] = { 0.0f, 1.0f, 2.0f, 3.0f }; // N
318+
319+
// Specify the number of rows and columns:
320+
const int M = 3;
321+
const int N = 4;
322+
323+
// Specify stride lengths:
324+
const int strideX = 1;
325+
const int strideY = 1;
326+
327+
// Perform operation:
328+
c_sger( CblasRowMajor, M, N, 1.0f, x, strideX, y, strideY, A, N );
329+
330+
// Print the result:
331+
for ( int i = 0; i < M; i++ ) {
332+
for ( int j = 0; j < N; j++ ) {
333+
printf( "A[%i,%i] = %f\n", i, j, A[ (i*N)+j ] );
334+
}
335+
}
336+
337+
// Perform operation using alternative indexing semantics:
338+
c_sger_ndarray( M, N, 1.0f, x, strideX, 0, y, strideY, 0, A, N, 1, 0 );
339+
340+
// Print the result:
341+
for ( int i = 0; i < M; i++ ) {
342+
for ( int j = 0; j < N; j++ ) {
343+
printf( "A[%i,%i] = %f\n", i, j, A[ (i*N)+j ] );
344+
}
345+
}
346+
}
347+
```
348+
349+
</section>
350+
351+
<!-- /.examples -->
352+
353+
</section>
354+
355+
<!-- /.c -->
356+
357+
<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
358+
359+
<section class="related">
360+
361+
</section>
362+
363+
<!-- /.related -->
364+
365+
<!-- Section for all links. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
366+
367+
<section class="links">
368+
369+
[blas]: http://www.netlib.org/blas
370+
371+
[blas-sger]: https://www.netlib.org/lapack/explore-html/d8/d75/group__ger_ga95baec6bb0a84393d7bc67212b566ab0.html
372+
373+
[mdn-float32array]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Float32Array
374+
375+
[mdn-typed-array]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray
376+
377+
</section>
378+
379+
<!-- /.links -->

0 commit comments

Comments
 (0)