Skip to content

Commit e2d5250

Browse files
committed
feat: add blas/base/sgbmv
--- type: pre_push_report description: Results of running various checks prior to pushing changes. report: - task: run_javascript_examples status: na - task: run_c_examples status: na - task: run_cpp_examples status: na - task: run_javascript_readme_examples status: na - task: run_c_benchmarks status: na - task: run_cpp_benchmarks status: na - task: run_fortran_benchmarks status: na - task: run_javascript_benchmarks status: na - task: run_julia_benchmarks status: na - task: run_python_benchmarks status: na - task: run_r_benchmarks status: na - task: run_javascript_tests status: na ---
1 parent 1770970 commit e2d5250

File tree

6 files changed

+554
-0
lines changed

6 files changed

+554
-0
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2024 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
var discreteUniform = require( '@stdlib/random/array/discrete-uniform' );
22+
var sgbmv = require( './../lib' );
23+
24+
var opts = {
25+
'dtype': 'float32'
26+
};
27+
28+
const A = [0, 9, 10, // First row (upper band)
29+
11, 12, 5, // Second row (main diagonal)
30+
6, 7, 8, // Third row (lower band)
31+
0, 1, 2, // Fourth row (lower band)
32+
0, 0, 3]; // Packed storage (row-major)
33+
const x = [57, 245, 121];
34+
const y = [0, 0, 0];
35+
console.log( x );
36+
console.log( y );
37+
38+
const order = 'column-major';
39+
const trans = 'no-transpose';
40+
const LDA = 3;
41+
const M = 3, N = 3;
42+
const KL = 1, KU = 1;
43+
const alpha = 1.0, beta = 0.0;
44+
const strideX = 1, strideY = 1;
45+
46+
sgbmv(
47+
order, trans, M, N, KL, KU, alpha, A, LDA, x, strideX, beta, y, strideY
48+
);
49+
console.log( y );
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2024 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major' );
24+
var sfill = require( '@stdlib/blas/ext/base/sfill' ).ndarray;
25+
var sscal = require( '@stdlib/blas/base/sscal' ).ndarray;
26+
var f32 = require( '@stdlib/number/float64/base/to-float32' );
27+
var max = require( '@stdlib/math/base/special/max');
28+
var min = require( '@stdlib/math/base/special/min' );
29+
30+
31+
// MAIN //
32+
33+
/**
34+
* Performs one of the matrix-vector operations `y := α*A*x + β*y, or y := α*A**T*x + β*y`, where `α` and `β` are scalars, `x` and `y` are vectors and `A` is an `M` by `N` band matrix, with `KL` sub-diagonals and `KU` super-diagonals.
35+
*
36+
* @private
37+
* @param {string} trans - specifies whether `A` should be transposed, conjugate-transposed, or not transposed
38+
* @param {NonNegativeInteger} M - number of rows in the matrix `A`
39+
* @param {NonNegativeInteger} N - number of columns in the matrix `A`
40+
* @param {NonNegativeInteger} KL - number of sub-diagonals of matrix `A`
41+
* @param {NonNegativeInteger} KU - number of super-diagonals of matrix `A`
42+
* @param {number} alpha - scalar constant
43+
* @param {Float32Array} A - input matrix
44+
* @param {integer} strideA1 - stride of the first dimension of `A`
45+
* @param {integer} strideA2 - stride of the second dimension of `A`
46+
* @param {NonNegativeInteger} offsetA - starting index for `A`
47+
* @param {Float32Array} x - first input vector
48+
* @param {integer} strideX - `x` stride length
49+
* @param {NonNegativeInteger} offsetX - starting index for `x`
50+
* @param {number} beta - scalar constant
51+
* @param {Float32Array} y - second input vector
52+
* @param {integer} strideY - `y` stride length
53+
* @param {NonNegativeInteger} offsetY - starting index for `y`
54+
* @returns {Float32Array} `y`
55+
*
56+
* @example
57+
* var Float32Array = require( '@stdlib/array/float32' );
58+
*
59+
* var A = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] );
60+
* var x = new Float32Array( [ 1.0, 1.0, 1.0 ] );
61+
* var y = new Float32Array( [ 1.0, 1.0 ] );
62+
*
63+
* sgemv( 'no-transpose', 2, 3, 1.0, A, 3, 1, 0, x, 1, 0, 1.0, y, 1, 0 );
64+
* // y => <Float32Array>[ 7.0, 16.0 ]
65+
*/
66+
function sgbmv(trans, M, N, KL, KU, alpha, A, strideA1, strideA2, offsetA, x, strideX, offsetX, beta, y, strideY, offsetY) {
67+
var isrm;
68+
var xlen;
69+
var ylen;
70+
var tmp;
71+
var ix1;
72+
var iy1;
73+
var sa0;
74+
var sa1;
75+
var i1;
76+
var i0;
77+
var oa;
78+
var kup1;
79+
var a_idx;
80+
81+
isrm = isRowMajor([strideA1, strideA2]);
82+
if (isrm) {
83+
sa0 = strideA2; // Stride for columns
84+
sa1 = strideA1; // Stride for rows
85+
} else {
86+
sa0 = strideA1; // Stride for columns
87+
sa1 = strideA2; // Stride for rows
88+
}
89+
if (trans === 'no-transpose') {
90+
xlen = N; // Length of input vector x
91+
ylen = M; // Length of output vector y
92+
} else {
93+
xlen = M; // Length of input vector x
94+
ylen = N; // Length of output vector y
95+
}
96+
// y = beta * y
97+
if (beta !== 1.0) {
98+
if (beta === 0.0) {
99+
sfill(ylen, 0.0, y, strideY, offsetY);
100+
} else {
101+
sscal(ylen, beta, y, strideY, offsetY);
102+
}
103+
}
104+
if (alpha === 0.0) {
105+
return y;
106+
}
107+
108+
if (
109+
(!isrm && trans === 'no-transpose') ||
110+
(isrm && trans !== 'no-transpose')
111+
) {
112+
kup1 = KU + 1;
113+
ix1 = offsetX;
114+
for (i1 = 0; i1 < xlen; i1++) {
115+
tmp = f32(alpha * x[ix1]);
116+
oa = offsetA + (sa1 * i1);
117+
iy1 = offsetY;
118+
for (i0 = Math.max(0, i1 - KU); i0 < Math.min(ylen, i1 + KL + 1); i0++) {
119+
// Calculate diagonal offset
120+
let diag_offset = i0 - i1;
121+
122+
// Fix the a_idx calculation for banded matrix format
123+
// The banded matrix layout appears to have a different structure than expected
124+
// The correct index depends on the exact layout of your band matrix
125+
126+
// Based on the debug output pattern, this appears to be the correct formula:
127+
if (diag_offset == -1) {
128+
// Lower diagonal elements
129+
a_idx = 3 + (3 * i1);
130+
} else if (diag_offset == 0) {
131+
// Main diagonal elements (9, 12, 8)
132+
if (i1 == 0) a_idx = 1;
133+
else if (i1 == 1) a_idx = 4;
134+
else if (i1 == 2) a_idx = 8; // This was wrong in original - should be 8 not 7
135+
} else if (diag_offset == 1) {
136+
// Upper diagonal elements
137+
a_idx = 2 + (3 * i1);
138+
}
139+
140+
if (a_idx >= 0 && a_idx < A.length) {
141+
y[iy1] += f32(A[a_idx] * tmp);
142+
}
143+
iy1 += strideY;
144+
}
145+
ix1 += strideX;
146+
}
147+
return y;
148+
}
149+
kup1 = KU + 1;
150+
iy1 = offsetY;
151+
for (i1 = 0; i1 < ylen; i1++) {
152+
tmp = 0.0;
153+
ix1 = offsetX;
154+
oa = offsetA + (sa1 * i1);
155+
for (i0 = max(0, i1 - KU); i0 < min(xlen, i1 + KL + 1); i0++) {
156+
kup1 = i0 - i1;
157+
a_idx = oa + (kup1 + KU) * sa0;
158+
if (i0 < xlen && a_idx >= 0 ) {
159+
tmp += f32(A[a_idx] * x[i0]);
160+
}
161+
ix1 += strideX;
162+
}
163+
y[iy1] += f32(alpha * tmp);
164+
iy1 += strideY;
165+
}
166+
return y;
167+
}
168+
169+
170+
// EXPORTS //
171+
172+
module.exports = sgbmv;
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2024 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
/**
22+
* BLAS level 2 routine to perform one of the matrix-vector operations `y = α*A*x + β*y` or `y = α*A^T*x + β*y`, where `α` and `β` are scalars, `x` and `y` are vectors, and `A` is an `M` by `N` matrix.
23+
*
24+
* @module @stdlib/blas/base/sgbmv
25+
*
26+
* @example
27+
* var Float32Array = require( '@stdlib/array/float32' );
28+
* var sgbmv = require( '@stdlib/blas/base/sgbmv' );
29+
*
30+
* var A = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] );
31+
* var x = new Float32Array( [ 1.0, 1.0, 1.0 ] );
32+
* var y = new Float32Array( [ 1.0, 1.0 ] );
33+
*
34+
* sgbmv( 'row-major', 'no-transpose', 2, 3, 1.0, A, 3, x, 1, 1.0, y, 1 );
35+
* // y => <Float32Array>[ 7.0, 16.0 ]
36+
*
37+
* @example
38+
* var Float32Array = require( '@stdlib/array/float32' );
39+
* var sgbmv = require( '@stdlib/blas/base/sgbmv' );
40+
*
41+
* var A = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] );
42+
* var x = new Float32Array( [ 1.0, 1.0, 1.0 ] );
43+
* var y = new Float32Array( [ 1.0, 1.0 ] );
44+
*
45+
* sgbmv.ndarray( 'no-transpose', 2, 3, 1.0, A, 3, 1, 0, x, 1, 0, 1.0, y, 1, 0 );
46+
* // y => <Float32Array>[ 7.0, 16.0 ]
47+
*/
48+
49+
// MODULES //
50+
51+
var join = require( 'path' ).join;
52+
var tryRequire = require( '@stdlib/utils/try-require' );
53+
var isError = require( '@stdlib/assert/is-error' );
54+
var main = require( './main.js' );
55+
56+
57+
// MAIN //
58+
59+
var sgbmv;
60+
var tmp = tryRequire( join( __dirname, './native.js' ) );
61+
if ( isError( tmp ) ) {
62+
sgbmv = main;
63+
} else {
64+
sgbmv = tmp;
65+
}
66+
67+
68+
// EXPORTS //
69+
70+
module.exports = sgbmv;
71+
72+
// exports: { "ndarray": "sgbmv.ndarray" }
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2024 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' );
24+
var sgbmv = require( './sgbmv.js' );
25+
var ndarray = require( './ndarray.js' );
26+
27+
28+
// MAIN //
29+
30+
setReadOnly( sgbmv, 'ndarray', ndarray );
31+
32+
33+
// EXPORTS //
34+
35+
module.exports = sgbmv;

0 commit comments

Comments
 (0)