Skip to content

Commit af8ee93

Browse files
committed
chore: add implementation
1 parent bf45d08 commit af8ee93

39 files changed

+2837
-270
lines changed

lib/node_modules/@stdlib/blas/base/strsm/docs/types/index.d.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ interface Routine {
5050
* var B = new Float32Array( [ 5.0, 7.0, 0.0, 8.0 ] );
5151
*
5252
* strsm( 'row-major', 'left', 'upper', 'no-transpose', 'non-unit', 2, 2, 6.0, A, 2, B, 2 );
53-
* // B => <Float32Array>[ 30.0, 6.0, 0.0, 12.0 ]
53+
* // B => <Float32Array>[ 30.0, 0.0, 0.0, 12.0 ]
5454
*/
5555
( order: Layout, side: OperationSide, uplo: MatrixTriangle, transa: TransposeOperation, diag: DiagonalType, m: number, n: number, alpha: number, A: Float32Array, LDA: number, B: Float32Array, LDB: number ): Float32Array;
5656

@@ -81,7 +81,7 @@ interface Routine {
8181
* var B = new Float32Array( [ 0.0, 5.0, 7.0, 0.0, 8.0 ] );
8282
*
8383
* strsm.ndarray( 'left', 'upper', 'no-transpose', 'non-unit', 2, 2, 6.0, A, 2, 1, 2, B, 2, 1, 1 );
84-
* // B => <Float32Array>[ 0.0, 30.0, 6.0, 0.0, 12.0 ]
84+
* // B => <Float32Array>[ 0.0, 30.0, 0.0, 0.0, 12.0 ]
8585
*/
8686
ndarray( side: OperationSide, uplo: MatrixTriangle, transa: TransposeOperation, diag: DiagonalType, m: number, n: number, alpha: number, A: Float32Array, strideA1: number, strideA2: number, offsetA: number, B: Float32Array, strideB1: number, strideB2: number, offsetB: number ): Float32Array;
8787
}
@@ -110,7 +110,7 @@ interface Routine {
110110
* var B = new Float32Array( [ 5.0, 7.0, 0.0, 8.0 ] );
111111
*
112112
* strsm( 'row-major', 'left', 'upper', 'no-transpose', 'non-unit', 2, 2, 6.0, A, 2, B, 2 );
113-
* // B => <Float32Array>[ 30.0, 6.0, 0.0, 12.0 ]
113+
* // B => <Float32Array>[ 30.0, 0.0, 0.0, 12.0 ]
114114
*
115115
* @example
116116
* var Float32Array = require( '@stdlib/array/float32' );
@@ -119,7 +119,7 @@ interface Routine {
119119
* var B = new Float32Array( [ 0.0, 5.0, 7.0, 0.0, 8.0 ] );
120120
*
121121
* strsm.ndarray( 'left', 'upper', 'no-transpose', 'non-unit', 2, 2, 6.0, A, 2, 1, 2, B, 2, 1, 1 );
122-
* // B => <Float32Array>[ 0.0, 30.0, 6.0, 0.0, 12.0 ]
122+
* // B => <Float32Array>[ 0.0, 30.0, 0.0, 0.0, 12.0 ]
123123
*/
124124
declare var strsm: Routine;
125125

lib/node_modules/@stdlib/blas/base/strsm/lib/base.js

Lines changed: 132 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,10 @@ function zeros( M, N, X, strideX1, strideX2, offsetX ) { // TODO: consider movin
117117
* var Float32Array = require( '@stdlib/array/float32' );
118118
*
119119
* var A = new Float32Array( [ 1.0, 3.0, 0.0, 4.0 ] );
120-
* var B = new Float32Array( [ 5.0, 7.0, 0.0, 8.0 ] );
120+
* var B = new Float32Array( [ 5.0, 6.0, 0.0, 8.0 ] );
121121
*
122122
* strsm( 'left', 'upper', 'no-transpose', 'non-unit', 2, 2, 6.0, A, 2, 1, 0, B, 2, 1, 0 );
123-
* // B => <Float32Array>[ 30.0, 6.0, 0.0, 12.0 ]
123+
* // B => <Float32Array>[ 30.0, 0.0, 0.0, 12.0 ]
124124
*/
125125
function strsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, offsetA, B, strideB1, strideB2, offsetB ) { // eslint-disable-line max-params
126126
var nonunit;
@@ -171,123 +171,146 @@ function strsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
171171
zeros( M, N, B, sb0, sb1, offsetB );
172172
return B;
173173
}
174-
if ( side === 'left' ) {
175-
if ( transa === 'no-transpose' ) {
176-
// B := alpha * inv( A ) * B
177-
if ( uplo === 'upper' ) {
178-
for ( j = 0; j < N; j++ ) {
179-
ob = offsetB + ( j * sb0 );
180-
if ( alpha !== 1.0 ) {
181-
for ( i = 0; i < M; i++ ) {
182-
B[ ob + ( i * sb1 ) ] = f32( B[ ob + ( i * sb1 ) ] * alpha );
183-
}
184-
}
185-
for ( k = M - 1; k >= 0; k-- ) {
186-
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
187-
ob2 = ob + ( k * sb1 );
188-
if ( B[ ob2 ] !== 0.0 ) {
189-
if ( nonunit ) {
190-
B[ ob2 ] = f32( B[ ob2 ] / A[ oa2 ] );
191-
}
192-
for ( i = 0; i < k; i++ ) {
193-
B[ ob + ( i * sb1 ) ] -= f32( B[ ob2 ] * A[ offsetA + ( i * sa1 ) + ( k * sa0 ) ] );
194-
}
195-
}
196-
}
174+
if (
175+
( isrma && side === 'left' && uplo === 'upper' && transa === 'no-transpose' ) ||
176+
( !isrma && side === 'right' && uplo === 'lower' && transa === 'no-transpose' )
177+
) {
178+
for ( k = N - 1; k >= 0; k-- ) {
179+
if ( nonunit ) {
180+
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
181+
tmp = f32( 1.0 / A[ oa2 ] );
182+
for ( i = 0; i < M; i++ ) {
183+
ob2 = offsetB + ( i * sb0 ) + ( k * sb1 );
184+
B[ ob2 ] = f32( B[ ob2 ] * tmp );
197185
}
198-
return B;
199186
}
200-
// Lower
201-
for ( j = 0; j < N; j++ ) {
202-
ob = offsetB + ( j * sb0 );
203-
if ( alpha !== 1.0 ) {
187+
for ( j = 0; j < k; j++ ) {
188+
oa2 = offsetA + ( j * sa1 ) + ( k * sa0 );
189+
if ( A[ oa2 ] !== 0.0 ) {
204190
for ( i = 0; i < M; i++ ) {
205-
B[ ob + ( i * sb1 ) ] = f32( B[ ob + ( i * sb1 ) ] * alpha );
191+
ob = offsetB + ( i * sb0 );
192+
B[ ob + j * sb1 ] -= f32( A[ oa2 ] * B[ ob + k * sb1 ] );
206193
}
207194
}
208-
for ( k = 0; k < M; k++ ) {
209-
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
210-
ob2 = ob + ( k * sb1 );
211-
if ( B[ ob2 ] !== 0.0 ) {
212-
if ( nonunit ) {
213-
B[ ob2 ] = f32( B[ ob2 ] / A[ oa2 ] );
214-
}
215-
for ( i = k + 1; i < M; i++ ) {
216-
oa2 = offsetA + i * sa1 + k * sa0;
217-
B[ ob + ( i * sb1 ) ] -= f32( B[ ob2 ] * A[ oa2 ] );
218-
}
219-
}
195+
}
196+
if ( alpha !== 1.0 ) {
197+
for ( i = 0; i < M; i++ ) {
198+
ob2 = offsetB + ( i * sb0 ) + ( k * sb1 );
199+
B[ ob2 ] = f32( B[ ob2 ] * alpha );
220200
}
221201
}
222-
return B;
223202
}
224-
// B := alpha * inv( A**T ) * B
225-
if ( uplo === 'upper' ) {
226-
for ( j = 0; j < N; j++ ) {
227-
ob = offsetB + ( j * sb0 );
203+
return B;
204+
}
205+
if (
206+
( isrma && side === 'left' && uplo === 'lower' && transa === 'no-transpose' ) ||
207+
( !isrma && side === 'right' && uplo === 'upper' && transa === 'no-transpose' )
208+
) {
209+
for ( j = 0; j < N; j++ ) {
210+
ob = offsetB + ( j * sb0 );
211+
if ( alpha !== 1.0 ) {
228212
for ( i = 0; i < M; i++ ) {
229-
oa2 = offsetA + ( i * sa1 ) + ( i * sa0 );
230-
tmp = f32( B[ ob + ( i * sb1 ) ] * alpha );
231-
for ( k = 0; k < i; k++ ) {
232-
oa = offsetA + ( k * sa1 );
233-
tmp -= f32( A[ oa + ( i * sa0 ) ] * B[ ob + ( k * sb1 ) ] );
234-
}
213+
B[ ob + ( i * sb1 ) ] = f32( B[ ob + ( i * sb1 ) ] * alpha );
214+
}
215+
}
216+
for ( k = 0; k < M; k++ ) {
217+
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
218+
ob2 = ob + ( k * sb1 );
219+
if ( B[ ob2 ] !== 0.0 ) {
235220
if ( nonunit ) {
236-
tmp = f32( tmp / A[ oa2 ] );
221+
B[ ob2 ] = f32( B[ ob2 ] / A[ oa2 ] );
222+
}
223+
for ( i = k + 1; i < M; i++ ) {
224+
oa2 = offsetA + ( i * sa1 ) + ( k * sa0 );
225+
B[ ob + ( i * sb1 ) ] -= f32( B[ ob2 ] * A[ oa2 ] );
226+
}
227+
}
228+
}
229+
}
230+
return B;
231+
}
232+
if (
233+
( isrma && side === 'left' && uplo === 'upper' && transa !== 'no-transpose' ) ||
234+
( !isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' )
235+
) {
236+
for ( j = 0; j < N; j++ ) {
237+
for ( i = 0; i < M; i++ ) {
238+
ob2 = offsetB + ( i * sb0 ) + ( j * sb1 );
239+
if ( alpha !== 1.0 ) {
240+
B[ ob2 ] = f32( B[ ob2 ] * alpha );
241+
}
242+
}
243+
for ( k = 0; k < j; k++ ) {
244+
for ( i = 0; i < M; i++ ) {
245+
ob = offsetB + ( i * sb0 );
246+
oa2 = offsetA + ( k * sa1 ) + ( j * sa0 );
247+
if ( A[ oa2 ] !== 0.0 ) {
248+
B[ ob + ( j * sb1 ) ] -= f32( A[ oa2 ] * B[ ob + ( k * sb1 ) ] );
237249
}
238-
B[ ob + ( i * sb1 ) ] = tmp;
239250
}
240251
}
241-
return B;
252+
if ( nonunit ) {
253+
oa2 = offsetA + ( j * sa1 ) + (j * sa0 );
254+
tmp = f32( 1.0 / A[ oa2 ] );
255+
for ( i = 0; i < M; i++ ) {
256+
ob2 = offsetB + ( i * sb0 ) + ( j * sb1 );
257+
B[ ob2 ] = f32( B[ ob2 ] * tmp );
258+
}
259+
}
242260
}
243-
// Lower
261+
return B;
262+
}
263+
if (
264+
( isrma && side === 'left' && uplo === 'lower' && transa !== 'no-transpose' ) ||
265+
( !isrma && side === 'right' && uplo === 'upper' && transa === 'transpose' )
266+
) {
244267
for ( j = 0; j < N; j++ ) {
245268
ob = offsetB + ( j * sb0 );
269+
if ( alpha !== 1.0 ) {
270+
for ( i = 0; i < M; i++ ) {
271+
B[ ob + ( i * sb1 ) ] = f32( B[ ob + ( i * sb1 ) ] * alpha );
272+
}
273+
}
246274
for ( i = M - 1; i >= 0; i-- ) {
247-
oa2 = offsetA + ( i * sa1 ) + ( i * sa0 );
248-
tmp = f32( B[ ob + ( i * sb1 ) ] * alpha );
275+
ob2 = ob + ( i * sb1 );
249276
for ( k = i + 1; k < M; k++ ) {
250-
tmp -= f32( A[ offsetA + ( k * sa1 ) + ( i * sa0 ) ] * B[ ob + ( k * sb1 ) ] );
277+
oa2 = offsetA + ( i * sa0 ) + ( k * sa1 );
278+
B[ ob2 ] -= f32( A[ oa2 ] * B[ ob + ( k * sb1 ) ] );
251279
}
252280
if ( nonunit ) {
253-
tmp = f32( tmp / A[ oa2 ] );
281+
oa2 = offsetA + ( i * sa0 ) + ( i * sa1 );
282+
B[ ob2 ] = f32( B[ ob2 ] / A[ oa2 ] );
254283
}
255-
B[ ob + ( i * sb1 ) ] = tmp;
284+
B[ ob + ( i * sb1 ) ] = B[ ob2 ];
256285
}
257286
}
258287
return B;
259288
}
260-
// Right
261-
if ( transa === 'no-transpose' ) {
262-
if ( uplo === 'upper' ) {
263-
for ( j = 0; j < N; j++ ) {
264-
for ( i = 0; i < M; i++ ) {
265-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
266-
if ( alpha !== 1.0 ) {
267-
B[ ob2 ] = f32( B[ ob2 ] * alpha );
268-
}
269-
}
270-
for ( k = 0; k < j; k++ ) {
271-
for ( i = 0; i < M; i++ ) {
272-
ob2 = offsetB + i * sb1;
273-
oa2 = offsetA + k * sa1 + j * sa0;
274-
if ( A[ oa2 ] !== 0.0 ) {
275-
B[ ob2 + j * sb0 ] -= f32( A[ oa2 ] * B[ ob2 + k * sb0 ] );
276-
}
277-
}
289+
if (
290+
( isrma && side === 'right' && uplo === 'upper' && transa === 'no-transpose' ) ||
291+
( !isrma && side === 'left' && uplo === 'lower' && transa === 'no-transpose' )
292+
) {
293+
for ( j = 0; j < N; j++ ) {
294+
ob = offsetB + ( j * sb1 );
295+
for ( i = 0; i < M; i++ ) {
296+
oa2 = offsetA + ( i * sa1 ) + ( i * sa0 );
297+
tmp = f32( B[ ob + ( i * sb0 ) ] * alpha );
298+
for ( k = 0; k < i; k++ ) {
299+
oa = offsetA + ( k * sa1 );
300+
tmp -= f32( A[ oa + ( i * sa0 ) ] * B[ ob + ( k * sb0 ) ] );
278301
}
279302
if ( nonunit ) {
280-
oa2 = offsetA + j * sa1 + j * sa0;
281-
tmp = f32( 1.0 / A[ oa2 ] );
282-
for ( i = 0; i < M; i++ ) {
283-
ob2 = offsetB + i * sb1 + j * sb0;
284-
B[ ob2 ] = f32( B[ ob2 ] * tmp );
285-
}
303+
tmp = f32( tmp / A[ oa2 ] );
286304
}
305+
B[ ob + ( i * sb0 ) ] = tmp;
287306
}
288-
return B;
289307
}
290-
// Lower
308+
return B;
309+
}
310+
if (
311+
( isrma && side === 'right' && uplo === 'lower' && transa === 'no-transpose' ) ||
312+
( !isrma && side === 'left' && uplo === 'upper' && transa === 'no-transpose' )
313+
) {
291314
for ( j = N - 1; j >= 0; j-- ) {
292315
for ( i = 0; i < M; i++ ) {
293316
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
@@ -315,36 +338,34 @@ function strsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
315338
}
316339
return B;
317340
}
318-
// B := alpha * B * inv( A**T )
319-
if ( uplo === 'upper' ) {
320-
for ( k = N - 1; k >= 0; k-- ) {
321-
if ( nonunit ) {
322-
oa2 = offsetA + k * sa1 + k * sa0;
323-
tmp = f32( 1.0 / A[ oa2 ] );
341+
if (
342+
( isrma && side === 'right' && uplo === 'upper' && transa !== 'no-transpose' ) ||
343+
( !isrma && side === 'left' && uplo === 'lower' && transa !== 'no-transpose' )
344+
) {
345+
for ( j = 0; j < N; j++ ) {
346+
ob = offsetB + ( j * sb1 );
347+
if ( alpha !== 1.0 ) {
324348
for ( i = 0; i < M; i++ ) {
325-
ob2 = offsetB + i * sb1 + k * sb0;
326-
B[ ob2 ] = f32( B[ ob2 ] * tmp );
349+
B[ ob + ( i * sb0 ) ] = f32( B[ ob + ( i * sb0 ) ] * alpha );
327350
}
328351
}
329-
for ( j = 0; j < k; j++ ) {
330-
oa2 = offsetA + ( j * sa1 ) + ( k * sa0 );
331-
if ( A[ oa2 ] !== 0.0 ) {
332-
for ( i = 0; i < M; i++ ) {
333-
ob = offsetB + i * sb1;
334-
B[ ob + j * sb0 ] -= f32( A[ oa2 ] * B[ ob + k * sb0 ] );
352+
for ( k = M - 1; k >= 0; k-- ) {
353+
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
354+
ob2 = ob + ( k * sb0 );
355+
if ( B[ ob2 ] !== 0.0 ) {
356+
if ( nonunit ) {
357+
B[ ob2 ] = f32( B[ ob2 ] / A[ oa2 ] );
358+
}
359+
for ( i = 0; i < k; i++ ) {
360+
oa = offsetA + ( i * sa1 ) + ( k * sa0 );
361+
B[ ob + ( i * sb0 ) ] -= f32( B[ ob2 ] * A[ oa ] );
335362
}
336-
}
337-
}
338-
if ( alpha !== 1.0 ) {
339-
for ( i = 0; i < M; i++ ) {
340-
ob2 = offsetB + i * sb1 + k * sb0;
341-
B[ ob2 ] = f32( B[ ob2 ] * alpha );
342363
}
343364
}
344365
}
345366
return B;
346367
}
347-
// Lower
368+
// ( isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' ) || ( !isrma && side === 'left' && uplo === 'upper' && transa !== 'no-transpose' )
348369
for ( k = 0; k < N; k++ ) {
349370
ob = offsetB + ( k * sb0 );
350371
oa = offsetA + ( k * sa1 );
@@ -360,7 +381,7 @@ function strsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
360381
oa2 = offsetA + j * sa1 + k * sa0;
361382
if ( A[ oa2 ] !== 0.0 ) {
362383
for ( i = 0; i < M; i++ ) {
363-
B[ ob2 + ( i * sb1 ) ] -= f32( a_jk * B[ ob + ( i * sb1 ) ] );
384+
B[ ob2 + ( i * sb1 ) ] -= f32( A[ oa2 ] * B[ ob + ( i * sb1 ) ] );
364385
}
365386
}
366387
}

lib/node_modules/@stdlib/blas/base/strsm/lib/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
* var B = new Float32Array( [ 5.0, 7.0, 0.0, 8.0 ] );
3232
*
3333
* strsm( 'row-major', 'left', 'upper', 'no-transpose', 'non-unit', 2, 2, 6.0, A, 2, B, 2 );
34-
* // B => <Float32Array>[ 30.0, 6.0, 0.0, 12.0 ]
34+
* // B => <Float32Array>[ 30.0, 0.0, 0.0, 12.0 ]
3535
*
3636
* @example
3737
* var Float32Array = require( '@stdlib/array/float32' );

lib/node_modules/@stdlib/blas/base/strsm/lib/ndarray.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ var base = require( './base.js' );
6565
* var B = new Float32Array( [ 0.0, 5.0, 7.0, 0.0, 8.0 ] );
6666
*
6767
* strsm( 'left', 'upper', 'no-transpose', 'non-unit', 2, 2, 6.0, A, 2, 1, 2, B, 2, 1, 1 );
68-
* // B => <Float32Array>[ 0.0, 30.0, 6.0, 0.0, 12.0 ]
68+
* // B => <Float32Array>[ 0.0, 30.0, 0.0, 0.0, 12.0 ]
6969
*/
7070
function strsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, offsetA, B, strideB1, strideB2, offsetB ) { // eslint-disable-line max-len, max-params
7171
if ( !isOperationSide( side ) ) {

lib/node_modules/@stdlib/blas/base/strsm/lib/strsm.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ var base = require( './base.js' );
6666
* var B = new Float32Array( [ 5.0, 7.0, 0.0, 8.0 ] );
6767
*
6868
* strsm( 'row-major', 'left', 'upper', 'no-transpose', 'non-unit', 2, 2, 6.0, A, 2, B, 2 );
69-
* // B => <Float32Array>[ 30.0, 6.0, 0.0, 12.0 ]
69+
* // B => <Float32Array>[ 30.0, 0.0, 0.0, 12.0 ]
7070
*/
7171
function strsm( order, side, uplo, transa, diag, M, N, alpha, A, LDA, B, LDB ) { // eslint-disable-line max-params
7272
var nrowsa;
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"order": "column-major",
3+
"side": "left",
4+
"uplo": "lower",
5+
"transA": "no-transpose",
6+
"diag": "non-unit",
7+
"M": 2,
8+
"N": 2,
9+
"alpha": 1.0,
10+
"A": [ 1.0, 2.0, 0.0, 3.0 ],
11+
"lda": 2,
12+
"strideA1": 1,
13+
"strideA2": 2,
14+
"offsetA": 0,
15+
"B": [ 4.0, 2.0, 1.0, 2.0 ],
16+
"ldb": 2,
17+
"strideB1": 1,
18+
"strideB2": 2,
19+
"offsetB": 0,
20+
"B_out": [ 4.0, -2.0, 1.0, 0.0 ]
21+
}

0 commit comments

Comments
 (0)