Skip to content

Commit 463d580

Browse files
committed
refactor: reduce loops and arithmetic operations
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent 6d77297 commit 463d580

File tree

1 file changed

+74
-72
lines changed
  • lib/node_modules/@stdlib/blas/base/dtrmm/lib

1 file changed

+74
-72
lines changed

lib/node_modules/@stdlib/blas/base/dtrmm/lib/base.js

Lines changed: 74 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -131,12 +131,15 @@ function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
131131
var sa1;
132132
var sb0;
133133
var sb1;
134+
var oa;
134135
var ob;
136+
var ia;
137+
var ib;
135138
var i;
136139
var j;
137140
var k;
138141

139-
// Note on variable naming convention: sa#, ix#, i# where # corresponds to the loop number, with `0` being the innermost loop...
142+
// Note on variable naming convention: sa#, sb# where # corresponds to the loop number, with `0` being the innermost loop...
140143

141144
isrma = isRowMajor( [ strideA1, strideA2 ] );
142145
nonunit = ( diag === 'non-unit' );
@@ -172,10 +175,10 @@ function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
172175
tmp = alpha * B[ ob2 ];
173176
ia = offsetA + ( k*sa0 );
174177
for ( i = 0; i < k; i++ ) {
175-
B[ ib+(i*sb1) ] += ( tmp * A[ ia+(i*sa1) ] );
178+
B[ ib + ( i*sb1 ) ] += ( tmp * A[ ia + ( i*sa1 ) ] );
176179
}
177180
if ( nonunit ) {
178-
tmp *= A[ ia+(k*sa1) ];
181+
tmp *= A[ ia + ( k*sa1 ) ];
179182
}
180183
B[ ob2 ] = tmp;
181184
}
@@ -187,17 +190,17 @@ function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
187190
( !isrma && side === 'right' && uplo === 'upper' && transa === 'no-transpose' )
188191
) {
189192
for ( j = 0; j < N; j++ ) {
193+
ib = offsetB + ( j*sb0 );
190194
for ( k = M - 1; k >= 0; k-- ) {
191-
ob2 = offsetB + ( k * sb1 ) + ( j * sb0 );
195+
ob2 = ib + ( k*sb1 );
192196
tmp = alpha * B[ ob2 ];
197+
ia = offsetA + ( k*sa0 );
193198
for ( i = k + 1; i < M; i++ ) {
194-
oa2 = offsetA + ( i * sa1 ) + ( k * sa0 );
195-
ob = offsetB + ( i * sb1 ) + ( j * sb0 );
196-
B[ ob ] += ( tmp * A[ oa2 ] );
199+
oa2 = ia + ( i*sa1 );
200+
B[ ib + ( i*sb1 ) ] += ( tmp * A[ oa2 ] );
197201
}
198202
if ( nonunit ) {
199-
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
200-
tmp *= A[ oa2 ];
203+
tmp *= A[ ia + ( k*sa1 ) ];
201204
}
202205
B[ ob2 ] = tmp;
203206
}
@@ -209,19 +212,20 @@ function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
209212
( !isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' )
210213
) {
211214
for ( j = 0; j < N; j++ ) {
215+
ib = offsetB + ( j*sb0 );
212216
for ( i = M - 1; i >= 0; i-- ) {
213-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
217+
ob2 = ib + ( i*sb1 );
214218
tmp = 0.0;
215-
for ( k = 0; k < i; k++ ) {
216-
oa2 = offsetA + ( k * sa1 ) + ( i * sa0 );
217-
tmp += A[ oa2 ] * B[ offsetB + ( k * sb1 ) + ( j * sb0 ) ];
218-
}
219+
ia = offsetA + ( i*sa0 );
219220
if ( nonunit ) {
220-
oa2 = offsetA + ( i * sa1 ) + ( i * sa0 );
221-
tmp += ( A[ oa2 ] * B[ ob2 ] );
221+
tmp += ( A[ ia + ( i*sa1 ) ] * B[ ob2 ] );
222222
} else {
223223
tmp += B[ ob2 ];
224224
}
225+
for ( k = 0; k < i; k++ ) {
226+
oa2 = ia + ( k*sa1 );
227+
tmp += A[ oa2 ] * B[ ib + ( k*sb1 ) ];
228+
}
225229
B[ ob2 ] = alpha * tmp;
226230
}
227231
}
@@ -232,16 +236,16 @@ function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
232236
( !isrma && side === 'right' && uplo === 'upper' && transa === 'transpose' )
233237
) {
234238
for ( j = 0; j < N; j++ ) {
239+
ib = offsetB + ( j * sb0 );
235240
for ( i = 0; i < M; i++ ) {
236-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
241+
ia = offsetA + ( i * sa0 );
242+
ob2 = ib + ( i * sb1 );
237243
tmp = 0.0;
238244
for ( k = i + 1; k < M; k++ ) {
239-
oa2 = offsetA + ( k * sa1 ) + ( i * sa0 );
240-
tmp += A[ oa2 ] * B[ offsetB + ( k * sb1 ) + ( j * sb0 ) ];
245+
tmp += A[ ia + ( k * sa1 ) ] * B[ ib + ( k * sb1 ) ];
241246
}
242247
if ( nonunit ) {
243-
oa2 = offsetA + ( i * sa1 ) + ( i * sa0 );
244-
tmp += ( A[ oa2 ] * B[ ob2 ] );
248+
tmp += ( A[ ia + ( i * sa1 ) ] * B[ ob2 ] );
245249
} else {
246250
tmp += B[ ob2 ];
247251
}
@@ -255,25 +259,22 @@ function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
255259
( !isrma && side === 'left' && uplo === 'lower' && transa === 'no-transpose' )
256260
) {
257261
for ( j = N - 1; j >= 0; j-- ) {
258-
if ( nonunit ) {
259-
oa2 = offsetA + ( j * sa1 ) + ( j * sa0 );
260-
tmp = A[ oa2 ];
261-
for ( i = 0; i < M; i++ ) {
262-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
263-
B[ ob2 ] *= tmp;
264-
}
265-
}
262+
ia = offsetA + ( j*sa0 );
263+
ib = offsetB + ( j*sb0 );
266264
for ( i = 0; i < M; i++ ) {
267-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
268-
B[ ob2 ] *= alpha;
269-
}
270-
for ( k = 0; k < j; k++ ) {
271-
oa2 = offsetA + ( k * sa1 ) + ( j * sa0 );
272-
if ( A[ oa2 ] !== 0.0 ) {
273-
tmp = alpha * A[ oa2 ];
274-
for ( i = 0; i < M; i++ ) {
275-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
276-
B[ ob2 ] += ( tmp * B[ offsetB + ( i * sb1 ) + ( k * sb0 ) ] );
265+
ob = ib + ( i*sb1 );
266+
B[ ob ] *= alpha;
267+
if ( nonunit ) {
268+
oa2 = ia + ( j*sa1 );
269+
tmp = A[ oa2 ];
270+
B[ ob ] *= tmp;
271+
}
272+
for ( k = 0; k < j; k++ ) {
273+
oa2 = ia + ( k*sa1 );
274+
ob2 = offsetB + ( k*sb0 );
275+
if ( A[ oa2 ] !== 0.0 ) {
276+
tmp = alpha * A[ oa2 ];
277+
B[ ob ] += ( tmp * B[ ob2 + ( i*sb1 ) ] );
277278
}
278279
}
279280
}
@@ -285,25 +286,21 @@ function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
285286
( !isrma && side === 'left' && uplo === 'upper' && transa === 'no-transpose' )
286287
) {
287288
for ( j = 0; j < N; j++ ) {
288-
if ( nonunit ) {
289-
oa2 = offsetA + ( j * sa1 ) + ( j * sa0 );
290-
tmp = A[ oa2 ];
291-
for ( i = 0; i < M; i++ ) {
292-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
293-
B[ ob2 ] *= tmp;
294-
}
295-
}
289+
ia = offsetA + ( j*sa0 );
296290
for ( i = 0; i < M; i++ ) {
297-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
298-
B[ ob2 ] *= alpha;
299-
}
300-
for ( k = j + 1; k < N; k++ ) {
301-
oa2 = offsetA + ( k * sa1 ) + ( j * sa0 );
302-
if ( A[ oa2 ] !== 0.0 ) {
303-
tmp = alpha * A[ oa2 ];
304-
for ( i = 0; i < M; i++ ) {
305-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
306-
B[ ob2 ] += ( tmp * B[ offsetB + ( i * sb1 ) + ( k * sb0 ) ] );
291+
ib = offsetB + ( i*sb1 );
292+
ob = ib + ( j*sb0 );
293+
B[ ob ] *= alpha;
294+
if ( nonunit ) {
295+
oa = ia + ( j*sa1 );
296+
B[ ob ] *= A[ oa ];
297+
}
298+
for ( k = j + 1; k < N; k++ ) {
299+
oa2 = ia + ( k*sa1 );
300+
ob2 = ib + ( k*sb0 );
301+
if ( A[ oa2 ] !== 0.0 ) {
302+
tmp = alpha * A[ oa2 ];
303+
B[ ob ] += ( tmp * B[ ob2 ] );
307304
}
308305
}
309306
}
@@ -315,39 +312,44 @@ function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
315312
( !isrma && side === 'left' && uplo === 'lower' && transa !== 'no-transpose' )
316313
) {
317314
for ( j = 0; j < N; j++ ) {
315+
ia = offsetA + ( j*sa1 );
318316
for ( i = 0; i < M; i++ ) {
319-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
320-
oa2 = offsetA + ( j * sa1 ) + ( j * sa0 );
317+
ib = offsetB + ( i*sb1 );
318+
oa = ia + ( j*sa0 );
319+
ob = ib + ( j*sb0 );
321320
if ( nonunit ) {
322-
tmp = B[ ob2 ] * A[ oa2 ];
321+
tmp = B[ ob ] * A[ oa ];
323322
} else {
324-
tmp = B[ ob2 ];
323+
tmp = B[ ob ];
325324
}
326325
for ( k = j + 1; k < N; k++ ) {
327-
oa2 = offsetA + ( j * sa1 ) + ( k * sa0 );
328-
tmp += ( B[ offsetB + ( i * sb1 ) + ( k * sb0 ) ] * A[ oa2 ] );
326+
oa2 = ia + ( k*sa0 );
327+
ob2 = ib + ( k*sb0 );
328+
tmp += ( B[ ob2 ] * A[ oa2 ] );
329329
}
330-
B[ ob2 ] = alpha * tmp;
330+
B[ ob ] = alpha * tmp;
331331
}
332332
}
333333
return B;
334334
}
335335
// ( isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' ) || ( !isrma && side === 'left' && uplo === 'upper' && transa !== 'no-transpose' )
336336
for ( i = 0; i < M; i++ ) {
337+
ib = offsetB + ( i*sb1 );
337338
for ( j = N - 1; j >= 0; j-- ) {
338-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
339-
oa2 = offsetA + ( j * sa1 ) + ( j * sa0 );
339+
ia = offsetA + ( j*sa1 );
340+
oa = ia + ( j*sa0 );
341+
ob = ib + ( j*sb0 );
340342
if ( nonunit ) {
341-
tmp = B[ ob2 ] * A[ oa2 ];
343+
tmp = B[ ob ] * A[ oa ];
342344
} else {
343-
tmp = B[ ob2 ];
345+
tmp = B[ ob ];
344346
}
345347
for ( k = 0; k < j; k++ ) {
346-
oa2 = offsetA + ( j * sa1 ) + ( k * sa0 );
347-
tmp += ( B[ offsetB + ( i * sb1 ) + ( k * sb0 ) ] * A[ oa2 ] );
348+
oa2 = ia + ( k*sa0 );
349+
ob2 = ib + ( k*sb0 );
350+
tmp += ( B[ ob2 ] * A[ oa2 ] );
348351
}
349-
350-
B[ ob2 ] = alpha * tmp;
352+
B[ ob ] = alpha * tmp;
351353
}
352354
}
353355
return B;

0 commit comments

Comments
 (0)