|
| 1 | +/** |
| 2 | +* @license Apache-2.0 |
| 3 | +* |
| 4 | +* Copyright (c) 2025 The Stdlib Authors. |
| 5 | +* |
| 6 | +* Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | +* you may not use this file except in compliance with the License. |
| 8 | +* You may obtain a copy of the License at |
| 9 | +* |
| 10 | +* http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +* |
| 12 | +* Unless required by applicable law or agreed to in writing, software |
| 13 | +* distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +* See the License for the specific language governing permissions and |
| 16 | +* limitations under the License. |
| 17 | +*/ |
| 18 | + |
| 19 | +'use strict'; |
| 20 | + |
| 21 | +// MODULES // |
| 22 | + |
| 23 | +var dgemm = require( '@stdlib/blas/base/dgemm' ).ndarray; |
| 24 | +var dcopy = require( '@stdlib/blas/base/dcopy' ).ndarray; |
| 25 | +var loopOrder = require( '@stdlib/ndarray/base/unary-loop-interchange-order' ); |
| 26 | +var dtrmm = require( './dtrmm.js' ); |
| 27 | + |
| 28 | + |
| 29 | +// MAIN // |
| 30 | + |
| 31 | +/** |
| 32 | +* Applies a real block reflector H or its transpose H^T to a real M by N matrix C from the left, with backward direction and row storage. |
| 33 | +* |
| 34 | +* @private |
| 35 | +* @param {NonNegativeInteger} M - number of rows of the matrix C |
| 36 | +* @param {NonNegativeInteger} N - number of columns of the matrix C |
| 37 | +* @param {NonNegativeInteger} K - order of the matrix T |
| 38 | +* @param {Float64Array} V - input matrix |
| 39 | +* @param {integer} strideV1 - stride of the first dimension of V |
| 40 | +* @param {integer} strideV2 - stride of the second dimension of V |
| 41 | +* @param {integer} offsetV - index offset for V |
| 42 | +* @param {Float64Array} T - input matrix |
| 43 | +* @param {integer} strideT1 - stride of the first dimension of T |
| 44 | +* @param {integer} strideT2 - stride of the second dimension of T |
| 45 | +* @param {integer} offsetT - index offset for T |
| 46 | +* @param {Float64Array} C - input matrix |
| 47 | +* @param {integer} strideC1 - stride of the first dimension of C |
| 48 | +* @param {integer} strideC2 - stride of the second dimension of C |
| 49 | +* @param {integer} offsetC - index offset for C |
| 50 | +* @param {Float64Array} work - work array |
| 51 | +* @param {integer} strideWork1 - stride of the first dimension of work |
| 52 | +* @param {integer} strideWork2 - stride of the second dimension of work |
| 53 | +* @param {integer} offsetWork - index offset for work |
| 54 | +* @param {string} trans - specifies whether to apply H or H^T |
| 55 | +* @returns {Float64Array} updated matrix C |
| 56 | +* |
| 57 | +* @example |
| 58 | +* var Float64Array = require( '@stdlib/array/float64' ); |
| 59 | +* |
| 60 | +* var V = new Float64Array( [ 10.0, 40.0, 70.0, 20.0, 50.0, 80.0, 30.0, 60.0, 90.0 ] ); |
| 61 | +* var T = new Float64Array( [ 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0 ] ); |
| 62 | +* var C = new Float64Array( [ 11.0, 12.0, 13.0, 21.0, 22.0, 23.0, 31.0, 32.0, 33.0 ] ); |
| 63 | +* var work = new Float64Array( 9 ); |
| 64 | +* |
| 65 | +* var result = leftBackwardRows( 3, 3, 3, V, 3, 1, 0, T, 3, 1, 0, C, 3, 1, 0, work, 3, 1, 0, 'no-transpose' ); |
| 66 | +* // returns <Float64Array>[ -155530.0, -164560.0, -173590.0, -292241.0, -308662.0, -325083.0, -4832.0, -5104.0, -5376.0 ] |
| 67 | +*/ |
| 68 | +function leftBackwardRows( M, N, K, V, strideV1, strideV2, offsetV, T, strideT1, strideT2, offsetT, C, strideC1, strideC2, offsetC, work, strideWork1, strideWork2, offsetWork, trans ) { // eslint-disable-line max-params, max-len |
| 69 | + var transt; |
| 70 | + var da0; |
| 71 | + var da1; |
| 72 | + var db0; |
| 73 | + var db1; |
| 74 | + var i0; |
| 75 | + var i1; |
| 76 | + var sh; |
| 77 | + var sa; |
| 78 | + var sb; |
| 79 | + var S0; |
| 80 | + var S1; |
| 81 | + var ia; |
| 82 | + var ib; |
| 83 | + var ic; |
| 84 | + var iv; |
| 85 | + var iw; |
| 86 | + var j; |
| 87 | + var o; |
| 88 | + |
| 89 | + // Quick return if possible |
| 90 | + if ( M <= 0 || N <= 0 ) { |
| 91 | + return C; |
| 92 | + } |
| 93 | + |
| 94 | + if ( trans === 'no-transpose' ) { |
| 95 | + transt = 'transpose'; |
| 96 | + } else { |
| 97 | + transt = 'no-transpose'; |
| 98 | + } |
| 99 | + |
| 100 | + /* Let V = ( V1 V2 ) (V2: last K columns) |
| 101 | + * Where V2 is unit lower triangular. |
| 102 | + */ |
| 103 | + /* Form H * C or H^T * C where C = ( C1 ) |
| 104 | + * ( C2 ) |
| 105 | + * W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) |
| 106 | + * W := C2**T |
| 107 | + */ |
| 108 | + ic = offsetC; |
| 109 | + iw = offsetWork; |
| 110 | + for ( j = 0; j < K; j++ ) { |
| 111 | + dcopy( N, C, strideC2, ic, work, strideWork1, iw ); |
| 112 | + ic += strideC1; |
| 113 | + iw += strideWork2; |
| 114 | + } |
| 115 | + // W := W * V2**T |
| 116 | + iv = offsetV + ( (M-K) * strideV2 ); |
| 117 | + dtrmm( 'right', 'lower', 'transpose', 'unit', N, K, 1.0, V, strideV1, strideV2, iv, work, strideWork1, strideWork2, offsetWork ); |
| 118 | + if ( M > K ) { |
| 119 | + // W := W + C1**T * V1**T |
| 120 | + dgemm( 'transpose', 'transpose', N, K, M-K, 1.0, C, strideC1, strideC2, offsetC, V, strideV1, strideV2, offsetV, 1.0, work, strideWork1, strideWork2, offsetWork ); |
| 121 | + } |
| 122 | + // W := W * T**T or W * T |
| 123 | + dtrmm( 'right', 'lower', transt, 'non-unit', N, K, 1.0, T, strideT1, strideT2, offsetT, work, strideWork1, strideWork2, offsetWork ); |
| 124 | + |
| 125 | + // C := C - V**T * W**T |
| 126 | + if ( M > K ) { |
| 127 | + // C1 := C1 - V1**T * W**T |
| 128 | + dgemm( 'transpose', 'transpose', M-K, N, K, -1.0, V, strideV1, strideV2, offsetV, work, strideWork1, strideWork2, offsetWork, 1.0, C, strideC1, strideC2, offsetC ); |
| 129 | + } |
| 130 | + // W := W * V2 |
| 131 | + iv = offsetV + ( (M-K) * strideV2 ); |
| 132 | + dtrmm( 'right', 'lower', 'no-transpose', 'unit', N, K, 1.0, V, strideV1, strideV2, iv, work, strideWork1, strideWork2, offsetWork ); |
| 133 | + |
| 134 | + // C2 := C2 - W**T |
| 135 | + o = loopOrder( [ K, N ], [ strideC1, strideC2 ], [ strideWork2, strideWork1 ] ); // eslint-disable-line max-len |
| 136 | + sh = o.sh; |
| 137 | + sa = o.sx; |
| 138 | + sb = o.sy; |
| 139 | + |
| 140 | + // Extract loop variables for loop interchange |
| 141 | + S0 = sh[ 0 ]; |
| 142 | + S1 = sh[ 1 ]; |
| 143 | + da0 = sa[ 0 ]; |
| 144 | + da1 = sa[ 1 ] - ( S0 * sa[ 0 ] ); |
| 145 | + db0 = sb[ 0 ]; |
| 146 | + db1 = sb[ 1 ] - ( S0 * sb[ 0 ] ); |
| 147 | + |
| 148 | + // Set pointers to first indexed elements |
| 149 | + ia = offsetC + ( (M-K) * strideC1 ); |
| 150 | + ib = offsetWork; |
| 151 | + |
| 152 | + // Iterate over matrix dimensions with optimized loop order |
| 153 | + for ( i1 = 0; i1 < S1; i1++ ) { |
| 154 | + for ( i0 = 0; i0 < S0; i0++ ) { |
| 155 | + C[ ia ] -= work[ ib ]; |
| 156 | + ia += da0; |
| 157 | + ib += db0; |
| 158 | + } |
| 159 | + ia += da1; |
| 160 | + ib += db1; |
| 161 | + } |
| 162 | + |
| 163 | + return C; |
| 164 | +} |
| 165 | + |
| 166 | + |
| 167 | +// EXPORTS // |
| 168 | + |
| 169 | +module.exports = leftBackwardRows; |
0 commit comments