|  | 
|  | 1 | +/** | 
|  | 2 | +* @license Apache-2.0 | 
|  | 3 | +* | 
|  | 4 | +* Copyright (c) 2025 The Stdlib Authors. | 
|  | 5 | +* | 
|  | 6 | +* Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 7 | +* you may not use this file except in compliance with the License. | 
|  | 8 | +* You may obtain a copy of the License at | 
|  | 9 | +* | 
|  | 10 | +*    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 11 | +* | 
|  | 12 | +* Unless required by applicable law or agreed to in writing, software | 
|  | 13 | +* distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 14 | +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 15 | +* See the License for the specific language governing permissions and | 
|  | 16 | +* limitations under the License. | 
|  | 17 | +*/ | 
|  | 18 | + | 
|  | 19 | +'use strict'; | 
|  | 20 | + | 
|  | 21 | +// MODULES // | 
|  | 22 | + | 
|  | 23 | +var dgemm = require( '@stdlib/blas/base/dgemm' ).ndarray; | 
|  | 24 | +var dcopy = require( '@stdlib/blas/base/dcopy' ).ndarray; | 
|  | 25 | +var loopOrder = require( '@stdlib/ndarray/base/unary-loop-interchange-order' ); | 
|  | 26 | +var dtrmm = require( './dtrmm.js' ); | 
|  | 27 | + | 
|  | 28 | + | 
|  | 29 | +// MAIN // | 
|  | 30 | + | 
|  | 31 | +/** | 
|  | 32 | +* Applies a real block reflector H or its transpose H^T to a real M by N matrix C from the left, with backward direction and row storage. | 
|  | 33 | +* | 
|  | 34 | +* @private | 
|  | 35 | +* @param {NonNegativeInteger} M - number of rows of the matrix C | 
|  | 36 | +* @param {NonNegativeInteger} N - number of columns of the matrix C | 
|  | 37 | +* @param {NonNegativeInteger} K - order of the matrix T | 
|  | 38 | +* @param {Float64Array} V - input matrix | 
|  | 39 | +* @param {integer} strideV1 - stride of the first dimension of V | 
|  | 40 | +* @param {integer} strideV2 - stride of the second dimension of V | 
|  | 41 | +* @param {integer} offsetV - index offset for V | 
|  | 42 | +* @param {Float64Array} T - input matrix | 
|  | 43 | +* @param {integer} strideT1 - stride of the first dimension of T | 
|  | 44 | +* @param {integer} strideT2 - stride of the second dimension of T | 
|  | 45 | +* @param {integer} offsetT - index offset for T | 
|  | 46 | +* @param {Float64Array} C - input matrix | 
|  | 47 | +* @param {integer} strideC1 - stride of the first dimension of C | 
|  | 48 | +* @param {integer} strideC2 - stride of the second dimension of C | 
|  | 49 | +* @param {integer} offsetC - index offset for C | 
|  | 50 | +* @param {Float64Array} work - work array | 
|  | 51 | +* @param {integer} strideWork1 - stride of the first dimension of work | 
|  | 52 | +* @param {integer} strideWork2 - stride of the second dimension of work | 
|  | 53 | +* @param {integer} offsetWork - index offset for work | 
|  | 54 | +* @param {string} trans - specifies whether to apply H or H^T | 
|  | 55 | +* @returns {Float64Array} updated matrix C | 
|  | 56 | +* | 
|  | 57 | +* @example | 
|  | 58 | +* var Float64Array = require( '@stdlib/array/float64' ); | 
|  | 59 | +* | 
|  | 60 | +* var V = new Float64Array( [ 10.0, 40.0, 70.0, 20.0, 50.0, 80.0, 30.0, 60.0, 90.0 ] ); | 
|  | 61 | +* var T = new Float64Array( [ 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0 ] ); | 
|  | 62 | +* var C = new Float64Array( [ 11.0, 12.0, 13.0, 21.0, 22.0, 23.0, 31.0, 32.0, 33.0 ] ); | 
|  | 63 | +* var work = new Float64Array( 9 ); | 
|  | 64 | +* | 
|  | 65 | +* var result = leftBackwardRows( 3, 3, 3, V, 3, 1, 0, T, 3, 1, 0, C, 3, 1, 0, work, 3, 1, 0, 'no-transpose' ); | 
|  | 66 | +* // returns <Float64Array>[ -155530.0, -164560.0, -173590.0, -292241.0, -308662.0, -325083.0, -4832.0, -5104.0, -5376.0 ] | 
|  | 67 | +*/ | 
|  | 68 | +function leftBackwardRows( M, N, K, V, strideV1, strideV2, offsetV, T, strideT1, strideT2, offsetT, C, strideC1, strideC2, offsetC, work, strideWork1, strideWork2, offsetWork, trans ) { // eslint-disable-line max-params, max-len | 
|  | 69 | +	var transt; | 
|  | 70 | +	var da0; | 
|  | 71 | +	var da1; | 
|  | 72 | +	var db0; | 
|  | 73 | +	var db1; | 
|  | 74 | +	var i0; | 
|  | 75 | +	var i1; | 
|  | 76 | +	var sh; | 
|  | 77 | +	var sa; | 
|  | 78 | +	var sb; | 
|  | 79 | +	var S0; | 
|  | 80 | +	var S1; | 
|  | 81 | +	var ia; | 
|  | 82 | +	var ib; | 
|  | 83 | +	var ic; | 
|  | 84 | +	var iv; | 
|  | 85 | +	var iw; | 
|  | 86 | +	var j; | 
|  | 87 | +	var o; | 
|  | 88 | + | 
|  | 89 | +	// Quick return if possible | 
|  | 90 | +	if ( M <= 0 || N <= 0 ) { | 
|  | 91 | +		return C; | 
|  | 92 | +	} | 
|  | 93 | + | 
|  | 94 | +	if ( trans === 'no-transpose' ) { | 
|  | 95 | +		transt = 'transpose'; | 
|  | 96 | +	} else { | 
|  | 97 | +		transt = 'no-transpose'; | 
|  | 98 | +	} | 
|  | 99 | + | 
|  | 100 | +	/* Let V = ( V1 V2 )    (V2: last K columns) | 
|  | 101 | +	* Where V2 is unit lower triangular. | 
|  | 102 | +	*/ | 
|  | 103 | +	/* Form H * C or H^T * C where C = ( C1 ) | 
|  | 104 | +	*                                    ( C2 ) | 
|  | 105 | +	* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) | 
|  | 106 | +	* W := C2**T | 
|  | 107 | +	*/ | 
|  | 108 | +	ic = offsetC; | 
|  | 109 | +	iw = offsetWork; | 
|  | 110 | +	for ( j = 0; j < K; j++ ) { | 
|  | 111 | +		dcopy( N, C, strideC2, ic, work, strideWork1, iw ); | 
|  | 112 | +		ic += strideC1; | 
|  | 113 | +		iw += strideWork2; | 
|  | 114 | +	} | 
|  | 115 | +	// W := W * V2**T | 
|  | 116 | +	iv = offsetV + ( (M-K) * strideV2 ); | 
|  | 117 | +	dtrmm( 'right', 'lower', 'transpose', 'unit', N, K, 1.0, V, strideV1, strideV2, iv, work, strideWork1, strideWork2, offsetWork ); | 
|  | 118 | +	if ( M > K ) { | 
|  | 119 | +		// W := W + C1**T * V1**T | 
|  | 120 | +		dgemm( 'transpose', 'transpose', N, K, M-K, 1.0, C, strideC1, strideC2, offsetC, V, strideV1, strideV2, offsetV, 1.0, work, strideWork1, strideWork2, offsetWork ); | 
|  | 121 | +	} | 
|  | 122 | +	// W := W * T**T or W * T | 
|  | 123 | +	dtrmm( 'right', 'lower', transt, 'non-unit', N, K, 1.0, T, strideT1, strideT2, offsetT, work, strideWork1, strideWork2, offsetWork ); | 
|  | 124 | + | 
|  | 125 | +	// C := C - V**T * W**T | 
|  | 126 | +	if ( M > K ) { | 
|  | 127 | +		// C1 := C1 - V1**T * W**T | 
|  | 128 | +		dgemm( 'transpose', 'transpose', M-K, N, K, -1.0, V, strideV1, strideV2, offsetV, work, strideWork1, strideWork2, offsetWork, 1.0, C, strideC1, strideC2, offsetC ); | 
|  | 129 | +	} | 
|  | 130 | +	// W := W * V2 | 
|  | 131 | +	iv = offsetV + ( (M-K) * strideV2 ); | 
|  | 132 | +	dtrmm( 'right', 'lower', 'no-transpose', 'unit', N, K, 1.0, V, strideV1, strideV2, iv, work, strideWork1, strideWork2, offsetWork ); | 
|  | 133 | + | 
|  | 134 | +	// C2 := C2 - W**T | 
|  | 135 | +	o = loopOrder( [ K, N ], [ strideC1, strideC2 ], [ strideWork2, strideWork1 ] ); // eslint-disable-line max-len | 
|  | 136 | +	sh = o.sh; | 
|  | 137 | +	sa = o.sx; | 
|  | 138 | +	sb = o.sy; | 
|  | 139 | + | 
|  | 140 | +	// Extract loop variables for loop interchange | 
|  | 141 | +	S0 = sh[ 0 ]; | 
|  | 142 | +	S1 = sh[ 1 ]; | 
|  | 143 | +	da0 = sa[ 0 ]; | 
|  | 144 | +	da1 = sa[ 1 ] - ( S0 * sa[ 0 ] ); | 
|  | 145 | +	db0 = sb[ 0 ]; | 
|  | 146 | +	db1 = sb[ 1 ] - ( S0 * sb[ 0 ] ); | 
|  | 147 | + | 
|  | 148 | +	// Set pointers to first indexed elements | 
|  | 149 | +	ia = offsetC + ( (M-K) * strideC1 ); | 
|  | 150 | +	ib = offsetWork; | 
|  | 151 | + | 
|  | 152 | +	// Iterate over matrix dimensions with optimized loop order | 
|  | 153 | +	for ( i1 = 0; i1 < S1; i1++ ) { | 
|  | 154 | +		for ( i0 = 0; i0 < S0; i0++ ) { | 
|  | 155 | +			C[ ia ] -= work[ ib ]; | 
|  | 156 | +			ia += da0; | 
|  | 157 | +			ib += db0; | 
|  | 158 | +		} | 
|  | 159 | +		ia += da1; | 
|  | 160 | +		ib += db1; | 
|  | 161 | +	} | 
|  | 162 | + | 
|  | 163 | +	return C; | 
|  | 164 | +} | 
|  | 165 | + | 
|  | 166 | + | 
|  | 167 | +// EXPORTS // | 
|  | 168 | + | 
|  | 169 | +module.exports = leftBackwardRows; | 
0 commit comments