@@ -130,23 +130,28 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
130130		} 
131131	} 
132132
133+ 	// Scale the column norms by `tscal` if the maximum element in `CNORM` is greater than `bignum` 
133134	imax  =  idamax (  N ,  CNORM ,  strideCNORM ,  offsetCNORM  ) ; 
134135	tmax  =  CNORM [  offsetCNORM  +  (  imax  *  strideCNORM  )  ] ; 
135136	if  (  tmax  <=  bignum  )  { 
137+ 		// All entries in `CNORM` are valid floating point numbers... 
136138		tscal  =  1.0 ; 
137139	}  else  { 
140+ 		// At least one entry in `CNORM` can't be represented as a floating point number. Find the largest off diagonal element, if this is not `Infinity` use it as `tscal`... 
138141		if  (  tmax  <=  dlamch (  'overflow'  )  )  { 
139142			tscal  =  1.0  /  (  smlnum  *  tmax  ) ; 
140143			dscal (  N ,  tscal ,  CNORM ,  strideCNORM ,  offsetCNORM  ) ; 
141144		}  else  { 
142145			tmax  =  0.0 ; 
143146			if  (  uplo  ===  'upper'  )  { 
147+ 				// Upper triangular 
144148				ia  =  offsetA  +  strideA2 ; 
145149				for  (  j  =  1 ;  j  <  N ;  j ++  )  { 
146150					tmax  =  max (  dlange (  'max' ,  j ,  1 ,  A ,  strideA1 ,  strideA2 ,  ia  ) ,  tmax  ) ; 
147151					ia  +=  strideA2 ; 
148152				} 
149153			}  else  { 
154+ 				// Lower triangular 
150155				ia  =  offsetA  +  strideA1 ; 
151156				for  (  j  =  0 ;  j  <  N  -  1 ;  j ++  )  { 
152157					tmax  =  max (  dlange (  'max' ,  N  -  (  j  +  1  ) ,  1 ,  A ,  strideA1 ,  strideA2 ,  ia  ) ,  tmax  ) ; 
@@ -162,6 +167,7 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
162167					if  (  CNORM [  ic  ]  <=  dlamch (  'overflow'  )  )  { 
163168						CNORM [  ic  ]  *=  tscal ; 
164169					}  else  { 
170+ 						// Recompute the 1 norm without introducing infinity in the summation 
165171						CNORM [  ic  ]  =  0.0 ; 
166172						if  (  uplo  ===  'upper'  )  { 
167173							ia1  =  0 ; 
@@ -181,16 +187,20 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
181187					ia2  +=  strideA2 ; 
182188				} 
183189			}  else  { 
190+ 				// At least one entry in `A` is not a valid floating point number, use `dtrsv` to propagate `Infinity` and `NaN` 
184191				dtrsv (  uplo ,  trans ,  diag ,  N ,  A ,  strideA1 ,  strideA2 ,  offsetA ,  X ,  strideX ,  offsetX  ) ; 
192+ 				return  scale ; 
185193			} 
186194		} 
187195	} 
188196
197+ 	// Compute the bound on the computed solution vector to see if `dtrsv` can be used. 
189198	j  =  idamax (  N ,  X ,  strideX ,  offsetX  ) ; 
190199	xmax  =  abs (  X [  offsetX  +  (  j  *  strideX  )  ]  ) ; 
191200	xbnd  =  xmax ; 
192201
193202	if  (  trans  ===  'no-transpose'  )  { 
203+ 		// Compute the growth in A * X = B 
194204		if  (  uplo  ===  'upper'  )  { 
195205			jfirst  =  N  -  1 ; 
196206			jlast  =  0 ; 
@@ -209,42 +219,59 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
209219
210220		if  (  ! GOTO  )  { 
211221			if  (  diag  ===  'non-unit'  )  { 
222+ 				/* 
223+ 					A is non-unit and triangular 
224+ 					Compute `grow` = 1 / G( j ), `xbnd` = 1 / M( j ). Initially G( 0 ) = max( X( i ), for 0 <= i < N ) 
225+ 				*/ 
212226				grow  =  1.0  /  max (  xbnd ,  smlnum  ) ; 
213227				xbnd  =  grow ; 
214228				ic  =  offsetCNORM  +  (  jfirst  *  strideCNORM  ) ; 
215229				ia  =  offsetA  +  (  jfirst  *  (  strideA1  +  strideA2  )  ) ; 
216230				for  (  j  =  jfirst ;  ( jinc  >  0 )  ? j  <=  jlast  : j  >=  jlast ;  j  +=  jinc  )  { 
217231					if  (  grow  <=  smlnum  )  { 
232+ 						// Exit the loop if the growth factor is very small. 
218233						GOTO  =  true ; 
219234						break ; 
220235					} 
236+ 
237+ 					// M( j ) = G( j - 1 ) / abs( A( j, j ) ) 
221238					tjj  =  abs (  A [  ia  ]  ) ; 
222239					xbnd  =  min (  xbnd ,  min (  1.0 ,  tjj  )  *  grow  ) ; 
223240					if  (  tjj  +  CNORM [  ic  ]  >=  smlnum  )  { 
241+ 						// G( j ) = G( j - 1 ) * ( 1 + CNORM( j ) / abs( A( j, j ) ) ) 
224242						grow  *=  tjj  /  (  tjj  +  CNORM [  ic  ]  ) ; 
225243					}  else  { 
244+ 						// G( j ) could overflow, set grow to 0 
226245						grow  =  0.0 ; 
227246					} 
228247
229248					ia  +=  jinc  *  (  strideA1  +  strideA2  ) ; 
230249					ic  +=  jinc  *  (  strideCNORM  ) ; 
231250				} 
232251				if  (  ! GOTO  )  { 
252+ 					// Control should only reach here if loop exits normally and doesn't "break". 
233253					grow  =  xbnd ; 
234254				} 
235255			}  else  { 
256+ 				/* 
257+ 					A is unit and triangular. 
258+ 					Compute `grow` = 1 / G( j ), where G( 0 ) = max( X( i ), for 0 <= i < N ) 
259+ 				*/ 
236260				grow  =  min (  1.0 ,  1.0  /  max (  xbnd ,  smlnum  )  ) ; 
237261				ic  =  offsetCNORM  +  (  jfirst  *  strideCNORM  ) ; 
238262				for  (  j  =  jfirst ;  ( jinc  >  0 )  ? j  <=  jlast  : j  >=  jlast ;  j +=  jinc  )  { 
239263					if  (  grow  <=  smlnum  )  { 
264+ 						// Exit the loop if the growth factor is too small. 
240265						break ; 
241266					} 
267+ 					// G( j ) = G( j - 1 ) * ( 1 + CNORM( j ) ) 
242268					grow  *=  1.0  /  (  1.0  +  CNORM [  ic  ]  ) ; 
243269					ic  +=  strideCNORM ; 
244270				} 
245271			} 
246272		} 
247273	}  else  { 
274+ 		// Compute the growth in A^T * X = B 
248275		if  (  uplo  ===  'upper'  )  { 
249276			jfirst  =  0 ; 
250277			jlast  =  N - 1 ; 
@@ -263,17 +290,27 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
263290
264291		if  (  ! GOTO  )  { 
265292			if  (  diag  ===  'non-unit'  )  { 
293+ 				/* 
294+ 					A is non-unit triangular. 
295+ 					Compute grow = 1 / G( j ) and xbnd = 1 / M( j ). Initially, M( 0 ) = max( X( i ), for 0 <= i < N ) 
296+ 				*/ 
266297				grow  =  1.0  /  max (  xbnd ,  smlnum  ) ; 
267298				xbnd  =  grow ; 
268299				ia  =  offsetA  +  (  jfirst  *  (  strideA1  +  strideA2  )  ) ;  // tracks A( j, j ) 
269300				ic  =  offsetCNORM  +  (  jfirst  *  strideCNORM  ) ;  // tracks CNORM( j ) 
301+ 
270302				for  (  j  =  jfirst ;  ( jinc  >  0 )  ? j  <=  jlast  : j  >=  jlast ;  j += jinc  )  { 
271303					if  (  grow  <=  smlnum  )  { 
304+ 						// Exit the loop if grow is too small. 
272305						GOTO  =  true ; 
273306						break ; 
274307					} 
308+ 
309+ 					// G( j ) = max( G( j - 1 ), M( j - 1 ) * ( 1 + CNORM( j ) ) ) 
275310					xj  =  1.0  +  CNORM [  ic  ] ; 
276311					grow  =  min (  grow ,  xbnd  /  xj  ) ; 
312+ 
313+ 					// M( j ) = M( j - 1 ) * ( 1 + CNORM( j ) ) / abs( A( j, j ) ) 
277314					tjj  =  abs (  A [  ia  ]  ) ; 
278315					if  (  xj  >  tjj  )  { 
279316						xbnd  *=  (  tjj  /  xj  ) ; 
@@ -286,12 +323,19 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
286323					grow  =  min (  grow ,  xbnd  ) ; 
287324				} 
288325			}  else  { 
326+ 				/* 
327+ 					A is unit triangular 
328+ 					Compute GROW = 1 / G( j ), where G( 0 ) = max( X( i ), for 0 <= i < N ) 
329+ 				*/ 
289330				grow  =  min (  1.0 ,  1.0  /  max (  xbnd ,  smlnum  )  ) ; 
290331				ic  =  offsetCNORM  +  (  jfirst  *  strideCNORM  ) ; 
291332				for  (  j  =  jfirst ;  ( jinc  >  0 )  ? j  <=  jlast  : j  >=  jlast ;  j  +=  jinc  )  { 
292333					if  (  grow  <=  smlnum  )  { 
334+ 						// Exit the loop if the growth factor is too small 
293335						break ; 
294336					} 
337+ 
338+ 					// G( j ) = ( 1 + CNORM( j ) ) * G( j - 1 ) 
295339					xj  =  1.0  +  CNORM [  ic  ] ; 
296340					grow  /=  xj ; 
297341
@@ -302,21 +346,27 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
302346	} 
303347
304348	if  (  grow  *  tscal  >  smlnum  )  { 
349+ 		// Use the Level 2 BLAS solve if the reciprocal of the bound on elements of X is not too small. 
305350		dtrsv (  uplo ,  trans ,  diag ,  N ,  A ,  strideA1 ,  strideA2 ,  offsetA ,  X ,  strideX ,  offsetX  ) ; 
306351	}  else  { 
352+ 		// Use a Level 1 BLAS solve, scaling intermediate results. 
353+ 
307354		if  (  xmax  >  bignum  )  { 
355+ 			// Scale X so that its components are less than or equal to bignum in absolute value. 
308356			scale  =  bignum  /  xmax ; 
309357			dscal (  N ,  scale ,  X ,  strideX ,  offsetX  ) ; 
310358			xmax  =  bignum ; 
311359		} 
312360
313361		if  (  trans  ===  'no-transpose'  )  { 
362+ 			// Solve A * X = B 
314363			ix  =  offsetX  +  (  jfirst  *  strideX  ) ;  // tracks X( j ) 
315364			ia  =  offsetA  +  (  jfirst  *  (  strideA1  +  strideA2  )  ) ;  // tracks A( j, j ) 
316365			ia1  =  offsetA  +  (  jfirst  *  strideA2  ) ;  // tracks A( 1, j ) 
317366			ic  =  offsetCNORM  +  (  jfirst  *  strideCNORM  ) ;  // tracks CNORM( j ) 
318367
319368			for  (  j  =  jfirst ;  ( jinc  >  0 )  ? j  <=  jlast  : j  >=  jlast ;  j  +=  jinc  )  { 
369+ 				// Compute X( j ) = B( j ) / A( j, j ), scaling X if necessary. 
320370				xj  =  abs (  X [  ix  ]  ) ; 
321371				GOTO  =  false ; 
322372				if  (  diag  ===  'non-unit'  )  { 
@@ -330,7 +380,7 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
330380
331381				if  (  ! GOTO  )  { 
332382					tjj  =  abs (  tjjs  ) ; 
333- 					if  (  tjj  >  smlnum  )  { 
383+ 					if  (  tjj  >  smlnum  )  {   // abs( A( j, j ) ) > smlnum 
334384						if  (  tjj  <  1.0  )  { 
335385							if  (  xj  >  tjj  *  bignum  )  { 
336386								rec  =  1.0  /  xj ; 
@@ -342,9 +392,12 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
342392						X [  ix  ]  /=  tjjs ; 
343393						xj  =  abs (  X [  ix  ]  ) ; 
344394					}  else  if  (  tjj  >  0.0  )  { 
395+ 						// 0 < abs( A( j, j ) ) <= smlnum 
345396						if  (  xj  >  tjj  *  bignum  )  { 
397+ 							// Scale X by ( 1 / abs( X( j ) ) ) * abs( A( j, j ) ) * bignum to avoid overflow when dividing by A( j, j ). 
346398							rec  =  (  tjj  *  bignum  )  /  xj ; 
347399							if  (  CNORM [  ic  ]  >  1.0  )  { 
400+ 								// Scale by 1 / CNORM( j ) to avoid overflow when multiplying X( j ) times column j 
348401								rec  /=  CNORM [  ic  ] ; 
349402							} 
350403							dscal (  N ,  rec ,  X ,  strideX ,  offsetX  ) ; 
@@ -354,6 +407,7 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
354407						X [  ix  ]  /=  tjjs ; 
355408						xj  =  abs (  X [  ix  ]  ) ; 
356409					}  else  { 
410+ 						// A( j, j ) = 0:  Set X( 1 : N ) = 0, x( j ) = 1, and scale = 0, and compute a solution to A * X = 0 
357411						ix1  =  offsetX ; 
358412						for  (  i  =  0 ;  i  <  N ;  i ++  )  { 
359413							X [  ix1  ]  =  0.0 ; 
@@ -364,26 +418,31 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
364418					} 
365419				} 
366420
421+ 				// Scale X if necessary to avoid overflow when adding a multiple of column j of A 
367422				if  (  xj  >  1.0  )  { 
368423					rec  =  1.0  /  xj ; 
369424					if  (  CNORM [  ic  ]  >  (  bignum  -  xmax  )  *  rec  )  { 
425+ 						// Scale X by 1 / ( 2 * abs( X( j ) ) ) 
370426						rec  *=  0.5 ; 
371427						dscal (  N ,  rec ,  X ,  strideX ,  offsetX  ) ; 
372428						scale  *=  rec ; 
373429					} 
374430				}  else  if  (  xj  *  CNORM [  ic  ]  >  (  bignum  -  xmax  )  )  { 
431+ 					// Scale X by 0.5 
375432					dscal (  N ,  0.5 ,  X ,  strideX ,  offsetX  ) ; 
376433					scale  *=  0.5 ; 
377434				} 
378435
379436				if  (  uplo  ===  'upper'  )  { 
380437					if  (  j  >  0  )  { 
438+ 						// Compute the update X( 1 : j - 1 ) := X( 1 : j - 1 ) - X( j ) * A( 1 : j - 1, j ) 
381439						daxpy (  j ,  - 1  *  tscal  *  X [  ix  ] ,  A ,  strideA1 ,  ia1 ,  X ,  strideX ,  offsetX  ) ; 
382440					} 
383441					i  =  idamax (  j ,  X ,  strideX ,  offsetX  ) ; 
384442					xmax  =  abs (  X [  offsetX  +  (  i  *  strideX  )  ]  ) ; 
385443				}  else  { 
386444					if  (  j  <  N  -  1  )  { 
445+ 						// Compute the update X( j + 1 : N ) := X( j + 1 : N ) - X( j ) * A( j + 1 : N , j ) 
387446						daxpy (  N  -  (  j  +  1  ) ,  - 1  *  tscal  *  X [  ix  ] ,  A ,  strideA1 ,  ia  +  strideA1 ,  X ,  strideX ,  ix  +  strideX  ) ; 
388447						i  =  idamax (  N  -  (  j  +  1  ) ,  X ,  strideX ,  ix  +  strideX  ) ; 
389448						xmax  =  abs (  X [  offsetX  +  (  i  *  strideX  )  ]  ) ; 
@@ -396,15 +455,18 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
396455				ic  +=  jinc  *  strideCNORM ; 
397456			} 
398457		}  else  { 
458+ 			// Solve A ^ T * X = B 
399459			ix  =  offsetX  +  (  jfirst  *  strideX  ) ;  // tracks X( j ) 
400460			ia  =  offsetA  +  (  jfirst  *  (  strideA1  +  strideA2  )  ) ;  // tracks A( j, j ) 
401461			ia1  =  offsetA  +  (  jfirst  *  strideA2  ) ;  // tracks A( 1, j ) 
402462			ic  =  offsetCNORM  +  (  jfirst  *  strideCNORM  ) ;  // tracks CNORM( j ) 
403463			for  (  j  =  jfirst ;  ( jinc  >  0 )  ? j  <=  jlast  : j  >=  jlast ;  j  +=  jinc  )  { 
464+ 				// Compute X( j ) = B( j ) - sum A( k , j ) * X( k ). 
404465				xj  =  abs (  X [  ix  ]  ) ; 
405466				uscal  =  tscal ; 
406467				rec  =  1.0  /  max (  xmax ,  1.0  ) ; 
407468				if  (  CNORM [  ic  ]  >  (  bignum  -  xj  )  *  rec  )  { 
469+ 					// If X( j ) could overflow, scale X by 1 / ( 2 * xmax ). 
408470					rec  *=  0.5 ; 
409471					if  (  diag  ===  'non-unit'  )  { 
410472						tjjs  =  A [  ia  ]  *  tscal ; 
@@ -413,6 +475,7 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
413475					} 
414476					tjj  =  abs (  tjjs  ) ; 
415477					if  (  tjj  >  1.0  )  { 
478+ 						// Divide by A( j, j ) when scaling X if A( j, j ) > 1 
416479						rec  =  min (  1.0 ,  rec  *  tjj  ) ; 
417480						uscal  /=  tjjs ; 
418481					} 
@@ -423,14 +486,16 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
423486					} 
424487				} 
425488
426- 				sumj  =  0.0 ;   // HERE 
489+ 				sumj  =  0.0 ; 
427490				if  (  uscal  ===  1.0  )  { 
491+ 					// If the scaling needed for A in the dot product is 1, call ddot to perform the dot product 
428492					if  (  uplo  ===  'upper'  )  { 
429493						sumj  =  ddot (  j ,  A ,  strideA1 ,  ia1 ,  X ,  strideX ,  offsetX  ) ; 
430494					}  else  { 
431495						sumj  =  ddot (  N  -  (  j  +  1  ) ,  A ,  strideA1 ,  ia  +  strideA1 ,  X ,  strideX ,  ix  +  strideX  ) ; 
432496					} 
433497				}  else  { 
498+ 					// Otherwise, use in-line code for the dot product 
434499					if  (  uplo  ===  'upper'  )  { 
435500						ia2  =  0 ; 
436501						ix1  =  offsetX ; 
@@ -464,10 +529,13 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
464529					} 
465530
466531					if  (  ! GOTO  )  { 
532+ 						// Compute X( j ) = X( j ) / A( j, j ), scaling if necessary 
467533						tjj  =  abs (  tjjs  ) ; 
468534						if  (  tjj  >  smlnum  )  { 
535+ 							// abs( A( j, j ) ) > smlnum 
469536							if  (  tjj  <  1.0  )  { 
470537								if  (  xj  >  tjj  *  bignum  )  { 
538+ 									// Scale X by 1 / abs( X( j ) ) 
471539									rec  =  1.0  /  xj ; 
472540									dscal (  N ,  rec ,  X ,  strideX ,  offsetX  ) ; 
473541									scale  *=  rec ; 
@@ -476,14 +544,17 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
476544							} 
477545							X [  ix  ]  /=  tjjs ; 
478546						}  else  if  (  tjj  >  0.0  )  { 
547+ 							// 0 < abs( A( j, j ) ) <= smlnum 
479548							if  (  xj  >  tjj  *  bignum  )  { 
549+ 								// Scale X by ( 1 / abs( X( j ) ) ) * abs( A( j, j ) ) * bignum 
480550								rec  =  tjj  *  bignum  /  xj ; 
481551								dscal (  N ,  rec ,  X ,  strideX ,  offsetX  ) ; 
482552								xmax  *=  rec ; 
483553								scale  *=  rec ; 
484554							} 
485555							X [  ix  ]  /=  tjjs ; 
486556						}  else  { 
557+ 							// A( j, j ) = 0: Set X( 1 : N ) = 0, X( j ) = 1, and scale = 0, and compute a solution to A^T * X = 0 
487558							ix1  =  offsetX ; 
488559							for  (  i  =  0 ;  i  <  N ;  i ++  )  { 
489560								X [  ix1  ]  =  0.0 ; 
@@ -495,6 +566,7 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
495566						} 
496567					} 
497568				}  else  { 
569+ 					// Compute X( j ) := X( j ) / A( j, j ) - sumj if the dot product has already been divided by 1 / A( j, j ) 
498570					X [  ix  ]  =  (  X [  ix  ]  /  tjjs  )  -  sumj ; 
499571				} 
500572
@@ -509,6 +581,7 @@ function dlatrs( uplo, trans, diag, normin, N, A, strideA1, strideA2, offsetA, X
509581		scale  /=  tscal ; 
510582	} 
511583
584+ 	// Scale the column norms by 1 / tscal for return 
512585	if  (  tscal  !==  1.0  )  { 
513586		dscal (  N ,  1.0  /  tscal ,  CNORM ,  strideCNORM ,  offsetCNORM  ) ; 
514587	} 
0 commit comments