Skip to content

Commit c0b1856

Browse files
author
Johnathan Rhyne
committed
single precision passes local tests
1 parent 33825bc commit c0b1856

File tree

19 files changed

+3580
-278
lines changed

19 files changed

+3580
-278
lines changed

SRC/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ set(SLASRC
116116
slauu2.f slauum.f sopgtr.f sopmtr.f sorg2l.f sorg2r.f
117117
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
118118
sorgrq.f sorgtr.f sorgtsqr.f sorgtsqr_row.f sorm2l.f sorm2r.f sorm22.f
119+
sorgkr.f sorgrk.f sorgkl.f sorglk.f
120+
slumm.f strtrm.f strmmoop.f
119121
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
120122
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
121123
spbstf.f spbsv.f spbsvx.f

SRC/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ SLASRC = \
145145
slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
146146
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
147147
sorgrq.o sorgtr.o sorgtsqr.o sorgtsqr_row.o sorm2l.o sorm2r.o sorm22.o \
148+
sorgkr.o sorgrk.o sorgkl.o sorglk.o \
149+
slumm.o strtrm.o strmmoop.o \
148150
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
149151
sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
150152
spbstf.o spbsv.o spbsvx.o \

SRC/dorglq.f

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -190,30 +190,8 @@ SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
190190
END IF
191191
*
192192
NBMIN = 2
193-
NX = 0
193+
NX = MAX( 0, ILAENV( 3, 'DORGLQ', ' ', M, N, K, -1 ) )
194194
IWS = M
195-
IF( NB.GT.1 .AND. NB.LT.K ) THEN
196-
*
197-
* Determine when to cross over from blocked to unblocked code.
198-
*
199-
NX = MAX( 0, ILAENV( 3, 'DORGLQ', ' ', M, N, K, -1 ) )
200-
IF( NX.LT.K ) THEN
201-
*
202-
* Determine if workspace is large enough for blocked code.
203-
*
204-
LDWORK = M
205-
IWS = LDWORK*NB
206-
IF( LWORK.LT.IWS ) THEN
207-
*
208-
* Not enough workspace to use optimal NB: reduce NB and
209-
* determine the minimum value of NB.
210-
*
211-
NB = LWORK / LDWORK
212-
NBMIN = MAX( 2, ILAENV( 2, 'DORGLQ', ' ', M, N, K,
213-
$ -1 ) )
214-
END IF
215-
END IF
216-
END IF
217195
*
218196
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
219197
*

SRC/dorgrq.f

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -207,28 +207,12 @@ SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
207207
* Determine when to cross over from blocked to unblocked code.
208208
*
209209
NX = MAX( 0, ILAENV( 3, 'DORGRQ', ' ', M, N, K, -1 ) )
210-
IF( NX.LT.K ) THEN
211-
*
212-
* Determine if workspace is large enough for blocked code.
213-
*
214-
LDWORK = M
215-
IWS = LDWORK*NB
216-
IF( LWORK.LT.IWS ) THEN
217-
*
218-
* Not enough workspace to use optimal NB: reduce NB and
219-
* determine the minimum value of NB.
220-
*
221-
NB = LWORK / LDWORK
222-
NBMIN = MAX( 2, ILAENV( 2, 'DORGRQ', ' ', M, N, K,
223-
$ -1 ) )
224-
END IF
225-
END IF
226210
END IF
227211
*
228212
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
229213
*
230214
* We want to use the blocking method as long as our matrix is big enough
231-
* and it's deemed worthwhile with the extra memory allocations
215+
* and it's deemed worthwhile
232216
*
233217
KK = K
234218
ELSE
@@ -256,13 +240,13 @@ SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
256240
CALL DLARFT( 'Transpose', 'Rowwise', N-K+I+IB-1, IB,
257241
$ A( II, 1 ), LDA, TAU( I ), A( II, N-K+I ), LDA )
258242
*
259-
* Apply H**T to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
243+
* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
260244
*
261245
CALL DLARFB0C2(.TRUE., 'Right', 'No Transpose', 'Backward',
262246
$ 'Rowwise', II-1, N-K+I+IB-1, IB, A(II,1), LDA,
263247
$ A( II, N-K+I ), LDA, A, LDA)
264248
*
265-
* Apply H**T to columns 1:n-k+i+ib-1 of current block
249+
* Apply H to columns 1:n-k+i+ib-1 of current block
266250
*
267251
CALL DORGRK( IB, N-K+I+IB-1, A( II, 1 ), LDA )
268252

SRC/dtrmmoop.f

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ RECURSIVE SUBROUTINE DTRMMOOP(SIDE, UPLO, TRANSA, TRANSB,
215215
EXTERNAL LSAME, DDOT
216216
* ..
217217
* .. External Subroutines ..
218-
EXTERNAL DGEMM
218+
EXTERNAL DGEMM, DAXPY, DLASET, DSCAL
219219
* ..
220220
* .. Intrinsic Functions ..
221221
INTRINSIC MIN

SRC/lapack_64.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -911,12 +911,12 @@
911911
#define DORGKL DORGKL_64
912912
#define DORGKR DORGKR_64
913913
#define DORGRK DORGRK_64
914-
#define DLUMM DLUMM_64
915-
#define DTRTRM DTRTRM_64
916-
#define DTRMMOOP DTRMMOOP_64
917914
#define DORGTR DORGTR_64
918915
#define DORGTSQR DORGTSQR_64
919916
#define DORGTSQR_ROW DORGTSQR_ROW_64
917+
#define DLUMM DLUMM_64
918+
#define DTRTRM DTRTRM_64
919+
#define DTRMMOOP DTRMMOOP_64
920920
#define DORHR_COL DORHR_COL_64
921921
#define DORM22 DORM22_64
922922
#define DORM2L DORM2L_64
@@ -1509,9 +1509,16 @@
15091509
#define SORGQR SORGQR_64
15101510
#define SORGR2 SORGR2_64
15111511
#define SORGRQ SORGRQ_64
1512+
#define SORGLK SORGLK_64
1513+
#define SORGKL SORGKL_64
1514+
#define SORGKR SORGKR_64
1515+
#define SORGRK SORGRK_64
15121516
#define SORGTR SORGTR_64
15131517
#define SORGTSQR SORGTSQR_64
15141518
#define SORGTSQR_ROW SORGTSQR_ROW_64
1519+
#define SLUMM SLUMM_64
1520+
#define STRTRM STRTRM_64
1521+
#define STRMMOOP STRMMOOP_64
15151522
#define SORHR_COL SORHR_COL_64
15161523
#define SORM22 SORM22_64
15171524
#define SORM2L SORM2L_64

SRC/slarfb0c2.f

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -186,20 +186,19 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
186186
! and thus don't reference whatever is present in C2
187187
! at the beginning.
188188
LOGICAL C2I
189-
190189
! Array arguments
191190
REAL V(LDV,*), C(LDC,*), T(LDT,*)
192191
! Local scalars
193-
LOGICAL QR, LQ, QL, DIRF, COLV, SIDEL, SIDER,
192+
LOGICAL QR, LQ, QL, RQ, DIRF, COLV, SIDEL, SIDER,
194193
$ TRANST
195194
INTEGER I, J
196195
! External functions
197196
LOGICAL LSAME
198197
EXTERNAL LSAME
199-
! External Subroutines
198+
! External subroutines
200199
EXTERNAL SGEMM, STRMM, XERBLA
201200
! Parameters
202-
REAL ONE, ZERO, NEG_ONE
201+
REAL ONE, ZERO, NEG_ONE
203202
PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE = -1.0E+0)
204203
205204
! Beginning of executable statements
@@ -225,10 +224,7 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
225224
226225
! RQ is when we store the reflectors row by row and have the
227226
! 'first' reflector stored in the last row
228-
! RQ = (.NOT.DIRF).AND.(.NOT.COLV)
229-
! Since we have exactly one of these 4 modes, we don't need to actually
230-
! store the value of RQ, instead we assume this is the case if we fail
231-
! the above 3 checks.
227+
RQ = (.NOT.DIRF).AND.(.NOT.COLV)
232228
233229
IF (QR) THEN
234230
! We are computing C = HC = (I - VTV')C
@@ -313,7 +309,7 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
313309
CALL STRMM('Left', 'Lower', 'No Transpose', 'Unit',
314310
$ K, N, NEG_ONE, V, LDV, C, LDC)
315311
ELSE IF (LQ) THEN
316-
! We are computing C = CH' = C(I-V'T'V)
312+
! We are computing C = C op(H) = C(I-V' op(T) V)
317313
! Where: V = [ V1 V2 ] and C = [ C1 C2 ]
318314
! with the following dimensions:
319315
! V1\in\R^{K\times K}
@@ -325,20 +321,20 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
325321
! without having to allocate anything extra.
326322
! This lets us simplify our above equation to get
327323
!
328-
! C = CH' = [ 0, C2 ](I - [ V1' ]T'[ V1, V2 ])
329-
! [ V2' ]
324+
! C = C op(H) = [ 0, C2 ](I - [ V1' ]op(T)[ V1, V2 ])
325+
! [ V2' ]
330326
!
331-
! = [ 0, C2 ] - [ 0, C2 ][ V1' ]T'[ V1, V2 ]
327+
! = [ 0, C2 ] - [ 0, C2 ][ V1' ]op(T)[ V1, V2 ]
332328
! [ V2' ]
333329
!
334-
! = [ 0, C2 ] - C2*V2'*T'[ V1, V2 ]
330+
! = [ 0, C2 ] - C2*V2'*op(T)[ V1, V2 ]
335331
!
336-
! = [ -C2*V2'*T'*V1, C2 - C2*V2'*T'*V2 ]
332+
! = [ -C2*V2'*op(T)*V1, C2 - C2*V2'*op(T)*V2 ]
337333
!
338334
! So, we can order our computations as follows:
339335
!
340336
! C1 = C2*V2'
341-
! C1 = C1*T'
337+
! C1 = C1*op(T)
342338
! C2 = C2 - C1*V2
343339
! C1 = -C1*V1
344340
!
@@ -349,9 +345,6 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
349345
IF( .NOT.SIDER ) THEN
350346
CALL XERBLA('SLARFB0C2', 2)
351347
RETURN
352-
ELSE IF(.NOT.TRANST) THEN
353-
CALL XERBLA('SLARFB0C2', 3)
354-
RETURN
355348
END IF
356349
!
357350
! C1 = C2*V2'
@@ -370,8 +363,13 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
370363
!
371364
! C1 = C1*T'
372365
!
373-
CALL STRMM('Right', 'Upper', 'Transpose', 'Non-unit',
374-
$ M, K, ONE, T, LDT, C, LDC)
366+
IF (TRANST) THEN
367+
CALL STRMM('Right', 'Upper', 'Transpose',
368+
$ 'Non-unit', M, K, ONE, T, LDT, C, LDC)
369+
ELSE
370+
CALL STRMM('Right', 'Lower', 'No Transpose',
371+
$ 'Non-unit', M, K, ONE, T, LDT, C, LDC)
372+
END IF
375373
!
376374
! C2 = C2 - C1*V2 = -C1*V2 + C2
377375
!
@@ -472,8 +470,8 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
472470
!
473471
CALL STRMM('Left', 'Upper', 'No Transpose', 'Unit',
474472
$ K, N, NEG_ONE, V(M-K+1,1), LDV, C(M-K+1,1), LDC)
475-
ELSE ! IF (RQ) THEN
476-
! We are computing C = CH' = C(I-V'T'V)
473+
ELSE IF (RQ) THEN
474+
! We are computing C = C op(H) = C(I-V' op(T) V)
477475
! Where: V = [ V2 V1] and C = [ C2 C1 ]
478476
! with the following dimensions:
479477
! V1\in\R^{K\times K}
@@ -485,36 +483,33 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
485483
! without having to allocate anything extra.
486484
! This lets us simplify our above equation to get
487485
!
488-
! C = CH' = [ C2, 0 ] (I - [ V2' ]T'[ V2, V1 ]
489-
! [ V1' ]
486+
! C = C op(H) = [ C2, 0 ] (I - [ V2' ]op(T)[ V2, V1 ]
487+
! [ V1' ]
490488
!
491-
! = [ C2, 0 ] - [ C2, 0 ] [ V2' ]T'[ V2, V1 ]
489+
! = [ C2, 0 ] - [ C2, 0 ] [ V2' ]op(T)[ V2, V1 ]
492490
! [ V1' ]
493491
!
494-
! = [ C2, 0 ] - C2*V2'*T'[ V2, V1 ]
492+
! = [ C2, 0 ] - C2*V2'*op(T)[ V2, V1 ]
495493
!
496-
! = [ C2, 0 ] - [ C2*V2'*T'*V2, C2*V2'*T'*V1 ]
494+
! = [ C2, 0 ] - [ C2*V2'*op(T)*V2, C2*V2'*op(T)*V1 ]
497495
!
498-
! = [ C2 - C2*V2'*T'*V2, -C2*V2'*T'*V1 ]
496+
! = [ C2 - C2*V2'*op(T)*V2, -C2*V2'*op(T)*V1 ]
499497
!
500498
! So, we can order our computations as follows:
501499
!
502500
! C1 = C2*V2'
503-
! C1 = C1*T'
501+
! C1 = C1*op(T)
504502
! C2 = C2 - C1*V2
505503
! C1 = -C1*V1
506504
!
507505
!
508506
! To achieve the same end result
509507
!
510-
! Check to ensure side and trans are the expected values
508+
! Check to ensure side has the expected value
511509
!
512510
IF( .NOT.SIDER ) THEN
513511
CALL XERBLA('SLARFB0C2', 2)
514512
RETURN
515-
ELSE IF(.NOT.TRANST) THEN
516-
CALL XERBLA('SLARFB0C2', 3)
517-
RETURN
518513
END IF
519514
!
520515
! C1 = C2*V2'
@@ -530,10 +525,15 @@ SUBROUTINE SLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
530525
$ ONE, C, LDC, V, LDV, ZERO, C(1, N-K+1), LDC)
531526
END IF
532527
!
533-
! C1 = C1*T'
528+
! C1 = C1*op(T)
534529
!
535-
CALL STRMM('Right', 'Lower', 'Transpose', 'Non-unit',
536-
$ M, K, ONE, T, LDT, C(1, N-K+1), LDC)
530+
IF( TRANST ) THEN
531+
CALL STRMM('Right', 'Lower', 'Transpose',
532+
$ 'Non-unit', M, K, ONE, T, LDT, C(1, N-K+1), LDC)
533+
ELSE
534+
CALL STRMM('Right', 'Upper', 'No Transpose',
535+
$ 'Non-unit', M, K, ONE, T, LDT, C(1, N-K+1), LDC)
536+
END IF
537537
!
538538
! C2 = C2 - C1*V2 = -C1*V2 + C2
539539
!

0 commit comments

Comments
 (0)