Skip to content

Commit 7014148

Browse files
authored
Merge pull request #1163 from jprhyne/dlarftTermCase
Add non-trivial terminating case to xLARFT
2 parents c08dcfa + dd07616 commit 7014148

File tree

17 files changed

+1397
-47
lines changed

17 files changed

+1397
-47
lines changed

SRC/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ set(SLASRC
108108
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqp2rk.f slaqp3rk.f slaqsb.f slaqsp.f slaqsy.f
109109
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
110110
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
111-
slarf.f slarf1f.f slarf1l.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f
111+
slarf.f slarf1f.f slarf1l.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarft_lvl2.f slarfx.f slarfy.f
112112
slargv.f slarmm.f slarrv.f slartv.f
113113
slarz.f slarzb.f slarzt.f slasy2.f
114114
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
@@ -220,7 +220,7 @@ set(CLASRC
220220
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqp2rk.f claqp3rk.f claqsb.f
221221
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
222222
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
223-
clarf.f clarf1f.f clarf1l.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
223+
clarf.f clarf1f.f clarf1l.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f clarft_lvl2.f
224224
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f90 clartv.f
225225
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f90
226226
claswp.f clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
@@ -309,7 +309,7 @@ set(DLASRC
309309
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqp2rk.f dlaqp3rk.f dlaqsb.f dlaqsp.f dlaqsy.f
310310
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
311311
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
312-
dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f dlarf1f.f dlarf1l.f
312+
dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarft_lvl2.f dlarfx.f dlarfy.f dlarf1f.f dlarf1l.f
313313
dlargv.f dlarmm.f dlarrv.f dlartv.f
314314
dlarz.f dlarzb.f dlarzt.f dlaswp.f dlasy2.f
315315
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
@@ -421,7 +421,7 @@ set(ZLASRC
421421
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
422422
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
423423
zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f zlarf1f.f zlarf1l.f
424-
zlarfg.f zlarfgp.f zlarft.f
424+
zlarfg.f zlarfgp.f zlarft.f zlarft_lvl2.f
425425
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f90 zlartv.f
426426
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
427427
zlassq.f90 zlaswp.f zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f

SRC/Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ SLASRC = \
137137
slaqgb.o slaqge.o slaqp2.o slaqps.o slaqp2rk.o slaqp3rk.o slaqsb.o slaqsp.o slaqsy.o \
138138
slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
139139
slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \
140-
slarf.o slarf1f.o slarf1l.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o \
140+
slarf.o slarf1f.o slarf1l.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarft_lvl2.o slarfx.o slarfy.o \
141141
slargv.o slarmm.o slarrv.o slartv.o \
142142
slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \
143143
slasyf_rk.o \
@@ -249,7 +249,7 @@ CLASRC = \
249249
claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqp2rk.o claqp3rk.o claqsb.o \
250250
claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
251251
claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \
252-
clarf.o clarf1f.o clarf1l.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarfgp.o \
252+
clarf.o clarf1f.o clarf1l.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarft_lvl2.o clarfgp.o \
253253
clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
254254
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
255255
claswp.o clasyf.o clasyf_rook.o clasyf_rk.o clasyf_aa.o \
@@ -339,7 +339,7 @@ DLASRC = \
339339
dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqp2rk.o dlaqp3rk.o dlaqsb.o dlaqsp.o dlaqsy.o \
340340
dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
341341
dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \
342-
dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o dlarf1f.o dlarf1l.o\
342+
dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarft_lvl2.o dlarfx.o dlarfy.o dlarf1f.o dlarf1l.o\
343343
dlargv.o dlarmm.o dlarrv.o dlartv.o \
344344
dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \
345345
dlasyf.o dlasyf_rook.o dlasyf_rk.o \
@@ -454,7 +454,7 @@ ZLASRC = \
454454
zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \
455455
zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \
456456
zlarcm.o zlarf.o zlarfb.o zlarfb_gett.o zlarf1f.o zlarf1l.o \
457-
zlarfg.o zlarft.o zlarfgp.o \
457+
zlarfg.o zlarft.o zlarft_lvl2.o zlarfgp.o \
458458
zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
459459
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
460460
zlassq.o zlaswp.o zlasyf.o zlasyf_rook.o zlasyf_rk.o zlasyf_aa.o \

SRC/VARIANTS/Makefile

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,8 @@ LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
3030

3131
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o
3232

33-
LARFTL2 = larft/LL-LVL2/clarft.o larft/LL-LVL2/dlarft.o larft/LL-LVL2/slarft.o larft/LL-LVL2/zlarft.o
34-
35-
3633
.PHONY: all
37-
all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a larftl2.a
34+
all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a
3835

3936
cholrl.a: $(CHOLRL)
4037
$(AR) $(ARFLAGS) $@ $^
@@ -60,13 +57,9 @@ qrll.a: $(QRLL)
6057
$(AR) $(ARFLAGS) $@ $^
6158
$(RANLIB) $@
6259

63-
larftl2.a: $(LARFTL2)
64-
$(AR) $(ARFLAGS) $@ $^
65-
$(RANLIB) $@
66-
6760
.PHONY: clean cleanobj cleanlib
6861
clean: cleanobj cleanlib
6962
cleanobj:
70-
rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) $(LARFTL2)
63+
rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL)
7164
cleanlib:
7265
rm -f *.a

SRC/VARIANTS/larft/LL-LVL2/clarft.f

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
*> \brief \b CLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm
1+
*> \brief \b CLARFT_LVL2: Level 2 BLAS version for terminating case of CLARFT
22
*
33
* =========== DOCUMENTATION ===========
44
*
55
* Online html documentation available at
66
* http://www.netlib.org/lapack/explore-html/
77
*
8-
*> Download CLARFT + dependencies
8+
*> Download CLARFT_LVL2 + dependencies
99
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.f">
1010
*> [TGZ]</a>
1111
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clarft.f">
@@ -16,7 +16,8 @@
1616
* Definition:
1717
* ===========
1818
*
19-
* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
19+
* SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
20+
* T, LDT )
2021
*
2122
* .. Scalar Arguments ..
2223
* CHARACTER DIRECT, STOREV
@@ -32,7 +33,7 @@
3233
*>
3334
*> \verbatim
3435
*>
35-
*> CLARFT forms the triangular factor T of a complex block reflector H
36+
*> CLARFT_LVL2 forms the triangular factor T of a complex block reflector H
3637
*> of order n, which is defined as a product of k elementary reflectors.
3738
*>
3839
*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
@@ -322,6 +323,6 @@ SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
322323
END IF
323324
RETURN
324325
*
325-
* End of CLARFT
326+
* End of CLARFT_LVL2
326327
*
327328
END

SRC/VARIANTS/larft/LL-LVL2/dlarft.f

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
*> \brief \b DLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm
1+
*> \brief \b DLARFT_LVL2: Level 2 BLAS version for terminating case of DLARFT.
22
*
33
* =========== DOCUMENTATION ===========
44
*
55
* Online html documentation available at
66
* http://www.netlib.org/lapack/explore-html/
77
*
8-
*> Download DLARFT + dependencies
8+
*> Download DLARFT_LVL2 + dependencies
99
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarft.f">
1010
*> [TGZ]</a>
1111
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarft.f">
@@ -16,7 +16,8 @@
1616
* Definition:
1717
* ===========
1818
*
19-
* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
19+
* SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
20+
* T, LDT )
2021
*
2122
* .. Scalar Arguments ..
2223
* CHARACTER DIRECT, STOREV
@@ -32,7 +33,7 @@
3233
*>
3334
*> \verbatim
3435
*>
35-
*> DLARFT forms the triangular factor T of a real block reflector H
36+
*> DLARFT_LVL2 forms the triangular factor T of a real block reflector H
3637
*> of order n, which is defined as a product of k elementary reflectors.
3738
*>
3839
*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
@@ -320,6 +321,6 @@ SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
320321
END IF
321322
RETURN
322323
*
323-
* End of DLARFT
324+
* End of DLARFT_LVL2
324325
*
325326
END

SRC/VARIANTS/larft/LL-LVL2/slarft.f

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
*> \brief \b SLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm.
1+
*> \brief \b SLARFT_LVL2: Level 2 BLAS version for terminating case of SLARFT.
22
*
33
* =========== DOCUMENTATION ===========
44
*
55
* Online html documentation available at
66
* http://www.netlib.org/lapack/explore-html/
77
*
8-
*> Download SLARFT + dependencies
8+
*> Download SLARFT_LVL2 + dependencies
99
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slarft.f">
1010
*> [TGZ]</a>
1111
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slarft.f">
@@ -16,7 +16,8 @@
1616
* Definition:
1717
* ===========
1818
*
19-
* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
19+
* SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
20+
* T, LDT )
2021
*
2122
* .. Scalar Arguments ..
2223
* CHARACTER DIRECT, STOREV
@@ -32,7 +33,7 @@
3233
*>
3334
*> \verbatim
3435
*>
35-
*> SLARFT forms the triangular factor T of a real block reflector H
36+
*> SLARFT_LVL2 forms the triangular factor T of a real block reflector H
3637
*> of order n, which is defined as a product of k elementary reflectors.
3738
*>
3839
*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
@@ -320,6 +321,6 @@ SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
320321
END IF
321322
RETURN
322323
*
323-
* End of SLARFT
324+
* End of SLARFT_LVL2
324325
*
325326
END

SRC/VARIANTS/larft/LL-LVL2/zlarft.f

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
*> \brief \b ZLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm.
1+
*> \brief \b ZLARFT_LVL2: Level 2 BLAS version for terminating case of ZLARFT.
22
*
33
* =========== DOCUMENTATION ===========
44
*
55
* Online html documentation available at
66
* http://www.netlib.org/lapack/explore-html/
77
*
8-
*> Download ZLARFT + dependencies
8+
*> Download ZLARFT_LVL2 + dependencies
99
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.f">
1010
*> [TGZ]</a>
1111
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarft.f">
@@ -16,7 +16,8 @@
1616
* Definition:
1717
* ===========
1818
*
19-
* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
19+
* SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
20+
* T, LDT )
2021
*
2122
* .. Scalar Arguments ..
2223
* CHARACTER DIRECT, STOREV
@@ -32,7 +33,7 @@
3233
*>
3334
*> \verbatim
3435
*>
35-
*> ZLARFT forms the triangular factor T of a complex block reflector H
36+
*> ZLARFT_LVL2 forms the triangular factor T of a complex block reflector H
3637
*> of order n, which is defined as a product of k elementary reflectors.
3738
*>
3839
*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
@@ -321,6 +322,6 @@ SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
321322
END IF
322323
RETURN
323324
*
324-
* End of ZLARFT
325+
* End of ZLARFT_LVL2
325326
*
326327
END

SRC/clarft.f

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,12 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
178178
* .. Parameters ..
179179
*
180180
COMPLEX ONE, NEG_ONE, ZERO
181-
PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0)
181+
PARAMETER(ONE=(1.0E+0,0.0E+0), ZERO = (0.0E+0,0.0E+0),
182+
$ NEG_ONE=(-1.0E+0,0.0E+0))
182183
*
183184
* .. Local Scalars ..
184185
*
185-
INTEGER I,J,L
186+
INTEGER I,J,L,NX
186187
LOGICAL QR,LQ,QL,DIRF,COLV
187188
*
188189
* .. External Subroutines ..
@@ -192,7 +193,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
192193
* .. External Functions..
193194
*
194195
LOGICAL LSAME
195-
EXTERNAL LSAME
196+
INTEGER ILAENV
197+
EXTERNAL LSAME, ILAENV
196198
*
197199
* .. Intrinsic Functions..
198200
*
@@ -218,6 +220,14 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
218220
RETURN
219221
END IF
220222
*
223+
* Determine when to cross over into the level 2 based implementation
224+
*
225+
NX = ILAENV(3, "CLARFT", DIRECT // STOREV, N, K, -1, -1)
226+
IF(K.LT.NX) THEN
227+
CALL CLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
228+
RETURN
229+
END IF
230+
*
221231
* Beginning of executable statements
222232
*
223233
L = K / 2

0 commit comments

Comments
 (0)