Skip to content

Commit 2a83ec1

Browse files
authored
Rewrite to use FMA with Householder reflectors
1 parent e1c3c34 commit 2a83ec1

File tree

2 files changed

+74
-66
lines changed

2 files changed

+74
-66
lines changed

lapack-netlib/SRC/dhgeqz.f

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,9 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
337337
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
338338
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
339339
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
340-
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
341-
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
342-
$ WR2
340+
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
341+
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
342+
$ WABS, WI, WR, WR2
343343
* ..
344344
* .. Local Arrays ..
345345
DOUBLE PRECISION V( 3 )
@@ -1127,25 +1127,27 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
11271127
H( J+2, J-1 ) = ZERO
11281128
END IF
11291129
*
1130+
T2 = TAU*V( 2 )
1131+
T3 = TAU*V( 3 )
11301132
DO 230 JC = J, ILASTM
1131-
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
1132-
$ H( J+2, JC ) )
1133-
H( J, JC ) = H( J, JC ) - TEMP
1134-
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
1135-
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
1136-
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
1137-
$ T( J+2, JC ) )
1138-
T( J, JC ) = T( J, JC ) - TEMP2
1139-
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
1140-
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
1133+
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
1134+
$ H( J+2, JC )
1135+
H( J, JC ) = H( J, JC ) - TEMP*TAU
1136+
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
1137+
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
1138+
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
1139+
$ T( J+2, JC )
1140+
T( J, JC ) = T( J, JC ) - TEMP2*TAU
1141+
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
1142+
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
11411143
230 CONTINUE
11421144
IF( ILQ ) THEN
11431145
DO 240 JR = 1, N
1144-
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
1145-
$ Q( JR, J+2 ) )
1146-
Q( JR, J ) = Q( JR, J ) - TEMP
1147-
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
1148-
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
1146+
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
1147+
$ Q( JR, J+2 )
1148+
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
1149+
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
1150+
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
11491151
240 CONTINUE
11501152
END IF
11511153
*
@@ -1233,27 +1235,29 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
12331235
*
12341236
* Apply transformations from the right.
12351237
*
1238+
T2 = TAU*V(2)
1239+
T3 = TAU*V(3)
12361240
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
1237-
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
1238-
$ H( JR, J+2 ) )
1239-
H( JR, J ) = H( JR, J ) - TEMP
1240-
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
1241-
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
1241+
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
1242+
$ H( JR, J+2 )
1243+
H( JR, J ) = H( JR, J ) - TEMP*TAU
1244+
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
1245+
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
12421246
260 CONTINUE
12431247
DO 270 JR = IFRSTM, J + 2
1244-
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
1245-
$ T( JR, J+2 ) )
1246-
T( JR, J ) = T( JR, J ) - TEMP
1247-
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
1248-
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
1248+
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
1249+
$ T( JR, J+2 )
1250+
T( JR, J ) = T( JR, J ) - TEMP*TAU
1251+
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
1252+
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
12491253
270 CONTINUE
12501254
IF( ILZ ) THEN
12511255
DO 280 JR = 1, N
1252-
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
1253-
$ Z( JR, J+2 ) )
1254-
Z( JR, J ) = Z( JR, J ) - TEMP
1255-
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
1256-
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
1256+
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
1257+
$ Z( JR, J+2 )
1258+
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
1259+
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
1260+
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
12571261
280 CONTINUE
12581262
END IF
12591263
T( J+1, J ) = ZERO

lapack-netlib/SRC/shgeqz.f

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,9 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
337337
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
338338
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
339339
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
340-
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
341-
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
342-
$ WR2
340+
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
341+
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
342+
$ WABS, WI, WR, WR2
343343
* ..
344344
* .. Local Arrays ..
345345
REAL V( 3 )
@@ -1127,25 +1127,27 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
11271127
H( J+2, J-1 ) = ZERO
11281128
END IF
11291129
*
1130+
T2 = TAU * V( 2 )
1131+
T3 = TAU * V( 3 )
11301132
DO 230 JC = J, ILASTM
1131-
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
1132-
$ H( J+2, JC ) )
1133-
H( J, JC ) = H( J, JC ) - TEMP
1134-
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
1135-
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
1136-
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
1137-
$ T( J+2, JC ) )
1138-
T( J, JC ) = T( J, JC ) - TEMP2
1139-
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
1140-
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
1133+
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
1134+
$ H( J+2, JC )
1135+
H( J, JC ) = H( J, JC ) - TEMP*TAU
1136+
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
1137+
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
1138+
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
1139+
$ T( J+2, JC )
1140+
T( J, JC ) = T( J, JC ) - TEMP2*TAU
1141+
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
1142+
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
11411143
230 CONTINUE
11421144
IF( ILQ ) THEN
11431145
DO 240 JR = 1, N
1144-
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
1145-
$ Q( JR, J+2 ) )
1146-
Q( JR, J ) = Q( JR, J ) - TEMP
1147-
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
1148-
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
1146+
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
1147+
$ Q( JR, J+2 )
1148+
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
1149+
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
1150+
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
11491151
240 CONTINUE
11501152
END IF
11511153
*
@@ -1233,27 +1235,29 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
12331235
*
12341236
* Apply transformations from the right.
12351237
*
1238+
T2 = TAU*V( 2 )
1239+
T3 = TAU*V( 3 )
12361240
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
1237-
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
1238-
$ H( JR, J+2 ) )
1239-
H( JR, J ) = H( JR, J ) - TEMP
1240-
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
1241-
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
1241+
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
1242+
$ H( JR, J+2 )
1243+
H( JR, J ) = H( JR, J ) - TEMP*TAU
1244+
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
1245+
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
12421246
260 CONTINUE
12431247
DO 270 JR = IFRSTM, J + 2
1244-
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
1245-
$ T( JR, J+2 ) )
1246-
T( JR, J ) = T( JR, J ) - TEMP
1247-
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
1248-
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
1248+
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
1249+
$ T( JR, J+2 )
1250+
T( JR, J ) = T( JR, J ) - TEMP*TAU
1251+
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
1252+
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
12491253
270 CONTINUE
12501254
IF( ILZ ) THEN
12511255
DO 280 JR = 1, N
1252-
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
1253-
$ Z( JR, J+2 ) )
1254-
Z( JR, J ) = Z( JR, J ) - TEMP
1255-
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
1256-
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
1256+
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
1257+
$ Z( JR, J+2 )
1258+
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
1259+
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
1260+
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
12571261
280 CONTINUE
12581262
END IF
12591263
T( J+1, J ) = ZERO

0 commit comments

Comments
 (0)