Skip to content

Commit 0d1f30a

Browse files
authored
Merge pull request #81 from xianyi/develop
rebase
2 parents 9d1ea75 + 70a254d commit 0d1f30a

File tree

7 files changed

+127
-87
lines changed

7 files changed

+127
-87
lines changed

Makefile.power

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,24 +17,32 @@ endif
1717
ifeq ($(CORE), POWER9)
1818
ifneq ($(C_COMPILER), PGI)
1919
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
20+
ifeq ($(C_COMPILER), GCC)
2021
ifneq ($(GCCVERSIONGT4), 1)
2122
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
2223
CCOMMON_OPT += -mcpu=power8 -mtune=power8
2324
else
2425
CCOMMON_OPT += -mcpu=power9 -mtune=power9
2526
endif
2627
else
28+
CCOMMON_OPT += -mcpu=power9 -mtune=power9
29+
endif
30+
else
2731
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
2832
endif
2933
ifneq ($(F_COMPILER), PGI)
3034
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
35+
ifeq ($(C_COMPILER), GCC)
3136
ifneq ($(GCCVERSIONGT4), 1)
3237
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
3338
FCOMMON_OPT += -mcpu=power8 -mtune=power8
3439
else
3540
FCOMMON_OPT += -mcpu=power9 -mtune=power9
3641
endif
3742
else
43+
FCOMMON_OPT += -mcpu=power9 -mtune=power9
44+
endif
45+
else
3846
FCOMMON_OPT += -O2 -Mrecursive
3947
endif
4048
endif

driver/level3/level3_syrk_threaded.c

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -526,7 +526,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
526526
BLASLONG width, i, j, k;
527527
BLASLONG n, n_from, n_to;
528528
int mode, mask;
529-
double dnum;
529+
double dnum, di, dinum;
530530

531531
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) {
532532
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
@@ -601,9 +601,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
601601

602602
if (nthreads - num_cpu > 1) {
603603

604-
double di = (double)i;
604+
di = (double)i;
605605

606-
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) );
606+
dinum = di * di + dnum;
607+
608+
if (dinum > 0)
609+
width = (((BLASLONG)((sqrt(dinum) - di) + mask)/(mask+1)) * (mask+1) );
610+
else
611+
width = (((BLASLONG)(- di + mask)/(mask+1)) * (mask+1) );
607612

608613
if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) );
609614

@@ -643,10 +648,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
643648

644649
if (nthreads - num_cpu > 1) {
645650

646-
double di = (double)i;
651+
di = (double)i;
647652

648-
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1));
653+
dinum = di * di +dnum;
649654

655+
if (dinum > 0)
656+
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1));
657+
else
658+
width = (((BLASLONG)(- di + mask)/(mask+1)) * (mask+1));
659+
650660
if ((width > n - i) || (width < mask)) width = n - i;
651661

652662
} else {

lapack-netlib/LAPACKE/include/lapack.h

Lines changed: 93 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -2513,7 +2513,7 @@ void LAPACK_zgesvdq(
25132513
lapack_complex_double* U, lapack_int const* ldu,
25142514
lapack_complex_double* V, lapack_int const* ldv, lapack_int* numrank,
25152515
lapack_int* iwork, lapack_int const* liwork,
2516-
lapack_complex_float* cwork, lapack_int* lcwork,
2516+
lapack_complex_double* cwork, lapack_int* lcwork,
25172517
double* rwork, lapack_int const* lrwork,
25182518
lapack_int* info );
25192519

@@ -3650,45 +3650,58 @@ void LAPACK_zggrqf(
36503650
lapack_int* info );
36513651

36523652
#define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD)
3653-
lapack_int LAPACK_sggsvd( char const* jobu, char const* jobv, char const* jobq,
3654-
lapack_int* m, lapack_int* n, lapack_int* p,
3655-
lapack_int* k, lapack_int* l, float* a,
3656-
lapack_int* lda, float* b, lapack_int* ldb,
3657-
float* alpha, float* beta, float* u, lapack_int* ldu,
3658-
float* v, lapack_int* ldv, float* q, lapack_int* ldq,
3659-
float* work, lapack_int* iwork, lapack_int* info );
3653+
lapack_int LAPACK_sggsvd(
3654+
char const* jobu, char const* jobv, char const* jobq,
3655+
lapack_int const* m, lapack_int const* n, lapack_int const* p,
3656+
lapack_int* k, lapack_int* l,
3657+
float* a, lapack_int const* lda,
3658+
float* b, lapack_int const* ldb,
3659+
float* alpha, float* beta,
3660+
float* u, lapack_int const* ldu,
3661+
float* v, lapack_int const* ldv,
3662+
float* q, lapack_int const* ldq,
3663+
float* work, lapack_int* iwork, lapack_int* info );
36603664

36613665
#define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD)
3662-
lapack_int LAPACK_dggsvd( char const* jobu, char const* jobv, char const* jobq,
3663-
lapack_int* m, lapack_int* n, lapack_int* p,
3664-
lapack_int* k, lapack_int* l, double* a,
3665-
lapack_int* lda, double* b, lapack_int* ldb,
3666-
double* alpha, double* beta, double* u,
3667-
lapack_int* ldu, double* v, lapack_int* ldv, double* q,
3668-
lapack_int* ldq, float* work, lapack_int* iwork, lapack_int* info );
3666+
lapack_int LAPACK_dggsvd(
3667+
char const* jobu, char const* jobv, char const* jobq,
3668+
lapack_int const* m, lapack_int const* n, lapack_int const* p,
3669+
lapack_int* k, lapack_int* l,
3670+
double* a, lapack_int const* lda,
3671+
double* b, lapack_int const* ldb,
3672+
double* alpha, double* beta,
3673+
double* u, lapack_int const* ldu,
3674+
double* v, lapack_int const* ldv,
3675+
double* q, lapack_int const* ldq,
3676+
double* work, lapack_int* iwork, lapack_int* info );
36693677

36703678
#define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD)
3671-
lapack_int LAPACK_cggsvd( char const* jobu, char const* jobv, char const* jobq,
3672-
lapack_int* m, lapack_int* n, lapack_int* p,
3673-
lapack_int* k, lapack_int* l,
3674-
lapack_complex_float* a, lapack_int* lda,
3675-
lapack_complex_float* b, lapack_int* ldb,
3676-
float* alpha, float* beta, lapack_complex_float* u,
3677-
lapack_int* ldu, lapack_complex_float* v,
3678-
lapack_int* ldv, lapack_complex_float* q,
3679-
lapack_int* ldq, float* work, lapack_int* rwork, lapack_int* iwork, lapack_int *info );
3679+
lapack_int LAPACK_cggsvd(
3680+
char const* jobu, char const* jobv, char const* jobq,
3681+
lapack_int const* m, lapack_int const* n, lapack_int const* p,
3682+
lapack_int* k, lapack_int* l,
3683+
lapack_complex_float* a, lapack_int const* lda,
3684+
lapack_complex_float* b, lapack_int const* ldb,
3685+
float* alpha, float* beta,
3686+
lapack_complex_float* u, lapack_int const* ldu,
3687+
lapack_complex_float* v, lapack_int const* ldv,
3688+
lapack_complex_float* q, lapack_int const* ldq,
3689+
lapack_complex_float* work, float* rwork,
3690+
lapack_int* iwork, lapack_int* info );
36803691

36813692
#define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD)
3682-
lapack_int LAPACK_zggsvd( char const* jobu, char const* jobv, char const* jobq,
3683-
lapack_int* m, lapack_int* n, lapack_int* p,
3684-
lapack_int* k, lapack_int* l,
3685-
lapack_complex_double* a, lapack_int* lda,
3686-
lapack_complex_double* b, lapack_int* ldb,
3687-
double* alpha, double* beta,
3688-
lapack_complex_double* u, lapack_int* ldu,
3689-
lapack_complex_double* v, lapack_int* ldv,
3690-
lapack_complex_double* q, lapack_int* ldq,
3691-
float* work, lapack_int* rwork, lapack_int* iwork, lapack_int* info );
3693+
lapack_int LAPACK_zggsvd(
3694+
char const* jobu, char const* jobv, char const* jobq,
3695+
lapack_int const* m, lapack_int const* n, lapack_int const* p,
3696+
lapack_int* k, lapack_int* l,
3697+
lapack_complex_double* a, lapack_int const* lda,
3698+
lapack_complex_double* b, lapack_int const* ldb,
3699+
double* alpha, double* beta,
3700+
lapack_complex_double* u, lapack_int const* ldu,
3701+
lapack_complex_double* v, lapack_int const* ldv,
3702+
lapack_complex_double* q, lapack_int const* ldq,
3703+
lapack_complex_double* work, double* rwork,
3704+
lapack_int* iwork, lapack_int* info );
36923705

36933706
#define LAPACK_cggsvd3 LAPACK_GLOBAL(cggsvd3,CGGSVD3)
36943707
void LAPACK_cggsvd3(
@@ -3753,49 +3766,58 @@ void LAPACK_zggsvd3(
37533766
lapack_int* info );
37543767

37553768
#define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP)
3756-
lapack_int LAPACK_sggsvp( char const* jobu, char const* jobv, char const* jobq,
3757-
lapack_int* m, lapack_int* p, lapack_int* n, float* a,
3758-
lapack_int* lda, float* b, lapack_int* ldb, float* tola,
3759-
float* tolb, lapack_int* k, lapack_int* l, float* u,
3760-
lapack_int* ldu, float* v, lapack_int* ldv, float* q,
3761-
lapack_int* ldq, lapack_int* iwork, float* tau,
3762-
float* work, lapack_int* info);
3769+
lapack_int LAPACK_sggsvp(
3770+
char const* jobu, char const* jobv, char const* jobq,
3771+
lapack_int const* m, lapack_int const* p, lapack_int const* n,
3772+
float* a, lapack_int const* lda,
3773+
float* b, lapack_int const* ldb,
3774+
float* tola, float* tolb,
3775+
lapack_int* k, lapack_int* l,
3776+
float* u, lapack_int const* ldu,
3777+
float* v, lapack_int const* ldv,
3778+
float* q, lapack_int const* ldq,
3779+
lapack_int* iwork, float* tau,
3780+
float* work, lapack_int* info );
37633781

37643782
#define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP)
3765-
lapack_int LAPACK_dggsvp( char const* jobu, char const* jobv, char const* jobq,
3766-
lapack_int* m, lapack_int* p, lapack_int* n, double* a,
3767-
lapack_int* lda, double* b, lapack_int* ldb,
3768-
double* tola, double* tolb, lapack_int* k,
3769-
lapack_int* l, double* u, lapack_int* ldu, double* v,
3770-
lapack_int* ldv, double* q, lapack_int* ldq,
3771-
lapack_int* iwork, double* tau, double* work,
3772-
lapack_int* info);
3783+
lapack_int LAPACK_dggsvp(
3784+
char const* jobu, char const* jobv, char const* jobq,
3785+
lapack_int const* m, lapack_int const* p, lapack_int const* n,
3786+
double* a, lapack_int const* lda,
3787+
double* b, lapack_int const* ldb,
3788+
double* tola, double* tolb,
3789+
lapack_int* k, lapack_int* l,
3790+
double* u, lapack_int const* ldu,
3791+
double* v, lapack_int const* ldv,
3792+
double* q, lapack_int const* ldq,
3793+
lapack_int* iwork, double* tau,
3794+
double* work, lapack_int* info );
37733795

37743796
#define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP)
3775-
lapack_int LAPACK_cggsvp( char const* jobu, char const* jobv, char const* jobq,
3776-
lapack_int* m, lapack_int* p, lapack_int* n,
3777-
lapack_complex_float* a, lapack_int* lda,
3778-
lapack_complex_float* b, lapack_int* ldb, float* tola,
3779-
float* tolb, lapack_int* k, lapack_int* l,
3780-
lapack_complex_float* u, lapack_int* ldu,
3781-
lapack_complex_float* v, lapack_int* ldv,
3782-
lapack_complex_float* q, lapack_int* ldq,
3783-
lapack_int* iwork, lapack_int* rwork,
3784-
lapack_complex_float* tau, lapack_complex_float* work,
3785-
lapack_int* info);
3797+
lapack_int LAPACK_cggsvp(
3798+
char const* jobu, char const* jobv, char const* jobq,
3799+
lapack_int const* m, lapack_int const* p, lapack_int const* n,
3800+
lapack_complex_float* a, lapack_int const* lda,
3801+
lapack_complex_float* b, lapack_int const* ldb,
3802+
float* tola, float* tolb, lapack_int* k, lapack_int* l,
3803+
lapack_complex_float* u, lapack_int const* ldu,
3804+
lapack_complex_float* v, lapack_int const* ldv,
3805+
lapack_complex_float* q, lapack_int const* ldq,
3806+
lapack_int* iwork, float* rwork, lapack_complex_float* tau,
3807+
lapack_complex_float* work, lapack_int* info );
37863808

37873809
#define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP)
3788-
lapack_int LAPACK_zggsvp( char const* jobu, char const* jobv, char const* jobq,
3789-
lapack_int* m, lapack_int* p, lapack_int* n,
3790-
lapack_complex_double* a, lapack_int* lda,
3791-
lapack_complex_double* b, lapack_int* ldb,
3792-
double* tola, double* tolb, lapack_int* k,
3793-
lapack_int* l, lapack_complex_double* u,
3794-
lapack_int* ldu, lapack_complex_double* v,
3795-
lapack_int* ldv, lapack_complex_double* q,
3796-
lapack_int* ldq, lapack_int* iwork, lapack_int* rwork,
3797-
lapack_complex_double* tau, lapack_complex_double* work,
3798-
lapack_int* info);
3810+
lapack_int LAPACK_zggsvp(
3811+
char const* jobu, char const* jobv, char const* jobq,
3812+
lapack_int const* m, lapack_int const* p, lapack_int const* n,
3813+
lapack_complex_double* a, lapack_int const* lda,
3814+
lapack_complex_double* b, lapack_int const* ldb,
3815+
double* tola, double* tolb, lapack_int* k, lapack_int* l,
3816+
lapack_complex_double* u, lapack_int const* ldu,
3817+
lapack_complex_double* v, lapack_int const* ldv,
3818+
lapack_complex_double* q, lapack_int const* ldq,
3819+
lapack_int* iwork, double* rwork, lapack_complex_double* tau,
3820+
lapack_complex_double* work, lapack_int* info );
37993821

38003822
#define LAPACK_cggsvp3 LAPACK_GLOBAL(cggsvp3,CGGSVP3)
38013823
void LAPACK_cggsvp3(

lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,8 +47,8 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
4747
lapack_complex_float* cwork = NULL;
4848
lapack_complex_float cwork_query;
4949
lapack_int lrwork = -1;
50-
double* rwork = NULL;
51-
double rwork_query;
50+
float* rwork = NULL;
51+
float rwork_query;
5252
lapack_int i;
5353
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
5454
LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 );
@@ -84,7 +84,7 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
8484
info = LAPACK_WORK_MEMORY_ERROR;
8585
goto exit_level_0;
8686
}
87-
rwork = (double*)LAPACKE_malloc( sizeof(double) * lrwork );
87+
rwork = (float*)LAPACKE_malloc( sizeof(float) * lrwork );
8888
if( rwork == NULL ) {
8989
info = LAPACK_WORK_MEMORY_ERROR;
9090
goto exit_level_0;

lapack-netlib/TESTING/EIG/cchkhb2stg.f

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -680,8 +680,8 @@ SUBROUTINE CCHKHB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE,
680680
* the one from above. Compare it with D1 computed
681681
* using the DSBTRD.
682682
*
683-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
684-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
683+
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
684+
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
685685
CALL CLACPY( ' ', K+1, N, A, LDA, U, LDU )
686686
LH = MAX(1, 4*N)
687687
LW = LWORK - LH
@@ -753,8 +753,8 @@ SUBROUTINE CCHKHB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE,
753753
* the one from above. Compare it with D1 computed
754754
* using the DSBTRD.
755755
*
756-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
757-
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
756+
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
757+
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
758758
CALL CLACPY( ' ', K+1, N, A, LDA, U, LDU )
759759
LH = MAX(1, 4*N)
760760
LW = LWORK - LH

lapack-netlib/TESTING/EIG/schksb2stg.f

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -670,8 +670,8 @@ SUBROUTINE SCHKSB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE,
670670
* the one from above. Compare it with D1 computed
671671
* using the SSBTRD.
672672
*
673-
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
674-
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
673+
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
674+
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
675675
CALL SLACPY( ' ', K+1, N, A, LDA, U, LDU )
676676
LH = MAX(1, 4*N)
677677
LW = LWORK - LH

lapack-netlib/TESTING/EIG/schkst2stg.f

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -999,8 +999,8 @@ SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH,
999999
* the one from above. Compare it with D1 computed
10001000
* using the 1-stage.
10011001
*
1002-
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
1003-
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
1002+
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
1003+
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
10041004
CALL SLACPY( "U", N, N, A, LDA, V, LDU )
10051005
LH = MAX(1, 4*N)
10061006
LW = LWORK - LH

0 commit comments

Comments
 (0)