Skip to content

Commit 36adfe8

Browse files
committed
Merge branch 'hotfix-v0.2.8' into develop
2 parents a293066 + a07cc39 commit 36adfe8

File tree

11 files changed

+128
-28
lines changed

11 files changed

+128
-28
lines changed

Makefile.system

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -324,14 +324,16 @@ ifeq ($(ARCH), x86)
324324
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
325325
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
326326
ifneq ($(NO_AVX), 1)
327-
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
327+
DYNAMIC_CORE += SANDYBRIDGE
328+
#BULLDOZER PILEDRIVER
328329
endif
329330
endif
330331

331332
ifeq ($(ARCH), x86_64)
332333
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
333334
ifneq ($(NO_AVX), 1)
334-
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
335+
DYNAMIC_CORE += SANDYBRIDGE
336+
#BULLDOZER PILEDRIVER
335337
endif
336338
endif
337339

@@ -895,6 +897,7 @@ export CC
895897
export FC
896898
export BU
897899
export FU
900+
export NEED2UNDERSCORES
898901
export USE_THREAD
899902
export NUM_THREADS
900903
export NUM_CORES

cpuid.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@
105105
#define CORE_NANO 19
106106
#define CORE_SANDYBRIDGE 20
107107
#define CORE_BOBCAT 21
108-
#define CORE_BULLDOZER 22
109-
#define CORE_PILEDRIVER 23
108+
#define CORE_BULLDOZER CORE_BARCELONA
109+
#define CORE_PILEDRIVER CORE_BARCELONA
110110
#define CORE_HASWELL CORE_SANDYBRIDGE
111111

112112
#define HAVE_SSE (1 << 0)
@@ -198,8 +198,8 @@ typedef struct {
198198
#define CPUTYPE_NANO 43
199199
#define CPUTYPE_SANDYBRIDGE 44
200200
#define CPUTYPE_BOBCAT 45
201-
#define CPUTYPE_BULLDOZER 46
202-
#define CPUTYPE_PILEDRIVER 47
201+
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
202+
#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
203203
// this define is because BLAS doesn't have haswell specific optimizations yet
204204
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE
205205

driver/others/dynamic.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA;
6363
extern gotoblas_t gotoblas_BOBCAT;
6464
#ifndef NO_AVX
6565
extern gotoblas_t gotoblas_SANDYBRIDGE;
66-
extern gotoblas_t gotoblas_BULLDOZER;
67-
extern gotoblas_t gotoblas_PILEDRIVER;
66+
//extern gotoblas_t gotoblas_BULLDOZER;
67+
//extern gotoblas_t gotoblas_PILEDRIVER;
6868
#else
6969
//Use NEHALEM kernels for sandy bridge
7070
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
71+
#endif
72+
7173
#define gotoblas_BULLDOZER gotoblas_BARCELONA
7274
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
73-
#endif
75+
7476
//Use sandy bridge kernels for haswell.
7577
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
7678

exports/Makefile

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ ifndef NO_LAPACKE
1818
NO_LAPACKE = 0
1919
endif
2020

21+
ifndef NEED2UNDERSCORES
22+
NEED2UNDERSCORES=0
23+
endif
24+
2125
ifeq ($(OSNAME), WINNT)
2226
ifeq ($(F_COMPILER), GFORTRAN)
2327
EXTRALIB += -lgfortran
@@ -94,13 +98,13 @@ libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
9498
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)
9599

96100
libopenblas.def : gensymbol
97-
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
101+
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
98102

99103
libgoto2_shared.def : gensymbol
100-
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
104+
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
101105

102106
libgoto_hpl.def : gensymbol
103-
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
107+
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
104108

105109
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
106110
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
@@ -187,23 +191,23 @@ static : ../$(LIBNAME)
187191
rm -f goto.$(SUFFIX)
188192

189193
linux.def : gensymbol ../Makefile.system ../getarch.c
190-
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
194+
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
191195

192196
osx.def : gensymbol ../Makefile.system ../getarch.c
193-
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
197+
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
194198

195199
aix.def : gensymbol ../Makefile.system ../getarch.c
196-
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
200+
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
197201

198202
symbol.S : gensymbol
199-
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S
203+
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S
200204

201205
test : linktest.c
202206
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
203207
rm -f linktest
204208

205209
linktest.c : gensymbol ../Makefile.system ../getarch.c
206-
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c
210+
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c
207211

208212
clean ::
209213
@rm -f *.def *.dylib __.SYMDEF*

exports/gensymbol

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@
114114

115115
# ALLAUX -- Auxiliary routines called from all precisions
116116
# already provided by @blasobjs: xerbla, lsame
117-
ilaenv, ieeeck, lsamen, xerbla_array, iparmq,
118-
ilaprec, ilatrans, ilauplo, iladiag, chla_transtype,
117+
ilaenv, ieeeck, lsamen, iparmq,
118+
ilaprec, ilatrans, ilauplo, iladiag,
119119
ilaver, slamch, slamc3,
120120

121121
# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX.
@@ -2672,12 +2672,25 @@
26722672
#LAPACKE_zlagsy_work,
26732673
);
26742674

2675+
# These functions may need 2 underscores.
2676+
@lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,);
2677+
26752678
if ($ARGV[5] == 1) {
26762679
#NO_LAPACK=1
26772680
@underscore_objs = (@blasobjs, @misc_underscore_objs);
26782681
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
26792682
-d "../lapack-3.4.2" || -d "../lapack-netlib") {
2680-
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
2683+
2684+
if ($ARGV[7] == 0){
2685+
# NEED2UNDERSCORES=0
2686+
# Don't need 2 underscores
2687+
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs);
2688+
}else{
2689+
# Need 2 underscores
2690+
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
2691+
@need_2underscore_objs = (@lapack_embeded_underscore_objs);
2692+
};
2693+
26812694
} else {
26822695
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);
26832696
}
@@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){
27292742
print $objs, $bu, "\n";
27302743
}
27312744

2745+
foreach $objs (@need_2underscore_objs) {
2746+
print $objs, $bu, $bu, "\n";
2747+
}
2748+
27322749
# if ($ARGV[4] == 0) {
27332750
foreach $objs (@no_underscore_objs) {
27342751
print $objs, "\n";
@@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){
27502767
print "_", $objs, $bu, "\n";
27512768
}
27522769

2770+
foreach $objs (@need_2underscore_objs) {
2771+
print "_", $objs, $bu, $bu, "\n";
2772+
}
2773+
27532774
# if ($ARGV[4] == 0) {
27542775
foreach $objs (@no_underscore_objs) {
27552776
print "_", $objs, "\n";
@@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){
27672788
print $objs, $bu, "\n";
27682789
}
27692790

2791+
foreach $objs (@need_2underscore_objs) {
2792+
print $objs, $bu, $bu, "\n";
2793+
}
2794+
27702795
# if ($ARGV[4] == 0) {
27712796
foreach $objs (@no_underscore_objs) {
27722797
print $objs, "\n";
@@ -2791,6 +2816,17 @@ if ($ARGV[0] eq "win2k"){
27912816
print "\t$uppercase=$objs", "_ \@", $count, "\n";
27922817
$count ++;
27932818
}
2819+
2820+
foreach $objs (@need_2underscore_objs) {
2821+
$uppercase = $objs;
2822+
$uppercase =~ tr/[a-z]/[A-Z]/;
2823+
print "\t$objs=$objs","__ \@", $count, "\n";
2824+
$count ++;
2825+
print "\t",$objs, "__=$objs","__ \@", $count, "\n";
2826+
$count ++;
2827+
print "\t$uppercase=$objs", "__ \@", $count, "\n";
2828+
$count ++;
2829+
}
27942830

27952831
#for misc_common_objs
27962832
foreach $objs (@misc_common_objs) {
@@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){
28522888
print "\t$uppercase\_ = $objs","_\n";
28532889
$count ++;
28542890
}
2891+
2892+
foreach $objs (@need_2underscore_objs) {
2893+
$uppercase = $objs;
2894+
$uppercase =~ tr/[a-z]/[A-Z]/;
2895+
print "\t$objs=$objs","__ \@", $count, "\n";
2896+
$count ++;
2897+
print "\t",$objs, "__=$objs","__ \@", $count, "\n";
2898+
$count ++;
2899+
print "\t$uppercase=$objs", "__ \@", $count, "\n";
2900+
$count ++;
2901+
}
2902+
28552903
exit(0);
28562904
}
28572905

@@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){
28682916
print "_", $uppercase, "_:\n";
28692917
print "\tjmp\t_", $objs, "_\n";
28702918
}
2919+
2920+
foreach $objs (@need_2underscore_objs) {
2921+
$uppercase = $objs;
2922+
$uppercase =~ tr/[a-z]/[A-Z]/;
2923+
print "\t.align 16\n";
2924+
print "\t.globl _", $uppercase, "__\n";
2925+
print "_", $uppercase, "__:\n";
2926+
print "\tjmp\t_", $objs, "__\n";
2927+
}
2928+
28712929
exit(0);
28722930
}
28732931

@@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){
28802938
foreach $objs (@underscore_objs) {
28812939
print $objs, $bu, "();\n" if $objs ne "xerbla";
28822940
}
2941+
2942+
foreach $objs (@need_2underscore_objs) {
2943+
print $objs, $bu, $bu, "();\n";
2944+
}
2945+
28832946
# if ($ARGV[4] == 0) {
28842947
foreach $objs (@no_underscore_objs) {
28852948
print $objs, "();\n";

f_check

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ if ($compiler eq "") {
114114
$vendor = IBM;
115115
$openmp = "-openmp";
116116
}
117+
118+
# For a name with an embedded underscore, e.g. zho_ge, the compiler may append 2 underscores.
119+
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
120+
if ($data =~ /zho_ge__/) {
121+
$need2bu = 1;
122+
}
117123
}
118124

119125
if ($vendor eq "") {
@@ -245,6 +251,8 @@ if ($link ne "") {
245251

246252
$link =~ s/\-rpath\s+/\-rpath\@/g;
247253

254+
$link =~ s/\-rpath-link\s+/\-rpath-link\@/g;
255+
248256
@flags = split(/[\s\,\n]/, $link);
249257
# remove leading and trailing quotes from each flag.
250258
@flags = map {s/^['"]|['"]$//g; $_} @flags;
@@ -265,7 +273,15 @@ if ($link ne "") {
265273
$linker_L .= "-Wl,". $flags . " ";
266274
}
267275

268-
if ($flags =~ /^\-rpath/) {
276+
if ($flags =~ /^\-rpath\@/) {
277+
$flags =~ s/\@/\,/g;
278+
if ($vendor eq "PGI") {
279+
$flags =~ s/lib$/libso/;
280+
}
281+
$linker_L .= "-Wl,". $flags . " " ;
282+
}
283+
284+
if ($flags =~ /^\-rpath-link\@/) {
269285
$flags =~ s/\@/\,/g;
270286
if ($vendor eq "PGI") {
271287
$flags =~ s/lib$/libso/;
@@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1;
309325

310326
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne "";
311327
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne "";
328+
print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne "";
329+
330+
print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne "";
312331

313332
if (($linker_l ne "") || ($linker_a ne "")) {
314333
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n";

ftest3.f

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
double complex function zho_ge()
2+
3+
zho_ge = (0.0d0,0.0d0)
4+
5+
return
6+
end

getarch.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
354354
#define CORENAME "OPTERON"
355355
#endif
356356

357-
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
357+
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER)
358358
#define FORCE
359359
#define FORCE_INTEL
360360
#define ARCHITECTURE "X86"
@@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
384384
#define CORENAME "BOBCAT"
385385
#endif
386386

387-
#if defined (FORCE_BULLDOZER)
387+
#if 0
388388
#define FORCE
389389
#define FORCE_INTEL
390390
#define ARCHITECTURE "X86"
@@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
400400
#define CORENAME "BULLDOZER"
401401
#endif
402402

403-
#if defined (FORCE_PILEDRIVER)
403+
#if 0
404404
#define FORCE
405405
#define FORCE_INTEL
406406
#define ARCHITECTURE "X86"

getarch_2nd.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
int main(int argc, char **argv) {
1010

11-
if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) {
11+
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
1212
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
1313
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
1414
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);

interface/trtri.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *
6060
};
6161
#endif
6262

63-
extern void dtrtri_lapack_(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info);
63+
extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info);
6464

6565
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
6666

@@ -137,7 +137,10 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
137137
// double trtri_U single thread error
138138
// call dtrtri from lapack as a workaround.
139139
if(uplo==0){
140-
dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info);
140+
BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info);
141+
#ifndef PPC440
142+
blas_memory_free(buffer);
143+
#endif
141144
return 0;
142145
}
143146
#endif

0 commit comments

Comments
 (0)