Skip to content

Commit 12d3d94

Browse files
authored
Merge pull request #3316 from xianyi/develop
Merge develop for bugfix release 0.3.17
2 parents 4777eb6 + f349be3 commit 12d3d94

File tree

9 files changed: 69 additions and 27 deletions.

Changelog.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,20 @@
11
OpenBLAS ChangeLog
22
====================================================================
3+
Version 0.3.17
4+
15-Jul-2021
5+
6+
common:
7+
- reverted the optimization of SGEMV_N/DGEMV_N for small input sizes
8+
and consecutive arguments as it led to stack overflows on x86_64
9+
with some operating systems (notably OSX and Windows)
10+
11+
x86_64:
12+
- reverted the performance patch for SGEMV_T on AVX512 as it caused
13+
wrong results in some applications
14+
15+
SPARC:
16+
- fixed compilation with compilers other than gcc
17+
====================================================================
318
Version 0.3.16
419
11-Jul-2021
520

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.16
6+
VERSION = 0.3.16.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

cpuid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#define VENDOR_TRANSMETA 9
5555
#define VENDOR_NSC 10
5656
#define VENDOR_HYGON 11
57+
#define VENDOR_ZHAOXIN 12
5758
#define VENDOR_UNKNOWN 99
5859

5960
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))

cpuid_x86.c

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ int get_vendor(void){
283283
if (!strcmp(vendor, "CyrixInstead")) return VENDOR_CYRIX;
284284
if (!strcmp(vendor, "NexGenDriven")) return VENDOR_NEXGEN;
285285
if (!strcmp(vendor, "CentaurHauls")) return VENDOR_CENTAUR;
286-
if (!strcmp(vendor, " Shanghai ")) return VENDOR_CENTAUR;
286+
if (!strcmp(vendor, " Shanghai ")) return VENDOR_ZHAOXIN;
287287
if (!strcmp(vendor, "RiseRiseRise")) return VENDOR_RISE;
288288
if (!strcmp(vendor, " SiS SiS SiS")) return VENDOR_SIS;
289289
if (!strcmp(vendor, "GenuineTMx86")) return VENDOR_TRANSMETA;
@@ -1067,7 +1067,8 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
10671067

10681068
if ((get_vendor() == VENDOR_AMD) ||
10691069
(get_vendor() == VENDOR_HYGON) ||
1070-
(get_vendor() == VENDOR_CENTAUR)) {
1070+
(get_vendor() == VENDOR_CENTAUR) ||
1071+
(get_vendor() == VENDOR_ZHAOXIN)) {
10711072
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
10721073

10731074
LDTB.size = 4096;
@@ -1190,14 +1191,15 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
11901191

11911192
int get_cpuname(void){
11921193

1193-
int family, exfamily, model, vendor, exmodel;
1194+
int family, exfamily, model, vendor, exmodel, stepping;
11941195

11951196
if (!have_cpuid()) return CPUTYPE_80386;
11961197

11971198
family = get_cputype(GET_FAMILY);
11981199
exfamily = get_cputype(GET_EXFAMILY);
11991200
model = get_cputype(GET_MODEL);
12001201
exmodel = get_cputype(GET_EXMODEL);
1202+
stepping = get_cputype(GET_STEPPING);
12011203

12021204
vendor = get_vendor();
12031205

@@ -1628,15 +1630,20 @@ int get_cpuname(void){
16281630
switch (family) {
16291631
case 0x5:
16301632
return CPUTYPE_CENTAURC6;
1631-
break;
16321633
case 0x6:
1633-
return CPUTYPE_NANO;
1634-
break;
1635-
case 0x7:
1634+
if (model == 0xf && stepping < 0xe)
1635+
return CPUTYPE_NANO;
16361636
return CPUTYPE_NEHALEM;
1637-
break;
1637+
default:
1638+
if (family >= 0x7)
1639+
return CPUTYPE_NEHALEM;
1640+
else
1641+
return CPUTYPE_VIAC3;
16381642
}
1639-
return CPUTYPE_VIAC3;
1643+
}
1644+
1645+
if (vendor == VENDOR_ZHAOXIN){
1646+
return CPUTYPE_NEHALEM;
16401647
}
16411648

16421649
if (vendor == VENDOR_RISE){
@@ -1869,14 +1876,15 @@ char *get_lower_cpunamechar(void){
18691876

18701877
int get_coretype(void){
18711878

1872-
int family, exfamily, model, exmodel, vendor;
1879+
int family, exfamily, model, exmodel, vendor, stepping;
18731880

18741881
if (!have_cpuid()) return CORE_80486;
18751882

18761883
family = get_cputype(GET_FAMILY);
18771884
exfamily = get_cputype(GET_EXFAMILY);
18781885
model = get_cputype(GET_MODEL);
18791886
exmodel = get_cputype(GET_EXMODEL);
1887+
stepping = get_cputype(GET_STEPPING);
18801888

18811889
vendor = get_vendor();
18821890

@@ -2286,13 +2294,19 @@ int get_coretype(void){
22862294
if (vendor == VENDOR_CENTAUR) {
22872295
switch (family) {
22882296
case 0x6:
2289-
return CORE_NANO;
2290-
break;
2291-
case 0x7:
2297+
if (model == 0xf && stepping < 0xe)
2298+
return CORE_NANO;
22922299
return CORE_NEHALEM;
2293-
break;
2300+
default:
2301+
if (family >= 0x7)
2302+
return CORE_NEHALEM;
2303+
else
2304+
return CORE_VIAC3;
22942305
}
2295-
return CORE_VIAC3;
2306+
}
2307+
2308+
if (vendor == VENDOR_ZHAOXIN) {
2309+
return CORE_NEHALEM;
22962310
}
22972311

22982312
return CORE_UNKNOWN;

driver/others/dynamic.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ extern gotoblas_t gotoblas_COOPERLAKE;
292292
#define VENDOR_AMD 2
293293
#define VENDOR_CENTAUR 3
294294
#define VENDOR_HYGON 4
295+
#define VENDOR_ZHAOXIN 5
295296
#define VENDOR_UNKNOWN 99
296297

297298
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
@@ -404,7 +405,7 @@ static int get_vendor(void){
404405
if (!strcmp(vendor.vchar, "GenuineIntel")) return VENDOR_INTEL;
405406
if (!strcmp(vendor.vchar, "AuthenticAMD")) return VENDOR_AMD;
406407
if (!strcmp(vendor.vchar, "CentaurHauls")) return VENDOR_CENTAUR;
407-
if (!strcmp(vendor.vchar, " Shanghai ")) return VENDOR_CENTAUR;
408+
if (!strcmp(vendor.vchar, " Shanghai ")) return VENDOR_ZHAOXIN;
408409
if (!strcmp(vendor.vchar, "HygonGenuine")) return VENDOR_HYGON;
409410

410411
if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
@@ -415,14 +416,15 @@ static int get_vendor(void){
415416
static gotoblas_t *get_coretype(void){
416417

417418
int eax, ebx, ecx, edx;
418-
int family, exfamily, model, vendor, exmodel;
419+
int family, exfamily, model, vendor, exmodel, stepping;
419420

420421
cpuid(1, &eax, &ebx, &ecx, &edx);
421422

422423
family = BITMASK(eax, 8, 0x0f);
423424
exfamily = BITMASK(eax, 20, 0xff);
424425
model = BITMASK(eax, 4, 0x0f);
425426
exmodel = BITMASK(eax, 16, 0x0f);
427+
stepping = BITMASK(eax, 0, 0x0f);
426428

427429
vendor = get_vendor();
428430

@@ -824,13 +826,19 @@ static gotoblas_t *get_coretype(void){
824826
if (vendor == VENDOR_CENTAUR) {
825827
switch (family) {
826828
case 0x6:
827-
return &gotoblas_NANO;
828-
break;
829-
case 0x7:
829+
if (model == 0xf && stepping < 0xe)
830+
return &gotoblas_NANO;
830831
return &gotoblas_NEHALEM;
832+
default:
833+
if (family >= 0x7)
834+
return &gotoblas_NEHALEM;
831835
}
832836
}
833837

838+
if (vendor == VENDOR_ZHAOXIN) {
839+
return &gotoblas_NEHALEM;
840+
}
841+
834842
return NULL;
835843
}
836844

interface/gemv.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,12 +201,14 @@ void CNAME(enum CBLAS_ORDER order,
201201
if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0);
202202

203203
if (alpha == ZERO) return;
204-
204+
205+
#if 0
206+
/* this optimization causes stack corruption on x86_64 under OSX, Windows and FreeBSD */
205207
if (trans == 0 && incx == 1 && incy == 1 && m*n < 2304 *GEMM_MULTITHREAD_THRESHOLD) {
206208
GEMV_N(m, n, 0, alpha, a, lda, x, incx, y, incy, NULL);
207209
return;
208210
}
209-
211+
#endif
210212
IDEBUG_START;
211213

212214
FUNCTION_PROFILE_START();

kernel/x86_64/sgemv_t_4.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3838
#include "sgemv_t_microk_haswell-4.c"
3939
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
4040
#include "sgemv_t_microk_haswell-4.c"
41-
#include "sgemv_t_microk_skylakex.c"
41+
/*#include "sgemv_t_microk_skylakex.c"*/
4242
#endif
4343

4444
#if defined(STEAMROLLER) || defined(EXCAVATOR)

openblas_config_template.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ typedef int blasint;
9999

100100
/* Inclusion of Linux-specific header is needed for definition of cpu_set_t. */
101101
#ifdef OPENBLAS_OS_LINUX
102-
#define _GNU_SOURCE
102+
#ifndef _GNU_SOURCE
103+
#define _GNU_SOURCE
104+
#endif
103105
#include <sched.h>
104106
#endif

param.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2502,7 +2502,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25022502

25032503
#define GEMM_DEFAULT_OFFSET_A 0
25042504
#define GEMM_DEFAULT_OFFSET_B 2048
2505-
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2505+
#define GEMM_DEFAULT_ALIGN 0x03fffUL
25062506

25072507
#define SGEMM_DEFAULT_UNROLL_M 2
25082508
#define SGEMM_DEFAULT_UNROLL_N 8
@@ -2534,7 +2534,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25342534

25352535
#define GEMM_DEFAULT_OFFSET_A 0
25362536
#define GEMM_DEFAULT_OFFSET_B 2048
2537-
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2537+
#define GEMM_DEFAULT_ALIGN 0x03fffUL
25382538

25392539
#define SGEMM_DEFAULT_UNROLL_M 4
25402540
#define SGEMM_DEFAULT_UNROLL_N 4

0 commit comments

Comments
 (0)