Skip to content

Commit de06954

Browse files
authored
Merge pull request numpy#15069 from pv/blas-ilp64
ENH: add support for ILP64 OpenBLAS (without symbol suffix)
2 parents 740c7f5 + 85447b1 commit de06954

File tree

18 files changed

+287
-212
lines changed

18 files changed

+287
-212
lines changed

doc/source/release/1.18.0-notes.rst

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,9 @@ The `numpy.expand_dims` ``axis`` keyword can now accept a tuple of
205205
axes. Previously, ``axis`` was required to be an integer.
206206
(`gh-14051 <https://github.com/numpy/numpy/pull/14051>`__)
207207

208-
Support for 64-bit OpenBLAS with symbol suffix
209-
----------------------------------------------
210-
Added support for 64-bit (ILP64) OpenBLAS compiled with
211-
``make INTERFACE64=1 SYMBOLSUFFIX=64_``. See ``site.cfg.example``
208+
Support for 64-bit OpenBLAS
209+
---------------------------
210+
Added support for 64-bit (ILP64) OpenBLAS. See ``site.cfg.example``
212211
for details.
213212

214213
Improvements

doc/source/user/building.rst

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -195,23 +195,37 @@ or::
195195
BLAS=None LAPACK=None ATLAS=None python setup.py build
196196

197197

198-
64-bit BLAS and LAPACK with symbol suffix
199-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
198+
64-bit BLAS and LAPACK
199+
~~~~~~~~~~~~~~~~~~~~~~
200+
201+
You can tell NumPy to use 64-bit BLAS/LAPACK libraries by setting the
202+
environment variable::
203+
204+
NPY_USE_BLAS_ILP64=1
205+
206+
when building NumPy. The following 64-bit BLAS/LAPACK libraries are
207+
supported:
208+
209+
1. OpenBLAS ILP64 with ``64_`` symbol suffix (``openblas64_``)
210+
2. OpenBLAS ILP64 without symbol suffix (``openblas_ilp64``)
211+
212+
The order in which they are preferred is determined by the
213+
``NPY_BLAS_ILP64_ORDER`` and ``NPY_LAPACK_ILP64_ORDER`` environment
214+
variables. The default value is ``openblas64_,openblas_ilp64``.
215+
216+
.. note::
200217

201-
Numpy also supports 64-bit OpenBLAS with ``64_`` symbol suffix. Such
202-
library is obtained by compiling OpenBLAS with settings::
218+
Using non-symbol-suffixed 64-bit BLAS/LAPACK in a program that also
219+
uses 32-bit BLAS/LAPACK can cause crashes under certain conditions
220+
(e.g. with embedded Python interpreters on Linux).
203221

204-
make INTERFACE64=1 SYMBOLSUFFIX=64_
222+
The 64-bit OpenBLAS with ``64_`` symbol suffix is obtained by
223+
compiling OpenBLAS with settings::
205224

206-
To make Numpy use it, set ``NPY_USE_BLAS64_=1`` environment variable
207-
when building Numpy. You may also need to configure the
208-
``[openblas64_]`` section in ``site.cfg``.
225+
make INTERFACE64=1 SYMBOLSUFFIX=64_
209226

210-
The symbol suffix avoids symbol name clashes between 32-bit and 64-bit
211-
BLAS/LAPACK libraries, meaning that you can link to both in the same
212-
program. This avoids potential issues when using 64-bit BLAS/LAPACK in
213-
Numpy while simultaneously using other Python software that uses the
214-
32-bit versions.
227+
The symbol suffix avoids symbol name clashes between 32-bit and
228+
64-bit BLAS/LAPACK libraries.
215229

216230

217231
Supplying additional compiler flags

numpy/core/setup.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,6 @@ def is_npy_no_smp():
9191
# block.
9292
return 'NPY_NOSMP' in os.environ
9393

94-
def is_npy_use_blas64_():
95-
return (os.environ.get('NPY_USE_BLAS64_', "0") != "0")
96-
9794
def win32_checks(deflist):
9895
from numpy.distutils.misc_util import get_build_architecture
9996
a = get_build_architecture()
@@ -756,12 +753,12 @@ def get_mathlib_info(*args):
756753
join('src', 'common', 'numpyos.c'),
757754
]
758755

759-
if is_npy_use_blas64_():
760-
blas_info = get_info('blas64__opt', 2)
761-
have_blas = blas_info and ('HAVE_CBLAS64_', None) in blas_info.get('define_macros', [])
756+
if os.environ.get('NPY_USE_BLAS_ILP64', "0") != "0":
757+
blas_info = get_info('blas_ilp64_opt', 2)
762758
else:
763759
blas_info = get_info('blas_opt', 0)
764-
have_blas = blas_info and ('HAVE_CBLAS', None) in blas_info.get('define_macros', [])
760+
761+
have_blas = blas_info and ('HAVE_CBLAS', None) in blas_info.get('define_macros', [])
765762

766763
if have_blas:
767764
extra_info = blas_info

numpy/core/src/common/cblasfuncs.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,10 @@
1010
#include <assert.h>
1111
#include <numpy/arrayobject.h>
1212
#include "npy_cblas.h"
13-
#include "npy_cblas64_.h"
1413
#include "arraytypes.h"
1514
#include "common.h"
1615

1716

18-
/*
19-
* If 64-bit CBLAS with symbol suffix '64_' is available, use it.
20-
*/
21-
#ifdef HAVE_CBLAS64_
22-
#define CBLAS_FUNC(name) name ## 64_
23-
#else
24-
#define CBLAS_FUNC(name) name
25-
#endif
26-
2717
static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0};
2818
static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};
2919

numpy/core/src/common/npy_cblas.h

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,34 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
2525

2626
#define CBLAS_INDEX size_t /* this may vary between platforms */
2727

28-
#define BLASINT int
29-
#define BLASNAME(name) name
28+
#ifdef NO_APPEND_FORTRAN
29+
#define BLAS_FORTRAN_SUFFIX
30+
#else
31+
#define BLAS_FORTRAN_SUFFIX _
32+
#endif
33+
34+
#ifndef BLAS_SYMBOL_PREFIX
35+
#define BLAS_SYMBOL_PREFIX
36+
#endif
37+
38+
#ifndef BLAS_SYMBOL_SUFFIX
39+
#define BLAS_SYMBOL_SUFFIX
40+
#endif
41+
42+
#define BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2) prefix ## name ## suffix ## suffix2
43+
#define BLAS_FUNC_EXPAND(name,prefix,suffix,suffix2) BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2)
44+
45+
#define CBLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,,BLAS_SYMBOL_SUFFIX)
46+
#define BLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,BLAS_FORTRAN_SUFFIX,BLAS_SYMBOL_SUFFIX)
47+
48+
#ifdef HAVE_BLAS_ILP64
49+
#define CBLAS_INT npy_int64
50+
#else
51+
#define CBLAS_INT int
52+
#endif
53+
54+
#define BLASNAME(name) CBLAS_FUNC(name)
55+
#define BLASINT CBLAS_INT
3056

3157
#include "npy_cblas_base.h"
3258

numpy/core/src/common/npy_cblas64_.h

Lines changed: 0 additions & 31 deletions
This file was deleted.

numpy/core/src/common/python_xerbla.c

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
#include "Python.h"
22
#include "numpy/npy_common.h"
3-
4-
/*
5-
* From f2c.h, this should be safe unless fortran is set to use 64
6-
* bit integers. We don't seem to have any good way to detect that.
7-
*/
8-
typedef int integer;
3+
#include "npy_cblas.h"
94

105
/*
116
From the original manpage:
@@ -24,7 +19,7 @@ typedef int integer;
2419
info: Number of the invalid parameter.
2520
*/
2621

27-
int xerbla_(char *srname, integer *info)
22+
CBLAS_INT BLAS_FUNC(xerbla)(char *srname, CBLAS_INT *info)
2823
{
2924
static const char format[] = "On entry to %.*s" \
3025
" parameter number %d had an illegal value";
@@ -42,19 +37,11 @@ int xerbla_(char *srname, integer *info)
4237
#ifdef WITH_THREAD
4338
save = PyGILState_Ensure();
4439
#endif
45-
PyOS_snprintf(buf, sizeof(buf), format, len, srname, *info);
40+
PyOS_snprintf(buf, sizeof(buf), format, len, srname, (int)*info);
4641
PyErr_SetString(PyExc_ValueError, buf);
4742
#ifdef WITH_THREAD
4843
PyGILState_Release(save);
4944
#endif
5045

5146
return 0;
5247
}
53-
54-
55-
/* Same for LAPACK64_ */
56-
npy_int64 xerbla_64_(char *srname, npy_int64 *info)
57-
{
58-
integer info_int = (integer)*info;
59-
return xerbla_(srname, &info_int);
60-
}

numpy/core/src/multiarray/arraytypes.c.src

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3535,17 +3535,17 @@ NPY_NO_EXPORT void
35353535
npy_intp n, void *NPY_UNUSED(ignore))
35363536
{
35373537
#if defined(HAVE_CBLAS)
3538-
int is1b = blas_stride(is1, sizeof(@type@));
3539-
int is2b = blas_stride(is2, sizeof(@type@));
3538+
CBLAS_INT is1b = blas_stride(is1, sizeof(@type@));
3539+
CBLAS_INT is2b = blas_stride(is2, sizeof(@type@));
35403540

35413541
if (is1b && is2b)
35423542
{
35433543
double sum = 0.; /* double for stability */
35443544

35453545
while (n > 0) {
3546-
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
3546+
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
35473547

3548-
sum += cblas_@prefix@dot(chunk,
3548+
sum += CBLAS_FUNC(cblas_@prefix@dot)(chunk,
35493549
(@type@ *) ip1, is1b,
35503550
(@type@ *) ip2, is2b);
35513551
/* use char strides here */
@@ -3584,17 +3584,17 @@ NPY_NO_EXPORT void
35843584
char *op, npy_intp n, void *NPY_UNUSED(ignore))
35853585
{
35863586
#if defined(HAVE_CBLAS)
3587-
int is1b = blas_stride(is1, sizeof(@ctype@));
3588-
int is2b = blas_stride(is2, sizeof(@ctype@));
3587+
CBLAS_INT is1b = blas_stride(is1, sizeof(@ctype@));
3588+
CBLAS_INT is2b = blas_stride(is2, sizeof(@ctype@));
35893589

35903590
if (is1b && is2b) {
35913591
double sum[2] = {0., 0.}; /* double for stability */
35923592

35933593
while (n > 0) {
3594-
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
3594+
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
35953595
@type@ tmp[2];
35963596

3597-
cblas_@prefix@dotu_sub((int)n, ip1, is1b, ip2, is2b, tmp);
3597+
CBLAS_FUNC(cblas_@prefix@dotu_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
35983598
sum[0] += (double)tmp[0];
35993599
sum[1] += (double)tmp[1];
36003600
/* use char strides here */

numpy/core/src/multiarray/common.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
303303
*/
304304
if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) {
305305
stride /= itemsize;
306+
#ifndef HAVE_BLAS_ILP64
306307
if (stride <= INT_MAX) {
308+
#else
309+
if (stride <= NPY_MAX_INT64) {
310+
#endif
307311
return stride;
308312
}
309313
}
@@ -314,7 +318,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
314318
* Define a chunksize for CBLAS. CBLAS counts in integers.
315319
*/
316320
#if NPY_MAX_INTP > INT_MAX
317-
# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
321+
# ifndef HAVE_BLAS_ILP64
322+
# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
323+
# else
324+
# define NPY_CBLAS_CHUNK (NPY_MAX_INT64 / 2 + 1)
325+
# endif
318326
#else
319327
# define NPY_CBLAS_CHUNK NPY_MAX_INTP
320328
#endif

numpy/core/src/multiarray/vdot.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,17 @@ CFLOAT_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
1515
char *op, npy_intp n, void *NPY_UNUSED(ignore))
1616
{
1717
#if defined(HAVE_CBLAS)
18-
int is1b = blas_stride(is1, sizeof(npy_cfloat));
19-
int is2b = blas_stride(is2, sizeof(npy_cfloat));
18+
CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cfloat));
19+
CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cfloat));
2020

2121
if (is1b && is2b) {
2222
double sum[2] = {0., 0.}; /* double for stability */
2323

2424
while (n > 0) {
25-
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
25+
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
2626
float tmp[2];
2727

28-
cblas_cdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
28+
CBLAS_FUNC(cblas_cdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
2929
sum[0] += (double)tmp[0];
3030
sum[1] += (double)tmp[1];
3131
/* use char strides here */
@@ -66,17 +66,17 @@ CDOUBLE_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
6666
char *op, npy_intp n, void *NPY_UNUSED(ignore))
6767
{
6868
#if defined(HAVE_CBLAS)
69-
int is1b = blas_stride(is1, sizeof(npy_cdouble));
70-
int is2b = blas_stride(is2, sizeof(npy_cdouble));
69+
CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cdouble));
70+
CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cdouble));
7171

7272
if (is1b && is2b) {
7373
double sum[2] = {0., 0.}; /* double for stability */
7474

7575
while (n > 0) {
76-
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
76+
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
7777
double tmp[2];
7878

79-
cblas_zdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
79+
CBLAS_FUNC(cblas_zdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
8080
sum[0] += (double)tmp[0];
8181
sum[1] += (double)tmp[1];
8282
/* use char strides here */

0 commit comments

Comments
 (0)