Skip to content

Commit b219b96

Browse files
committed
Add cblas_sdot and cblas_ddot to our CBLAS workaround list
Julia recently started using these, so we need to work around them in LBT as well, for CBLAS-divergent BLAS libraries such as MKL v2022.
1 parent 1a429b7 commit b219b96

File tree

7 files changed

+75
-15
lines changed

7 files changed

+75
-15
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ You can always tell if your system is limited in this fashion by calling `lbt_ge
6060

6161
### Version History
6262

63+
v5.0.2 - Add `cblas_sdot` and `cblas_ddot` to CBLAS divergence workaround wrappers.
64+
6365
v5.0.1 - Fix complex return wrapper infinite loop bug.
6466

6567
v5.0.0 - Add complex return value wrappers and CBLAS workaround. The complex return value wrapper ensures that all symbols maintain a standard ABI for returning complex numbers, and the CBLAS workaround maps CBLAS symbols to FORTRAN symbols when properly-suffixed CBLAS symbols do not exist, as is the case in MKL `v2022.0`.

ext/gensymbol/generate_func_list.sh

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,20 +107,25 @@ echo "#endif" >> "${OUTPUT_FILE}"
107107
NUM_COMPLEX128_SYMBOLS="${NUM_SYMBOLS}"
108108

109109
NUM_SYMBOLS=0
110-
CBLAS_SUB_FUNCS="$(grep -e '^cblas_.*_sub' <<< "${EXPORTED_FUNCS}")"
110+
# We manually curate a list of cblas functions that we have defined adapters for
111+
# in `src/cblas_adapters.c`. This is our compromise between the crushing workload
112+
# of manually defining every single CBLAS function we need, and the practical need
113+
# to get Julia to pass its LinearAlgebra tests using MKL v2022.
114+
CBLAS_WORKAROUND_FUNCS="$(grep -e '^cblas_.*_sub$' <<< "${EXPORTED_FUNCS}")"
115+
CBLAS_WORKAROUND_FUNCS="${CBLAS_WORKAROUND_FUNCS} $(grep -e '^cblas_.dot$' <<< "${EXPORTED_FUNCS}")"
111116
echo >> "${OUTPUT_FILE}"
112-
echo "#ifndef CBLAS_SUB_FUNCS" >> "${OUTPUT_FILE}"
113-
echo "#define CBLAS_SUB_FUNCS(XX) \\" >> "${OUTPUT_FILE}"
114-
for func_name in ${CBLAS_SUB_FUNCS}; do
117+
echo "#ifndef CBLAS_WORKAROUND_FUNCS" >> "${OUTPUT_FILE}"
118+
echo "#define CBLAS_WORKAROUND_FUNCS(XX) \\" >> "${OUTPUT_FILE}"
119+
for func_name in ${CBLAS_WORKAROUND_FUNCS}; do
115120
output_func "${func_name}"
116121
done
117122
echo >> "${OUTPUT_FILE}"
118123
echo "#endif" >> "${OUTPUT_FILE}"
119-
NUM_CBLAS_SUB_SYMBOLS="${NUM_SYMBOLS}"
124+
NUM_CBLAS_WORKAROUND_SYMBOLS="${NUM_SYMBOLS}"
120125

121126
# Report to the user and cleanup
122127
echo
123128
NUM_F2C_SYMBOLS="$((NUM_FLOAT32_SYMBOLS + NUM_COMPLEX64_SYMBOLS + NUM_COMPLEX128_SYMBOLS))"
124129
NUM_CMPLX_SYMBOLS="$((NUM_COMPLEX64_SYMBOLS + NUM_COMPLEX128_SYMBOLS))"
125-
echo "Done, with ${NUM_EXPORTED} symbols generated (${NUM_F2C_SYMBOLS} f2c, ${NUM_CMPLX_SYMBOLS} complex-returning, ${NUM_CBLAS_SUB_SYMBOLS} cblas-sub functions)."
130+
echo "Done, with ${NUM_EXPORTED} symbols generated (${NUM_F2C_SYMBOLS} f2c, ${NUM_CMPLX_SYMBOLS} complex-returning, ${NUM_CBLAS_WORKAROUND_SYMBOLS} cblas-workaround functions)."
126131
rm -f tempsymbols.def

src/Make.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ endif
2424

2525
LBT_SOVERSION_MAJOR := 5
2626
LBT_SOVERSION_MINOR := 0
27-
LBT_SOVERSION_PATCH := 1
27+
LBT_SOVERSION_PATCH := 2
2828

2929
ifeq ($(OS), WINNT)
3030
SHLIB_EXT := dll

src/cblas_adapters.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,45 @@ void lbt_cblas_cdotu_sub64_(const int64_t N,
105105
{
106106
*z = cdotu_64_(&N, X, &incX, Y, &incY);
107107
}
108+
109+
110+
111+
extern float sdot_(const int32_t *,
112+
const float *, const int32_t *,
113+
const float *, const int32_t *);
114+
float lbt_cblas_sdot(const int32_t N,
115+
const float *X, const int32_t incX,
116+
const float *Y, const int32_t incY)
117+
{
118+
return sdot_(&N, X, &incX, Y, &incY);
119+
}
120+
121+
extern float sdot_64_(const int64_t *,
122+
const float *, const int64_t *,
123+
const float *, const int64_t *);
124+
float lbt_cblas_sdot64_(const int64_t N,
125+
const float *X, const int64_t incX,
126+
const float *Y, const int64_t incY)
127+
{
128+
return sdot_64_(&N, X, &incX, Y, &incY);
129+
}
130+
131+
extern double ddot_(const int32_t *,
132+
const double *, const int32_t *,
133+
const double *, const int32_t *);
134+
double lbt_cblas_ddot(const int32_t N,
135+
const double *X, const int32_t incX,
136+
const double *Y, const int32_t incY)
137+
{
138+
return ddot_(&N, X, &incX, Y, &incY);
139+
}
140+
141+
extern double ddot_64_(const int64_t *,
142+
const double *, const int64_t *,
143+
const double *, const int64_t *);
144+
double lbt_cblas_ddot64_(const int64_t N,
145+
const double *X, const int64_t incX,
146+
const double *Y, const int64_t incY)
147+
{
148+
return ddot_64_(&N, X, &incX, Y, &incY);
149+
}

src/exported_funcs.inc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4986,11 +4986,13 @@
49864986

49874987
#endif
49884988

4989-
#ifndef CBLAS_SUB_FUNCS
4990-
#define CBLAS_SUB_FUNCS(XX) \
4989+
#ifndef CBLAS_WORKAROUND_FUNCS
4990+
#define CBLAS_WORKAROUND_FUNCS(XX) \
49914991
XX(cblas_cdotc_sub, 2547) \
49924992
XX(cblas_cdotu_sub, 2549) \
49934993
XX(cblas_zdotc_sub, 2695) \
49944994
XX(cblas_zdotu_sub, 2697) \
4995+
XX(cblas_ddot, 2588) \
4996+
XX(cblas_sdot, 2655) \
49954997

49964998
#endif

src/libblastrampoline_cblasdata.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#define XX(name, index) extern const void * lbt_##name ;
22
#define XX_64(name, index) extern const void * lbt_##name##64_ ;
3-
CBLAS_SUB_FUNCS(XX)
4-
CBLAS_SUB_FUNCS(XX_64)
3+
CBLAS_WORKAROUND_FUNCS(XX)
4+
CBLAS_WORKAROUND_FUNCS(XX_64)
55
#undef XX
66
#undef XX_64
77

@@ -11,11 +11,11 @@ CBLAS_SUB_FUNCS(XX_64)
1111
#define XX(name, index) &lbt_##name,
1212
#define XX_64(name, index) &lbt_##name##64_,
1313
const void ** cblas32_func_wrappers[] = {
14-
CBLAS_SUB_FUNCS(XX)
14+
CBLAS_WORKAROUND_FUNCS(XX)
1515
NULL
1616
};
1717
const void ** cblas64_func_wrappers[] = {
18-
CBLAS_SUB_FUNCS(XX_64)
18+
CBLAS_WORKAROUND_FUNCS(XX_64)
1919
NULL
2020
};
2121
#undef XX
@@ -24,7 +24,7 @@ const void ** cblas64_func_wrappers[] = {
2424
// Finally, an array that maps cblas index -> exported symbol index
2525
#define XX(name, index) index,
2626
const int cblas_func_idxs[] = {
27-
CBLAS_SUB_FUNCS(XX)
27+
CBLAS_WORKAROUND_FUNCS(XX)
2828
-1
2929
};
30-
#undef XX
30+
#undef XX

test/direct.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,5 +259,14 @@ if MKL_jll.is_available() && Sys.ARCH == :x86_64
259259
ccall(zdotu_fptr, Cvoid, (Int64, Ptr{ComplexF64}, Int64, Ptr{ComplexF64}, Int64, Ptr{ComplexF64}), 2, A, 1, B, 1, result)
260260
@test result[1] ≈ ComplexF64(1.47 + 3.83im)
261261
@test isempty(stacktraces)
262+
263+
# Also call `cblas_sdot64_` (which forwards to the FORTRAN `sdot_64_`), asserting the same.
264+
empty!(stacktraces)
265+
A = Float32[3.1, -1.0]
266+
B = Float32[1.3, -1.1]
267+
sdot_fptr = dlsym(lbt_handle, :cblas_sdot64_)
268+
result = ccall(sdot_fptr, Cfloat, (Int64, Ptr{Float32}, Int64, Ptr{Float32}, Int64), 2, A, 1, B, 1)
269+
@test result ≈ Float32(5.13)
270+
@test isempty(stacktraces)
262271
end
263272
end

0 commit comments

Comments
 (0)