Skip to content

Commit 72e1f7d

Browse files
committed
Only do complex return style autodetection on non-Windows x86_64
We don't care as much about other architectures (since we won't be using MKL on them), and I don't have time to figure out the assembly syntax needed to probe without smashing the stack on `i686`. Windows prefers the argument retstyle, so we leave it alone there.
1 parent 0f605c4 commit 72e1f7d

File tree

9 files changed

+54
-19
lines changed

9 files changed

+54
-19
lines changed

src/Make.inc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,15 @@ ifneq (,$(filter $(ARCH), x86_64 i686))
8989
CBLAS_DIVERGENCE_AUTODETECTION := 1
9090
endif
9191

92+
# If we're on x86_64 (and NOT on windows), we can do complex return style autodetection
93+
# We don't do this on Windows because Windows actually prefers argument retstyle
94+
ifneq ($(OS),WINNT)
95+
ifneq (,$(filter $(ARCH), x86_64))
96+
LBT_CFLAGS += -DCOMPLEX_RETSTYLE_AUTODETECTION
97+
COMPLEX_RETSTYLE_AUTODETECTION := 1
98+
endif
99+
endif
100+
92101

93102
ifeq ($(VERBOSE),0)
94103
ENDCOLOR := "\033[0m"

src/Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ all: $(builddir)/libblastrampoline.$(SHLIB_EXT)
77
# Objects we'll build
88
MAIN_OBJS := libblastrampoline.o dl_utils.o config.o \
99
autodetection.o \
10-
complex_return_style_adapters.o \
1110
threading.o deepbindless.o trampolines/trampolines_$(ARCH).o
1211

1312
# Include win_utils.c on Windows
@@ -24,6 +23,10 @@ ifeq ($(CBLAS_DIVERGENCE_AUTODETECTION),1)
2423
MAIN_OBJS += cblas_adapters.o
2524
endif
2625

26+
ifeq ($(COMPLEX_RETSTYLE_AUTODETECTION),1)
27+
MAIN_OBJS += complex_return_style_adapters.o
28+
endif
29+
2730
# Place the `.o` files into `$(builddir)`
2831
MAIN_OBJS := $(addprefix $(builddir)/,$(MAIN_OBJS))
2932

src/autodetection.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ int32_t autodetect_interface(void * handle, const char * suffix) {
154154
return LBT_INTERFACE_UNKNOWN;
155155
}
156156

157+
#ifdef COMPLEX_RETSTYLE_AUTODETECTION
157158
int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
158159
char symbol_name[MAX_SYMBOL_LEN];
159160

@@ -196,6 +197,7 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
196197
// If that was not reset either, we have no idea what's going on.
197198
return LBT_COMPLEX_RETSTYLE_UNKNOWN;
198199
}
200+
#endif // COMPLEX_RETSTYLE_AUTODETECTION
199201

200202
#ifdef F2C_AUTODETECTION
201203
int32_t autodetect_f2c(void * handle, const char * suffix) {
@@ -232,7 +234,7 @@ int32_t autodetect_f2c(void * handle, const char * suffix) {
232234
// We have no idea what happened; nothing works and everything is broken
233235
return LBT_F2C_UNKNOWN;
234236
}
235-
#endif
237+
#endif // F2C_AUTODETECTION
236238

237239
#ifdef CBLAS_DIVERGENCE_AUTODETECTION
238240
int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
@@ -261,4 +263,4 @@ int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
261263
// If we can't even find `zdotc_64`, we don't know what this is.
262264
return LBT_CBLAS_UNKNOWN;
263265
}
264-
#endif
266+
#endif // CBLAS_DIVERGENCE_AUTODETECTION

src/libblastrampoline.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#include "libblastrampoline_internal.h"
22
#include "libblastrampoline_trampdata.h"
3-
#include "libblastrampoline_complex_retdata.h"
43

4+
#ifdef COMPLEX_RETSTYLE_AUTODETECTION
5+
#include "libblastrampoline_complex_retdata.h"
6+
#endif
57
#ifdef F2C_AUTODETECTION
68
#include "libblastrampoline_f2cdata.h"
79
#endif
@@ -67,6 +69,7 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte
6769
}
6870
}
6971

72+
#ifdef COMPLEX_RETSTYLE_AUTODETECTION
7073
if (complex_retstyle == LBT_COMPLEX_RETSTYLE_ARGUMENT) {
7174
// Check to see if this symbol is one of the complex-returning functions
7275
for (int complex_symbol_idx=0; cmplxret_func_idxs[complex_symbol_idx] != -1; ++complex_symbol_idx) {
@@ -90,6 +93,7 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte
9093
}
9194
}
9295
}
96+
#endif // COMPLEX_RETSTYLE_AUTODETECTION
9397

9498
#ifdef F2C_AUTODETECTION
9599
if (f2c == LBT_F2C_REQUIRED) {
@@ -216,7 +220,9 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
216220
}
217221

218222
// Next, let's figure out what the complex return style is:
219-
int complex_retstyle = autodetect_complex_return_style(handle, lib_suffix);
223+
int complex_retstyle = LBT_COMPLEX_RETSTYLE_UNKNOWN;
224+
#ifdef COMPLEX_RETSTYLE_AUTODETECTION
225+
complex_retstyle = autodetect_complex_return_style(handle, lib_suffix);
220226
if (complex_retstyle == LBT_COMPLEX_RETSTYLE_UNKNOWN) {
221227
fprintf(stderr, "Unable to autodetect complex return style of \"%s\"\n", libname);
222228
return 0;
@@ -229,6 +235,7 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
229235
printf(" -> Autodetected argument-passing complex return style\n");
230236
}
231237
}
238+
#endif // COMPLEX_RETSTYLE_AUTODETECTION
232239

233240
int f2c = LBT_F2C_PLAIN;
234241
#ifdef F2C_AUTODETECTION
@@ -247,7 +254,7 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
247254
printf(" -> Autodetected gfortran calling convention\n");
248255
}
249256
}
250-
#endif
257+
#endif // F2C_AUTODETECTION
251258

252259
int cblas = LBT_CBLAS_UNKNOWN;
253260
#ifdef CBLAS_DIVERGENCE_AUTODETECTION
@@ -273,7 +280,7 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
273280
}
274281
}
275282
}
276-
#endif
283+
#endif // CBLAS_DIVERGENCE_AUTODETECTION
277284

278285
/*
279286
* Now, if we are opening a 64-bit library with 32-bit names (e.g. suffix == ""),
@@ -378,7 +385,7 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
378385
}
379386
}
380387
}
381-
#endif
388+
#endif // CBLAS_DIVERGENCE_AUTODETECTION
382389

383390
record_library_load(libname, handle, lib_suffix, &forwards[0], interface, complex_retstyle, f2c, cblas);
384391
if (verbose) {

src/libblastrampoline_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint);
8080
int32_t autodetect_blas_interface(void * isamax_addr);
8181
int32_t autodetect_lapack_interface(void * dpotrf_addr);
8282
int32_t autodetect_interface(void * handle, const char * suffix);
83+
#ifdef COMPLEX_RETSTYLE_AUTODETECTION
8384
int32_t autodetect_complex_return_style(void * handle, const char * suffix);
85+
#endif
8486

8587
#ifdef F2C_AUTODETECTION
8688
int32_t autodetect_f2c(void * handle, const char * suffix);

test/direct.jl

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,18 @@ lbt_handle = dlopen("$(lbt_prefix)/$(binlib)/lib$(lbt_link_name).$(shlib_ext)",
7171
@test libs[1].f2c == LBT_F2C_PLAIN
7272
if Sys.ARCH == :x86_64
7373
@test libs[1].cblas == LBT_CBLAS_CONFORMANT
74+
if Sys.iswindows()
75+
# Technically, this should be "argument", but we disable complex return style
76+
# autodetection on windows since the default compilers seem to prefer argument
77+
# style, so we'd rather just not touch things on that platform.
78+
@test libs[1].complex_retstyle == LBT_COMPLEX_RETSTYLE_UNKNOWN
79+
else
80+
@test libs[1].complex_retstyle == LBT_COMPLEX_RETSTYLE_NORMAL
81+
end
7482
else
7583
@test libs[1].cblas == LBT_CBLAS_UNKNOWN
84+
@test libs[1].complex_retstyle == LBT_COMPLEX_RETSTYLE_UNKNOWN
7685
end
77-
@test libs[1].complex_retstyle == LBT_COMLPEX_RETSTYLE_NORMAL
7886

7987
@test bitfield_get(libs[1].active_forwards, dgemm_idx) != 0
8088

@@ -83,7 +91,6 @@ lbt_handle = dlopen("$(lbt_prefix)/$(binlib)/lib$(lbt_link_name).$(shlib_ext)",
8391
@test libs[2].suffix == ""
8492
@test libs[2].interface == LBT_INTERFACE_LP64
8593
@test libs[2].f2c == LBT_F2C_PLAIN
86-
@test libs[2].complex_retstyle == LBT_COMLPEX_RETSTYLE_NORMAL
8794

8895
# If OpenBLAS32 and OpenBLAS are the same interface (e.g. i686)
8996
# then libs[2].active_forwards should be all zero!
@@ -189,7 +196,7 @@ end
189196
@test length(self_traces) == 3
190197
end
191198

192-
if MKL_jll.is_available() && Sys.WORD_SIZE == 64
199+
if MKL_jll.is_available() && Sys.ARCH == :x86_64
193200
# Since MKL v2022, we can explicitly link against ILP64-suffixed symbols
194201
@testset "MKL v2022 ILP64 loading" begin
195202
# Load the ILP64 interface library. Remember, you must load the `core`
@@ -237,9 +244,13 @@ if MKL_jll.is_available() && Sys.WORD_SIZE == 64
237244
@test length(libs) == 1
238245
@test libs[1].interface == LBT_INTERFACE_ILP64
239246
@test libs[1].cblas == LBT_CBLAS_DIVERGENT
240-
@test libs[1].complex_retstyle == LBT_COMLPEX_RETSTYLE_ARGUMENT
247+
if Sys.iswindows()
248+
@test libs[1].complex_retstyle == LBT_COMPLEX_RETSTYLE_UNKNOWN
249+
else
250+
@test libs[1].complex_retstyle == LBT_COMPLEX_RETSTYLE_ARGUMENT
251+
end
241252

242-
# Call cblas_zdotu_sub, showcasing that it doesn't work
253+
# Call cblas_zdotu_sub, asserting that it does not try to call a forwardless-symbol
243254
empty!(stacktraces)
244255
A = ComplexF64[3.1 + 1.4im, -1.0 + 1.2im]
245256
B = ComplexF64[1.3 + 0.3im, -1.1 + -3.4im]

test/runtests.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ end
104104

105105
# Build version that links against vanilla OpenBLAS32
106106
@testset "Vanilla OpenBLAS32_jll (LP64)" begin
107-
run_all_tests("openblas", OpenBLAS32_jll.LIBPATH_list, :LP64, "")
107+
# Reverse OpenBLAS32_jll's LIBPATH_list so that we get the right openblas.so
108+
run_all_tests("openblas", reverse(OpenBLAS32_jll.LIBPATH_list), :LP64, "")
108109
end
109110

110111
# Next, build a version that links against `libblastrampoline`, and tell

test/utils.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,9 @@ end
117117
const LBT_INTERFACE_LP64 = 32
118118
const LBT_INTERFACE_ILP64 = 64
119119
const LBT_F2C_PLAIN = 0
120-
const LBT_COMLPEX_RETSTYLE_NORMAL = 0
121-
const LBT_COMLPEX_RETSTYLE_ARGUMENT = 1
122-
const LBT_COMLPEX_RETSTYLE_UNKNOWN = -1
120+
const LBT_COMPLEX_RETSTYLE_NORMAL = 0
121+
const LBT_COMPLEX_RETSTYLE_ARGUMENT = 1
122+
const LBT_COMPLEX_RETSTYLE_UNKNOWN = -1
123123
const LBT_CBLAS_CONFORMANT = 0
124124
const LBT_CBLAS_DIVERGENT = 1
125125
const LBT_CBLAS_UNKNOWN = -1
@@ -152,7 +152,7 @@ function lbt_get_forward(handle, symbol_name, interface, f2c = LBT_F2C_PLAIN)
152152
return ccall(dlsym(handle, :lbt_get_forward), Ptr{Cvoid}, (Cstring, Int32, Int32), symbol_name, interface, f2c)
153153
end
154154

155-
function lbt_set_forward(handle, symbol_name, addr, interface, complex_retstyle = LBT_COMLPEX_RETSTYLE_NORMAL, f2c = LBT_F2C_PLAIN; verbose::Bool = false)
155+
function lbt_set_forward(handle, symbol_name, addr, interface, complex_retstyle = LBT_COMPLEX_RETSTYLE_NORMAL, f2c = LBT_F2C_PLAIN; verbose::Bool = false)
156156
return ccall(dlsym(handle, :lbt_set_forward), Int32, (Cstring, Ptr{Cvoid}, Int32, Int32, Int32, Int32), symbol_name, addr, interface, complex_retstyle, f2c, verbose ? 1 : 0)
157157
end
158158

test/zdotc_test/zdotc_test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ typedef int64_t blasint;
1010
typedef int32_t blasint;
1111
#endif
1212

13-
extern double complex MANGLE(cblas_zdotc_sub)(blasint, double complex *, blasint, double complex *, blasint, double complex *);
13+
extern void MANGLE(cblas_zdotc_sub)(blasint, double complex *, blasint, double complex *, blasint, double complex *);
1414
extern double complex MANGLE(zdotc_)(blasint *, double complex *, blasint *, double complex *, blasint *);
1515

1616
#define N 2

0 commit comments

Comments
 (0)