Skip to content

Commit 6b1398d

Browse files
committed
Add support for ILP64 Accelerate
The new Accelerate released in macOS v13.3 provides two new interfaces: an upgraded LAPACK for the LP64 interface, and an all-new ILP64 interface (that uses the same upgraded LAPACK). These symbols are available from Accelerate with the suffix `$NEWLAPACK` and `$NEWLAPACK$ILP64`, respectively. Unfortunately, this is not a "true" suffix, as Apple has decided to drop the trailing underscore from the typical F77 names, meaning that a symbol such as `dgemm_` gets mangled to `dgemm$NEWLAPACK`, whereas a CBLAS symbol such as `cblas_zdotc_sub` gets mangled to `cblas_zdotc_sub$NEWLAPACK`. This means that we need to selectively erase the trailing underscore from some symbols when applying this Accelerate suffix. To do this, we add a new feature, enabled by default only on Apple builds, called `SYMBOL_TRIMMING`, which allows a `suffix_hint` to contain the ASCII "substitution character" `0x1a` as its first character to mean "remove a trailing underscore when applying this suffix". To make dealing with suffix hints easier for command-line users, a suffix hint can be attached to each library listed in `LBT_DEFAULT_LIBS` by separating the library name from its hint with an exclamation point, e.g. `libname!suffix`.
1 parent 031e1a8 commit 6b1398d

File tree

9 files changed

+134
-26
lines changed

9 files changed

+134
-26
lines changed

src/Make.inc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,11 @@ ifneq (,$(filter $(ARCH), x86_64 aarch64))
9898
COMPLEX_RETSTYLE_AUTODETECTION := 1
9999
endif
100100

101+
# If we're on an apple platform, we need to support symbol name trimming
102+
ifeq ($(OS), Darwin)
103+
LBT_CFLAGS += -DSYMBOL_TRIMMING
104+
SYMBOL_TRIMMING := 1
105+
endif
101106

102107
ifeq ($(VERBOSE),0)
103108
ENDCOLOR := "\033[0m"

src/autodetection.c

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,30 @@
11
#include "libblastrampoline_internal.h"
22
#include <complex.h>
33

4+
/*
5+
* Some vendors (such as Accelerate) decided to trim off the trailing underscore
6+
* from the F77 symbol names when adding their ILP64 symbol names. So the
7+
* symbol name `dgemm_` turns into `dgemm$NEWLAPACK$ILP64`. But of course,
8+
* symbol names like `cblas_sdot` turn into `cblas_sdot$NEWLAPACK$ILP64`.
9+
*
10+
* So we need a way to selectively trim off the trailing underscore. We do so
11+
* by shoving an ASCII "substitute character" onto the start of `$NEWLAPACK`,
12+
*/
13+
void build_symbol_name(char * out, const char *symbol_name, const char *suffix) {
14+
size_t symbol_len = strlen(symbol_name);
15+
#if defined(SYMBOL_TRIMMING)
16+
if (suffix[0] == '\x1a') {
17+
if (symbol_name[symbol_len-1] == '_') {
18+
symbol_len -= 1;
19+
}
20+
suffix += 1;
21+
}
22+
#endif
23+
24+
strncpy(out, symbol_name, MAX_SYMBOL_LEN);
25+
strncpy(out + symbol_len, suffix, MAX_SYMBOL_LEN - symbol_len);
26+
}
27+
428
/*
529
* Search for a symbol that ends in one of the given suffixes. Returns NULL if not found.
630
*/
@@ -12,7 +36,7 @@ const char * symbol_suffix_search(void * handle, const char * symbol_name, const
1236
continue;
1337
}
1438

15-
sprintf(symbol_name_suffixed, "%s%s", symbol_name, suffixes[suffix_idx]);
39+
build_symbol_name(symbol_name_suffixed, symbol_name, suffixes[suffix_idx]);
1640
if (lookup_symbol(handle, symbol_name_suffixed) != NULL) {
1741
return suffixes[suffix_idx];
1842
}
@@ -38,13 +62,25 @@ const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint) {
3862
// Possibly-NULL suffix that we should search over
3963
suffix_hint,
4064

41-
// First, search for LP64-mangling suffixes, so that when we are loading MKL from a
65+
// First, search for LP64-mangling suffixes, so that when we are loading libs from a
4266
// CLI environment, (where suffix hints are not easy) we want to give the most stable
4367
// configuration by default.
68+
#if defined(_OS_DARWIN_) && defined(SYMBOL_TRIMMING)
69+
// Apple Accelerate has an updated LAPACK interface, default to that.
70+
// Note that we are making use of our symbol trimming support here to eliminate
71+
// the F77 trailing underscore by starting the string with `\x1a`.
72+
"\x1a$NEWLAPACK",
73+
#endif
4474
"", "_", "__",
4575

76+
// Next, ILP64-mangling suffixes
77+
#if defined(_OS_DARWIN_) && defined(SYMBOL_TRIMMING)
78+
// Once again, search for Accelerate's non-pure-suffixed names
79+
"\x1a$NEWLAPACK$ILP64",
80+
#endif
81+
4682
// Next, search for ILP64-mangling suffixes
47-
"64", "64_", "_64__", "__64___",
83+
"64", "_64", "64_", "_64_", "_64__", "__64___",
4884
};
4985

5086
// If the suffix hint is NULL, just skip it when calling `lookup_symbol()`.
@@ -143,14 +179,14 @@ int32_t autodetect_interface(void * handle, const char * suffix) {
143179
char symbol_name[MAX_SYMBOL_LEN];
144180

145181
// Attempt BLAS `isamax()` test
146-
sprintf(symbol_name, "isamax_%s", suffix);
182+
build_symbol_name(symbol_name, "isamax_", suffix);
147183
void * isamax = lookup_symbol(handle, symbol_name);
148184
if (isamax != NULL) {
149185
return autodetect_blas_interface(isamax);
150186
}
151187

152188
// Attempt LAPACK `dpotrf()` test
153-
sprintf(symbol_name, "dpotrf_%s", suffix);
189+
build_symbol_name(symbol_name, "dpotrf_", suffix);
154190
void * dpotrf = lookup_symbol(handle, symbol_name);
155191
if (dpotrf != NULL) {
156192
return autodetect_lapack_interface(dpotrf);
@@ -164,7 +200,7 @@ int32_t autodetect_interface(void * handle, const char * suffix) {
164200
int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
165201
char symbol_name[MAX_SYMBOL_LEN];
166202

167-
sprintf(symbol_name, "zdotc_%s", suffix);
203+
build_symbol_name(symbol_name, "zdotc_", suffix);
168204
void * zdotc_addr = lookup_symbol(handle, symbol_name);
169205
if (zdotc_addr == NULL) {
170206
return LBT_COMPLEX_RETSTYLE_UNKNOWN;
@@ -209,8 +245,8 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
209245
int32_t autodetect_f2c(void * handle, const char * suffix) {
210246
char symbol_name[MAX_SYMBOL_LEN];
211247

212-
// Attempt BLAS `sdot_()` test
213-
sprintf(symbol_name, "sdot_%s", suffix);
248+
// Attempt BLAS `sdot()` test
249+
build_symbol_name(symbol_name, "sdot_", suffix);
214250
void * sdot_addr = lookup_symbol(handle, symbol_name);
215251
if (sdot_addr == NULL) {
216252
return LBT_F2C_UNKNOWN;
@@ -246,10 +282,10 @@ int32_t autodetect_f2c(void * handle, const char * suffix) {
246282
int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
247283
char symbol_name[MAX_SYMBOL_LEN];
248284

249-
sprintf(symbol_name, "zdotc_%s", suffix);
285+
build_symbol_name(symbol_name, "zdotc_", suffix);
250286
if (lookup_symbol(handle, symbol_name) != NULL ) {
251287
// If we have both `zdotc_64` and `cblas_zdotc_sub64`, it's all good:
252-
sprintf(symbol_name, "cblas_zdotc_sub%s", suffix);
288+
build_symbol_name(symbol_name, "cblas_zdotc_sub", suffix);
253289
if (lookup_symbol(handle, symbol_name) != NULL ) {
254290
return LBT_CBLAS_CONFORMANT;
255291
}

src/config.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,18 @@ LBT_DLLEXPORT const lbt_config_t * lbt_get_config() {
2121
lbt_config.build_flags |= LBT_BUILDFLAGS_F2C_CAPABLE;
2222
#endif
2323

24+
#if defined(CBLAS_DIVERGENCE_AUTODETECTION)
25+
lbt_config.build_flags |= LBT_BUILDFLAGS_CBLAS_DIVERGENCE;
26+
#endif
27+
28+
#if defined(COMPLEX_RETSTYLE_AUTODETECTION)
29+
lbt_config.build_flags |= LBT_BUILDFLAGS_COMPLEX_RETSTYLE;
30+
#endif
31+
32+
#if defined(SYMBOL_TRIMMING)
33+
lbt_config.build_flags |= LBT_BUILDFLAGS_SYMBOL_TRIMMING;
34+
#endif
35+
2436
lbt_config.exported_symbols = (const char **)&exported_func_names[0];
2537
lbt_config.num_exported_symbols = NUM_EXPORTED_FUNCS;
2638

src/libblastrampoline.c

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte
8080
// Report to the user that we're cblas-wrapping this one
8181
if (verbose) {
8282
char exported_name[MAX_SYMBOL_LEN];
83-
sprintf(exported_name, "%s%s", exported_func_names[symbol_idx], interface == LBT_INTERFACE_ILP64 ? "64_" : "");
83+
build_symbol_name(exported_name, exported_func_names[symbol_idx], interface == LBT_INTERFACE_ILP64 ? "64_" : "");
8484
printf(" - [%04d] complex(%s)\n", symbol_idx, exported_name);
8585
}
8686

@@ -106,7 +106,7 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte
106106

107107
if (verbose) {
108108
char exported_name[MAX_SYMBOL_LEN];
109-
sprintf(exported_name, "%s%s", exported_func_names[symbol_idx], interface == LBT_INTERFACE_ILP64 ? "64_" : "");
109+
build_symbol_name(exported_name, exported_func_names[symbol_idx], interface == LBT_INTERFACE_ILP64 ? "64_" : "");
110110
printf(" - [%04d] f2c(%s)\n", symbol_idx, exported_name);
111111
}
112112

@@ -182,7 +182,7 @@ LBT_DLLEXPORT int32_t lbt_set_forward(const char * symbol_name, const void * add
182182
// Load `libname`, clearing previous mappings if `clear` is set.
183183
LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t verbose, const char * suffix_hint) {
184184
if (verbose) {
185-
printf("Generating forwards to %s\n", libname);
185+
printf("Generating forwards to %s (clear: %d, verbose: %d, suffix_hint: '%s')\n", libname, clear, verbose, suffix_hint);
186186
}
187187

188188
// Load the library, throwing an error if we can't actually load it
@@ -353,7 +353,7 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
353353
}
354354

355355
// Look up this symbol in the given library, if it is a valid symbol, set it!
356-
sprintf(symbol_name, "%s%s", exported_func_names[symbol_idx], lib_suffix);
356+
build_symbol_name(symbol_name, exported_func_names[symbol_idx], lib_suffix);
357357
void *addr = lookup_symbol(handle, symbol_name);
358358
if (addr != NULL) {
359359
set_forward_by_index(symbol_idx, addr, interface, complex_retstyle, f2c, verbose);
@@ -374,7 +374,7 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
374374
// Report to the user that we're cblas-wrapping this one
375375
if (verbose) {
376376
char exported_name[MAX_SYMBOL_LEN];
377-
sprintf(exported_name, "%s%s", exported_func_names[symbol_idx], interface == LBT_INTERFACE_ILP64 ? "64_" : "");
377+
build_symbol_name(exported_name, exported_func_names[symbol_idx], interface == LBT_INTERFACE_ILP64 ? "64_" : "");
378378
printf(" - [%04d] cblas(%s)\n", symbol_idx, exported_name);
379379
}
380380

@@ -443,7 +443,10 @@ __attribute__((constructor)) void init(void) {
443443
default_func = lookup_self_symbol("lbt_default_func_print_error_and_exit");
444444
}
445445

446-
// LBT_DEFAULT_LIBS is a semicolon-separated list of paths that should be loaded as BLAS libraries
446+
// LBT_DEFAULT_LIBS is a semicolon-separated list of paths that should be loaded as BLAS libraries.
447+
// Each library can have a `!suffix` tacked onto the end of it, providing a library-specific
448+
// suffix_hint. Example:
449+
// export LBT_DEFAULT_LIBS="libopenblas64.so!64_;/tmp/libfoo.so;/tmp/libbar.so!fastmath32"
447450
const char * default_libs = getenv("LBT_DEFAULT_LIBS");
448451
#if defined(LBT_FALLBACK_LIBS)
449452
if (default_libs == NULL) {
@@ -454,22 +457,41 @@ __attribute__((constructor)) void init(void) {
454457
const char * curr_lib_start = default_libs;
455458
int clear = 1;
456459
char curr_lib[PATH_MAX];
460+
char suffix_buffer[MAX_SYMBOL_LEN];
457461
while (curr_lib_start[0] != '\0') {
458462
// Find the end of this current library name
459463
const char * end = curr_lib_start;
460-
while (*end != ';' && *end != '\0')
464+
const char * suffix_sep = NULL;
465+
while (*end != ';' && *end != '\0') {
466+
if (*end == '!' && suffix_sep == NULL) {
467+
suffix_sep = end;
468+
}
461469
end++;
470+
}
471+
const char * curr_lib_end = end;
472+
if (suffix_sep != NULL) {
473+
curr_lib_end = suffix_sep;
474+
}
462475

463-
// Copy it into a temporary location
464-
int len = end - curr_lib_start;
476+
// Figure out if there's an embedded suffix_hint:
477+
const char * suffix_hint = NULL;
478+
if (suffix_sep != NULL) {
479+
int len = end - (suffix_sep + 1);
480+
memcpy(suffix_buffer, suffix_sep+1, len);
481+
suffix_buffer[len] = '\0';
482+
suffix_hint = &suffix_buffer[0];
483+
}
484+
485+
int len = curr_lib_end - curr_lib_start;
465486
memcpy(curr_lib, curr_lib_start, len);
466487
curr_lib[len] = '\0';
467488
curr_lib_start = end;
489+
468490
while (curr_lib_start[0] == ';')
469491
curr_lib_start++;
470492

471493
// Load functions from this library, clearing only the first time.
472-
lbt_forward(curr_lib, clear, verbose, NULL);
494+
lbt_forward(curr_lib, clear, verbose, suffix_hint);
473495
clear = 0;
474496
}
475497
}

src/libblastrampoline.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,11 @@ typedef struct {
111111
} lbt_config_t;
112112

113113
// Possible values for `build_flags` in `lbt_config_t`
114-
#define LBT_BUILDFLAGS_DEEPBINDLESS 0x01
115-
#define LBT_BUILDFLAGS_F2C_CAPABLE 0x02
114+
#define LBT_BUILDFLAGS_DEEPBINDLESS 0x01
115+
#define LBT_BUILDFLAGS_F2C_CAPABLE 0x02
116+
#define LBT_BUILDFLAGS_CBLAS_DIVERGENCE 0x04
117+
#define LBT_BUILDFLAGS_COMPLEX_RETSTYLE 0x08
118+
#define LBT_BUILDFLAGS_SYMBOL_TRIMMING 0x10
116119

117120
/*
118121
* Load the given `libname`, lookup all registered symbols within our configured list of exported

src/libblastrampoline_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ const char * lookup_self_path();
7676
void close_library(void * handle);
7777

7878
// Functions in `autodetection.c`
79+
void build_symbol_name(char * out, const char *symbol_name, const char *suffix);
7980
const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint);
8081
int32_t autodetect_blas_interface(void * isamax_addr);
8182
int32_t autodetect_lapack_interface(void * dpotrf_addr);

src/threading.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ LBT_DLLEXPORT int32_t lbt_get_num_threads() {
5656
lbt_library_info_t * lib = config->loaded_libs[lib_idx];
5757
for (int symbol_idx=0; getter_names[symbol_idx] != NULL; ++symbol_idx) {
5858
char symbol_name[MAX_SYMBOL_LEN];
59-
sprintf(symbol_name, "%s%s", getter_names[symbol_idx], lib->suffix);
59+
build_symbol_name(symbol_name, getter_names[symbol_idx], lib->suffix);
6060
int (*fptr)() = lookup_symbol(lib->handle, symbol_name);
6161
if (fptr != NULL) {
6262
int new_threads = fptr();
@@ -78,7 +78,7 @@ LBT_DLLEXPORT void lbt_set_num_threads(int32_t nthreads) {
7878
lbt_library_info_t * lib = config->loaded_libs[lib_idx];
7979
for (int symbol_idx=0; setter_names[symbol_idx] != NULL; ++symbol_idx) {
8080
char symbol_name[MAX_SYMBOL_LEN];
81-
sprintf(symbol_name, "%s%s", setter_names[symbol_idx], lib->suffix);
81+
build_symbol_name(symbol_name, setter_names[symbol_idx], lib->suffix);
8282
void (*fptr)(int) = lookup_symbol(lib->handle, symbol_name);
8383
if (fptr != NULL) {
8484
fptr(nthreads);

test/runtests.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,19 @@ if dlopen_e(veclib_blas_path) != C_NULL
170170
@testset "LBT -> vecLib/libLAPACK" begin
171171
run_all_tests(blastrampoline_link_name(), [lbt_dir], :LP64, string(veclib_blas_path, ";", veclib_lapack_path), tests=[dgemm, sgesv, sdot, zdotc])
172172
end
173+
174+
veclib_lapack_handle = dlopen(veclib_lapack_path)
175+
if dlsym_e(veclib_lapack_handle, "dpotrf\$NEWLAPACK\$ILP64") != C_NULL
176+
@testset "LBT -> vecLib/libBLAS (ILP64)" begin
177+
veclib_blas_path_ilp64 = "$(veclib_blas_path)!\x1a\$NEWLAPACK\$ILP64"
178+
run_all_tests(blastrampoline_link_name(), [lbt_dir], :ILP64, veclib_blas_path_ilp64; tests=[dgemm, sdot, zdotc])
179+
end
180+
181+
@testset "LBT -> vecLib/libLAPACK (ILP64)" begin
182+
veclib_lapack_path_ilp64 = "$(veclib_lapack_path)!\x1a\$NEWLAPACK\$ILP64"
183+
run_all_tests(blastrampoline_link_name(), [lbt_dir], :ILP64, veclib_lapack_path_ilp64; tests=[dgemm, sgesv, sdot, zdotc])
184+
end
185+
end
173186
end
174187

175188

test/utils.jl

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,31 @@ function append_libpath(paths::Vector{<:AbstractString})
2222
return join(vcat(paths..., get(ENV, LIBPATH_env, String[])), pathsep)
2323
end
2424

25-
function capture_output(cmd::Cmd; verbose::Bool = false)
25+
"""
    capture_output(cmd::Cmd; verbose::Bool = false, timeout = 10.0)

Run `cmd` with its stdout and stderr merged into a single pipe and return
`(p, output)`, where `p` is the finished process and `output` is everything it
printed, with lines joined by `"\\n"` (no trailing newline).

A non-zero exit status does not throw; inspect `p` to find out how the process
ended.

# Keywords
- `verbose`: log the `LBT_*`/`LD_*`/`DYLD_*` environment entries of `cmd` before
  starting it, and echo every output line as soon as it is produced.
- `timeout`: number of seconds after which a still-running process is killed
  (a warning is logged when that happens).
"""
function capture_output(cmd::Cmd; verbose::Bool = false, timeout = 10.0)
    out_pipe = Pipe()
    if verbose
        ld_env = filter(something(cmd.env, String[])) do e
            return startswith(e, "LBT_") || startswith(e, "LD_") || startswith(e, "DYLD_")
        end
        @info("Running $(basename(cmd.exec[1]))", ld_env)
    end
    p = run(pipeline(ignorestatus(cmd), stdout=out_pipe, stderr=out_pipe), wait=false)

    # Close our copy of the write end, so that the reader sees EOF once the
    # child process has exited.
    close(out_pipe.in)

    # Stream with `eachline` rather than `readlines`: the latter only returns at
    # EOF, which would hold back all `verbose` output from a process that hangs.
    output = @async begin
        lines = String[]
        for line in eachline(out_pipe)
            verbose && println(line)
            push!(lines, line)
        end
        join(lines, "\n")
    end

    # Kill the process if it outlives `timeout`.  A `Timer` (unlike a sleeping
    # task) can be cancelled as soon as the process has exited on its own.
    watchdog = Timer(timeout) do _
        if process_running(p)
            @warn("$(basename(cmd.exec[1])) timeout exceeded!")
            kill(p)
        end
    end
    try
        wait(p)
    finally
        close(watchdog)
    end
    return p, fetch(output)
end

0 commit comments

Comments
 (0)