|
| 1 | +## JLLWrappers musl SONAME workaround |
| 2 | +# |
| 3 | +# The problem is detailed in this thread [0], but in short: |
| 4 | +# |
| 5 | +# JLLs rely on a specific behavior of most `dlopen()` implementations: that |
| 6 | +# a library with the same SONAME will not be loaded twice; e.g. if you first |
| 7 | +# load `/a/libfoo.so`, loading `/b/libbar.so` which declares a dependency on |
| 8 | +# `libfoo.so` will find the previously-loaded `libfoo.so` without needing to |
| 9 | +# search because the SONAME `libbar.so` looks for matches the SONAME of the |
| 10 | +# previously-loaded `libfoo.so`. This allows JLLs to store libraries all over |
| 11 | +# the place, and directly `dlopen()` all dependencies before any dependents |
| 12 | +# would trigger a system-wide search. |
| 13 | +# |
| 14 | +# Musl does not do this. They do have a mechanism for skipping the directory |
| 15 | +# search, but it is only invoked when loading a library without specifying |
| 16 | +# the full path [1]. This means that when checking for dependencies, musl |
| 17 | +# skips all libraries that were loaded by full path [2]. All that needs to |
| 18 | +# happen is that musl needs to record the `shortname` (e.g. SONAME) of all |
| 19 | +# libraries, but sadly there's no way to do that if we also want to specify |
| 20 | +# the library unambiguously [3,2]. Manipulating the environment to allow for |
| 21 | +# non-fully-specified searches to work (e.g. changing `LD_LIBRARY_PATH` then |
| 22 | +# invoking `dlopen("libfoo.so")`) won't work, as the environment is only read |
| 23 | +# at process initialization. We are therefore backed into a corner and must |
| 24 | +# resort to heroic measures: manually inserting an appropriate `shortname`. |
| 25 | +# |
| 26 | +# [0] https://github.com/JuliaLang/julia/issues/40556 |
| 27 | +# [1] https://github.com/ifduyue/musl/blob/aad50fcd791e009961621ddfbe3d4c245fd689a3/ldso/dynlink.c#L1163-L1164 |
| 28 | +# [2] https://github.com/ifduyue/musl/blob/aad50fcd791e009961621ddfbe3d4c245fd689a3/ldso/dynlink.c#L1047-L1052 |
| 29 | +# [3] https://github.com/ifduyue/musl/blob/aad50fcd791e009961621ddfbe3d4c245fd689a3/ldso/dynlink.c#L1043-L1044 |
| 30 | + |
| 31 | + |
# Strings whose raw pointers we have written into musl's internal records.
# Holding them in this vector keeps them reachable, so the GC never frees
# memory that musl still points at.
#
# Declared `const` so the binding has a concrete type (`Vector{String}`) and
# cannot be rebound; the vector itself is still grown with `push!`.
const manual_gc_roots = String[]
| 34 | + |
| 35 | +## We define these structures so that Julia's internal struct padding logic |
| 36 | +## can do some arithmetic for us, instead of us needing to do manual offset |
| 37 | +## calculation ourselves, which is more error-prone. |
| 38 | + |
| 39 | + |
# Julia mirror of musl's `struct tls_module`, taken from `libc.h`:
# https://github.com/ifduyue/musl/blob/aad50fcd791e009961621ddfbe3d4c245fd689a3/src/internal/libc.h#L14-L18
#
# It is embedded by value inside `musl_dso`, so its field order and sizes must
# match the C declaration exactly.
struct musl_tls_module
    next::Ptr{musl_tls_module}
    # Declared as `void *image` in C, so it is an untyped pointer here as well.
    image::Ptr{Cvoid}
    len::Csize_t
    size::Csize_t
    align::Csize_t
    offset::Csize_t
end
| 50 | + |
# Julia mirror of musl's `struct dso`, taken from `ldso/dynlink.c`:
# https://github.com/ifduyue/musl/blob/aad50fcd791e009961621ddfbe3d4c245fd689a3/ldso/dynlink.c#L53-L107
#
# The only reason this exists is so that `fieldoffset` can tell us where
# `shortname` lives; every field in front of it is declared purely so that
# Julia's struct layout computes the same offsets the C compiler would.
# Do not reorder fields or change their types.
struct musl_dso
    # --- Leading fields we actually read ---
    base::Ptr{Cvoid}
    name::Ptr{UInt8}

    # --- Everything below is carried along only for layout purposes ---
    dynv::Ptr{Csize_t}
    next::Ptr{musl_dso}
    prev::Ptr{musl_dso}

    # Program headers
    phdr::Ptr{Cvoid}
    phnum::Cint
    phentsize::Csize_t

    # Symbol, hash and string tables, plus lazy-binding bookkeeping
    syms::Ptr{Cvoid}
    hashtab::Ptr{Cvoid}
    ghashtab::Ptr{Cvoid}
    versym::Ptr{Int16}
    strings::Ptr{UInt8}
    syms_next::Ptr{musl_dso}
    lazy_next::Ptr{musl_dso}
    lazy::Ptr{Csize_t}
    lazy_cnt::Csize_t

    # Mapping of the object
    map::Ptr{Cuchar}
    map_len::Csize_t

    # `dev_t` and `ino_t` are assumed to be `uint64_t` everywhere, including
    # on 32-bit systems.
    dev::UInt64
    ino::UInt64

    # Six one-byte flags
    relocated::Cchar
    constructed::Cchar
    kernel_mapped::Cchar
    mark::Cchar
    bfs_built::Cchar
    runtime_loaded::Cchar
    # NOTE: struct layout rules should insert two bytes of padding here

    # Dependency bookkeeping
    deps::Ptr{Ptr{musl_dso}}
    needed_by::Ptr{musl_dso}
    ndeps_direct::Csize_t
    next_dep::Csize_t
    ctor_visitor::Cint
    rpath_orig::Ptr{UInt8}
    rpath::Ptr{UInt8}

    # Thread-local storage and RELRO bookkeeping
    tls::musl_tls_module
    tls_id::Csize_t
    relro_start::Csize_t
    relro_end::Csize_t
    new_dtv::Ptr{Ptr{Cuint}}
    new_tls::Ptr{UInt8}
    td_index::Ptr{Cvoid}
    fini_next::Ptr{musl_dso}

    # --- The field this whole definition exists for ---
    shortname::Ptr{UInt8}

    # Trailing fields, kept in case they are useful to someone some day
    loadmap::Ptr{Cvoid}
    funcdesc::Ptr{Cvoid}
    got::Ptr{Csize_t}
end
| 115 | + |
"""
    replace_musl_shortname(lib_handle::Ptr{Cvoid})

Treat `lib_handle` as a pointer to a `musl_dso` record and, if that record has
no `shortname` yet, set it to the basename of the library's path.

The function is best-effort and always returns `lib_handle` unchanged. It
leaves the record untouched when:

- the record's `name` pointer is NULL or does not resolve to the same absolute
  path as `dlpath(lib_handle)` (the struct layout cannot be trusted), or
- `shortname` is already non-NULL.

The string written into the record is also pushed onto `manual_gc_roots` so
that it stays alive for as long as the record points at it.
"""
function replace_musl_shortname(lib_handle::Ptr{Cvoid})
    # The path the dynamic linker reports for this handle.
    lib_path = dlpath(lib_handle)
    expected_path = abspath(lib_path)

    # Load the record behind the handle. Its second field should point at the
    # library's path; we use that as a sanity check that our struct definition
    # lines up with what is actually in memory.
    dso = unsafe_load(Ptr{musl_dso}(lib_handle))
    if dso.name == C_NULL
        # `unsafe_string` throws on NULL; treat it as a failed sanity check.
        @debug("Unable to synchronize to DSO structure: name is NULL", path=expected_path)
        return lib_handle
    end
    recorded_path = abspath(unsafe_string(dso.name))
    if recorded_path != expected_path
        @debug("Unable to synchronize to DSO structure", name=recorded_path, path=expected_path)
        return lib_handle
    end

    # If the shortname is already set, leave it alone.
    if dso.shortname != C_NULL
        @debug("shortname != NULL!", ptr=dso.shortname, value=unsafe_string(dso.shortname))
        return lib_handle
    end

    # Byte offset of `shortname` from the start of the record.
    shortname_index = findfirst(isequal(:shortname), fieldnames(musl_dso))
    shortname_offset = fieldoffset(musl_dso, shortname_index)

    # Use the basename of the path as the shortname. In general this should be
    # the SONAME, but not always; being pedantic would mean parsing the SONAME
    # out of the object itself, which we deliberately do not do.
    new_shortname = basename(lib_path)

    # Root the string *before* handing its pointer to musl.
    push!(manual_gc_roots, new_shortname)
    unsafe_store!(Ptr{Ptr{UInt8}}(lib_handle + shortname_offset), pointer(new_shortname))
    return lib_handle
end
0 commit comments