Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
[workspace]
members = ["jemallocator", "jemallocator-global", "jemalloc-ctl", "jemalloc-sys"]
members = [
"jemallocator",
"jemallocator-global",
"jemalloc-ctl",
"jemalloc-sys",
"test-dylib",
]
15 changes: 13 additions & 2 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ cargo test --target "${TARGET}" --features stats
cargo test --target "${TARGET}" --features 'debug profiling'

cargo test --target "${TARGET}" \
--features unprefixed_malloc_on_supported_platforms
--features override_allocator_on_supported_platforms
cargo test --target "${TARGET}" --no-default-features
cargo test --target "${TARGET}" --no-default-features \
--features background_threads_runtime_support
Expand All @@ -48,7 +48,7 @@ cargo test --target "${TARGET}" --release
cargo test --target "${TARGET}" --manifest-path jemalloc-sys/Cargo.toml
cargo test --target "${TARGET}" \
--manifest-path jemalloc-sys/Cargo.toml \
--features unprefixed_malloc_on_supported_platforms
--features override_allocator_on_supported_platforms

# FIXME: jemalloc-ctl fails in the following targets
case "${TARGET}" in
Expand Down Expand Up @@ -77,3 +77,14 @@ cargo test --target "${TARGET}" \
# # The Alloc trait is unstable:
# ${CARGO_CMD} test --target "${TARGET}" --features alloc_trait
# fi

# Test that overriding works in dylibs.
case "$TARGET" in
"i686-unknown-linux-musl") ;;
"x86_64-unknown-linux-musl") ;;
*)
cargo run --target "${TARGET}" \
-p test-dylib \
--features override_allocator_on_supported_platforms
;;
esac
1 change: 1 addition & 0 deletions jemalloc-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ background_threads_runtime_support = []
background_threads = [ "background_threads_runtime_support" ]
stats = []
unprefixed_malloc_on_supported_platforms = []
override_allocator_on_supported_platforms = [ "unprefixed_malloc_on_supported_platforms" ]
disable_initial_exec_tls = []
disable_cache_oblivious = []

Expand Down
47 changes: 32 additions & 15 deletions jemalloc-sys/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ This crate provides following cargo feature flags:
* `stats` (configure `jemalloc` with `--enable-stats`): Enable statistics
gathering functionality. See the `jemalloc`'s "`opt.stats_print`" option
documentation for usage details.

* `debug` (configure `jemalloc` with `--enable-debug`): Enable assertions and
validation code. This incurs a substantial performance hit, but is very useful
during application development.

* `background_threads_runtime_support` (enabled by default): enables
background-threads run-time support when building `jemalloc-sys` on some POSIX
targets supported by `jemalloc`. Background threads are disabled at run-time
Expand All @@ -72,16 +72,33 @@ This crate provides following cargo feature flags:
* `unprefixed_malloc_on_supported_platforms`: when disabled, configure
`jemalloc` with `--with-jemalloc-prefix=_rjem_`. Enabling this causes symbols
like `malloc` to be emitted without a prefix, overriding the ones defined by
libc. This usually causes C and C++ code linked in the same program to use
`jemalloc` as well. On some platforms prefixes are always used because
unprefixing is known to cause segfaults due to allocator mismatches.

libc. This usually causes C, Objective-C and C++ code linked in the same
program to use `jemalloc` as well. On some platforms prefixes are always used
because unprefixing is known to cause segfaults due to allocator mismatches.

* `override_allocator_on_supported_platforms`: override the system allocator,
even outside Rust code.

This enables the `unprefixed_malloc_on_supported_platforms` feature, with the
addition that it forces overriding the allocator even if `malloc` and `free`
would not usually have been seen by the linker. It also overrides the
allocator on Apple platforms.

Note that to use this, the `jemalloc-sys` crate must actually be visible to
`rustc` (it is not enough to only declare it in `Cargo.toml`). This can be
done by adding:
```rust
use jemalloc_sys as _;
```

to your `main.rs`.

* `disable_initial_exec_tls` (disabled by default): when enabled, jemalloc is
built with the `--disable-initial-exec-tls` option. It disables the
initial-exec TLS model for jemalloc's internal thread-local storage (on those
platforms that support explicit settings). This can allow jemalloc to be
built with the `--disable-initial-exec-tls` option. It disables the
initial-exec TLS model for jemalloc's internal thread-local storage (on those
platforms that support explicit settings). This can allow jemalloc to be
dynamically loaded after program startup (e.g. using dlopen). If you encounter
the error `yourlib.so: cannot allocate memory in static TLS block`, you'll
the error `yourlib.so: cannot allocate memory in static TLS block`, you'll
likely want to enable this.

* `disable_cache_oblivious` (disabled by default): when enabled, jemalloc is
Expand All @@ -104,7 +121,7 @@ hyphens `-` are replaced with underscores `_`(see
variable, the `/etc/malloc.conf` symlink, and the `MALLOC_CONF` environment
variable (note: this variable might be prefixed as `_RJEM_MALLOC_CONF`). For
example, to change the default decay time for dirty pages to 30 seconds:

```
JEMALLOC_SYS_WITH_MALLOC_CONF=dirty_decay_ms:30000
```
Expand All @@ -115,17 +132,17 @@ hyphens `-` are replaced with underscores `_`(see
allocator page size equal to the system page size, so this option need not be
specified unless the system page size may change between configuration and
execution, e.g. when cross compiling.

* `JEMALLOC_SYS_WITH_LG_HUGEPAGE=<lg-hugepage>`: Specify the base 2 log of the
system huge page size. This option is useful when cross compiling, or when
overriding the default for systems that do not explicitly support huge pages.


* `JEMALLOC_SYS_WITH_LG_QUANTUM=<lg-quantum>`: Specify the base 2 log of the
minimum allocation alignment. jemalloc needs to know the minimum alignment
that meets the following C standard requirement (quoted from the April 12,
2011 draft of the C11 standard):

> The pointer returned if the allocation succeeds is suitably aligned so that
> it may be assigned to a pointer to any type of object with a fundamental
> alignment requirement and then used to access such an object or an array of
Expand Down
14 changes: 10 additions & 4 deletions jemalloc-sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,16 @@ fn main() {
.iter()
.any(|i| target.contains(i))
{
warning!(
"Unprefixed `malloc` requested on unsupported platform `{}` => using prefixed `malloc`",
target
);
// Apple targets don't support unprefixed, but they do support
// overriding (if you do the `zone_register` trick), so no need to
// warn there.
let override_ = env::var("CARGO_FEATURE_OVERRIDE_ALLOCATOR_ON_SUPPORTED_PLATFORMS").is_ok();
if !target.contains("apple") || !override_ {
warning!(
"Unprefixed `malloc` requested on unsupported platform `{}` => using prefixed `malloc`",
target
);
}
use_prefix = true;
}

Expand Down
2 changes: 1 addition & 1 deletion jemalloc-sys/src/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ pub static NO_BG_THREAD_TARGETS: &[&str] = &["musl"];
// https://github.com/rust-lang/rust/commit/e3b414d8612314e74e2b0ebde1ed5c6997d28e8d
// https://github.com/rust-lang/rust/commit/9f3de647326fbe50e0e283b9018ab7c41abccde3
// https://github.com/rust-lang/rust/commit/ed015456a114ae907a36af80c06f81ea93182a24
pub static NO_UNPREFIXED_MALLOC_TARGETS: &[&str] = &["android", "dragonfly", "darwin"];
pub static NO_UNPREFIXED_MALLOC_TARGETS: &[&str] = &["android", "dragonfly", "apple"];
83 changes: 83 additions & 0 deletions jemalloc-sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -890,3 +890,86 @@ pub type extent_merge_t = unsafe extern "C" fn(
mod env;

pub use env::*;

// When using the `"override_allocator_on_supported_platforms"` feature flag,
// the user wants us to globally override the system allocator.
//
// However, since we build `jemalloc` as a static library (an archive), the
// linker may decide to not care about our overrides if it can't directly see
// references to the symbols, see the following link for details:
// <https://maskray.me/blog/2021-06-20-symbol-processing#archive-processing>
//
// This is problematic if `jemalloc_sys` is used from a library that by itself
// doesn't allocate, while invoking other shared libraries that do.
//
// Another especially problematic case would be something like the following:
//
// ```
// // Call `malloc` whose symbol is looked up statically.
// let ptr = libc::malloc(42);
//
// // But use a dynamically looked up `free`.
// let free = libc::dlsym(null_mut(), c"free".as_ptr());
// let free = transmute::<*mut c_void, unsafe extern "C" fn(*mut c_void)>(free);
// free(ptr);
// ```
//
// Since if the `malloc` and `free` provided by `jemalloc` end up in different
// object files in the archive (NOTE: In practice, this is unlikely to be an
// issue, since `jemalloc.c` contains all the implementations and is compiled
// as a single object file), the linker would think that only `malloc` was
// used, and would never load the `free` that we also want (and hence we'd end
// up executing jemalloc's `malloc` and the system's `free`, which is UB).
//
// To avoid this problem, we make sure that all the allocator functions are
// visible to the linker, such that it will always override all of them.
//
// We do this by referencing these symbols in `#[used]` statics, which makes
// them known to `rustc`, which will reference them in a `symbols.o` stub file
// that is later passed to the linker. See the following link for details on
// how this works:
// <https://github.com/rust-lang/rust/pull/95604>

#[cfg(all(
feature = "override_allocator_on_supported_platforms",
not(target_vendor = "apple")
))]
mod set_up_statics {
use super::*;

#[used]
static USED_MALLOC: unsafe extern "C" fn(usize) -> *mut c_void = malloc;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any references on how these statics are processed?

Copy link
Author

@madsmtm madsmtm Dec 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's the reference on #[used], and the equivalent Clang attribute and GCC attribute.

But these are somewhat vague, perhaps intentionally so as this is very much a linker concept? I don't really have a good reference on linkers, the best I can do is reference this piece of source code in rustc that talks about a workaround for static libs, and the following section from the manual page for ld64:

A static library (aka static archive) is a collection of .o files with a table of contents that lists the global symbols in the .o files. ld will only pull .o files out of a static library if needed to resolve some symbol reference. Unlike traditional linkers, ld will continually search a static library while linking.

(Note that Rust .rlibs are internally archives / static libraries, and so the rules for static libraries apply to them as well).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or if you have more specific questions about how things work then I can try to answer them, to the best of my ability?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. How about adding a test to show that it works as expected? You can add a dylib crate that allocs in the root directory and then add a test crate that links both the dylib and jemalloc-sys. If it works as expected the test should be able to use jemalloc's free to dealloc the pointer from dylib.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the delay.

I found the closest thing to a reference link, and have rewritten the docs around it to hopefully be clearer.

I have also added two tests:

  1. malloc_and_libc_are_interoperable_when_overridden, which tests that the overriding actually works on macOS.
  2. test-dylib, which tests that when linking a dylib, the symbol is correctly overridden. Note that I couldn't reproduce it with the current nightly, so something might have changed recently that makes this hack redundant nowadays? Unsure, though it doesn't hurt to have in any case.

Failed CI run of the first commit with just the tests: https://github.com/madsmtm/jemallocator/actions/runs/15490515399
Successful CI run after the second commit: https://github.com/madsmtm/jemallocator/actions/runs/15490429305

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got the test-dylib test to not work without this PR, have pushed that as the first commit instead.

Failed CI run: https://github.com/madsmtm/jemallocator/actions/runs/17785024568
Successful CI run: https://github.com/madsmtm/jemallocator/actions/runs/17784954361

#[used]
static USED_CALLOC: unsafe extern "C" fn(usize, usize) -> *mut c_void = calloc;
#[used]
static USED_POSIX_MEMALIGN: unsafe extern "C" fn(*mut *mut c_void, usize, usize) -> c_int =
posix_memalign;
#[used]
static USED_ALIGNED_ALLOC: unsafe extern "C" fn(usize, usize) -> *mut c_void = aligned_alloc;
#[used]
static USED_REALLOC: unsafe extern "C" fn(*mut c_void, usize) -> *mut c_void = realloc;
#[used]
static USED_FREE: unsafe extern "C" fn(*mut c_void) = free;
}

// On macOS, jemalloc doesn't directly override malloc/free, but instead
// registers itself with the allocator's zone APIs in a ctor (`zone_register`
// is marked with `__attribute__((constructor))`).
//
// Similarly to above though, the Mach-O linker will not consider ctors as
// "used" when they are defined in an archive member in a static library, so
// we need to explicitly reference the function via Rust's `#[used]`.

#[cfg(all(
feature = "override_allocator_on_supported_platforms",
target_vendor = "apple"
))]
#[used]
static USED_ZONE_REGISTER: unsafe extern "C" fn() = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better make it a new feature so that we can land it without a new minor version.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd argue this is more in the category "bugfix" rather than "feature"; the unprefixed_malloc_on_supported_platforms just plain didn't work on macOS before (unless you inserted these statics yourself like rustc), and now it does.

Or do you fear this will have a chance of breaking something for users?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unprefixed_malloc_on_supported_platforms just plain didn't work on macOS before

It actually works if unprefixed_malloc_on_supported_platforms is interpreted as its name instead of overriding system allocator. The symbol is still unprefixed on MacOS in the past and future.

Or do you fear this will have a chance of breaking something for users?

Yes, it's known not to override system allocator on MacOS.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. In the last commit, I've changed it to:

unprefixed_malloc_on_supported_platforms = []
override = ["unprefixed_malloc_on_supported_platforms"]

I've also made the statics only get emitted with that feature enabled.

extern "C" {
#[cfg_attr(prefixed, link_name = "_rjem_je_zone_register")]
#[cfg_attr(not(prefixed), link_name = "je_zone_register")]
fn zone_register();
}
zone_register
};
13 changes: 13 additions & 0 deletions jemalloc-sys/tests/unprefixed_malloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,16 @@ fn malloc_is_prefixed() {
fn malloc_is_overridden() {
assert_eq!(tikv_jemalloc_sys::malloc as usize, libc::malloc as usize)
}

#[cfg(any(
    not(prefixed),
    all(
        feature = "override_allocator_on_supported_platforms",
        target_vendor = "apple"
    ),
))]
#[test]
fn malloc_and_libc_are_interoperable_when_overridden() {
    // Allocate through the `malloc` exported by `tikv_jemalloc_sys` and
    // release the pointer through `libc::free`.
    //
    // SAFETY: mixing the two is only sound when both symbols are backed by
    // the same allocator; the `cfg` above is intended to restrict this test
    // to configurations where that holds.
    unsafe {
        let allocation = tikv_jemalloc_sys::malloc(42);
        libc::free(allocation);
    }
}
4 changes: 4 additions & 0 deletions jemallocator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ stats = ["tikv-jemalloc-sys/stats"]
background_threads_runtime_support = ["tikv-jemalloc-sys/background_threads_runtime_support"]
background_threads = ["tikv-jemalloc-sys/background_threads"]
unprefixed_malloc_on_supported_platforms = ["tikv-jemalloc-sys/unprefixed_malloc_on_supported_platforms"]
override_allocator_on_supported_platforms = [
"unprefixed_malloc_on_supported_platforms",
"tikv-jemalloc-sys/override_allocator_on_supported_platforms",
]
disable_initial_exec_tls = ["tikv-jemalloc-sys/disable_initial_exec_tls"]
disable_cache_oblivious = ["tikv-jemalloc-sys/disable_cache_oblivious"]

Expand Down
23 changes: 23 additions & 0 deletions test-dylib/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "test-dylib"
version = "0.0.0"
license = "MIT OR Apache-2.0"
description = "A test helper for jemalloc-sys"
edition = "2018"
publish = false

[dependencies]
libc = { version = "^0.2.8", default-features = false }
tikv-jemalloc-sys = { path = "../jemalloc-sys" }

[build-dependencies]
cc = "^1.0.13"

[features]
override_allocator_on_supported_platforms = [
"tikv-jemalloc-sys/override_allocator_on_supported_platforms",
]

[[bin]]
name = "test-dylib"
test = false
29 changes: 29 additions & 0 deletions test-dylib/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//! Build shared library `dep.c`.
use std::{env, path::PathBuf};

/// Build script entry point: compiles `src/dep.c` into a shared library
/// placed in `OUT_DIR` and instructs Cargo to link the crate against it.
///
/// # Panics
///
/// Panics if `OUT_DIR` is not set, if the C compiler cannot be spawned, or if
/// the compiler exits with a non-success status.
fn main() {
    // Only re-run this script when the C source changes.
    println!("cargo:rerun-if-changed=src/dep.c");

    let out_dir = PathBuf::from(
        env::var_os("OUT_DIR").expect("`OUT_DIR` must be set when running a build script"),
    );

    // NOTE: Only for testing, extension is wrong when cross-compiling.
    let dylib = out_dir.join(format!(
        "{}dep{}",
        env::consts::DLL_PREFIX,
        env::consts::DLL_SUFFIX
    ));

    // Take the compiler command that `cc` selects, but invoke it by hand so
    // that we can ask for a shared library via `-shared`.
    let status = cc::Build::new()
        .get_compiler()
        .to_command()
        .arg("src/dep.c")
        .arg("-shared")
        .arg("-o")
        .arg(&dylib)
        .status()
        .expect("failed to spawn the C compiler for `src/dep.c`");
    assert!(
        status.success(),
        "compiling `src/dep.c` into a shared library failed: {}",
        status
    );

    // Link against the freshly built `dep` library and tell the linker where
    // to find it.
    println!("cargo:rustc-link-lib=dylib=dep");
    println!("cargo:rustc-link-search=native={}", out_dir.display());
}
21 changes: 21 additions & 0 deletions test-dylib/src/dep.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <dlfcn.h>

/*
 * Looks up the `malloc` symbol that this library resolves to and returns the
 * `dli_fname` reported for it by `dladdr` (the pathname of the object that
 * contains the address).
 *
 * Aborts the process if `dladdr` cannot resolve the address.
 */
const char* dep_lookup_malloc_address(void) {
    Dl_info info;
    if (!dladdr((void *)malloc, &info)) {
        /* Report on stderr rather than stdout: stdout may be buffered and
         * `abort` is not required to flush open streams, so a message
         * written there could be lost before the process dies. */
        fprintf(stderr, "failed finding `malloc`\n");
        abort();
    }
    return info.dli_fname;
}

/* Allocates `size` bytes by calling the `malloc` visible to this library and
 * returns the resulting pointer unchanged. */
void* dep_malloc(size_t size) {
return malloc(size);
}

/* Releases `ptr` by passing it to the `free` visible to this library. */
void dep_free(void* ptr) {
free(ptr);
}
Loading