Skip to content

Commit 2756675

Browse files
authored
Merge branch 'main' into schrodinger/prepare-release-nits
2 parents 925ac24 + 4a169bf commit 2756675

File tree

7 files changed

+375
-50
lines changed

7 files changed

+375
-50
lines changed

examples/bench_contention.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
use std::sync::mpsc::channel;
2+
use std::thread;
3+
use std::time::Instant;
4+
use std::alloc::Layout;
5+
6+
// Register snmalloc as the global allocator, so the `std::alloc::alloc` and
// `std::alloc::dealloc` calls made by this benchmark are served by it.
#[global_allocator]
7+
static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
8+
9+
/// Size in bytes of every block allocated by the benchmark.
const BLOCK_SIZE: usize = 64;
10+
/// Number of allocate / send / receive / free rounds performed by each thread.
const ITERATIONS: usize = 1_000_000;
11+
12+
/// Wrapper around a raw block pointer so that it can be moved through a channel.
struct Ptr(*mut u8);
13+
// SAFETY: raw pointers are not `Send` by default. In `main` every `Ptr` is moved
// into a channel right after allocation and only the receiving thread touches it
// afterwards (to free it), so the pointee is never accessed from two threads.
unsafe impl Send for Ptr {}
14+
15+
fn main() {
16+
let thread_count = std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4);
17+
println!("Running contention benchmark with {} threads, {} iterations per thread", thread_count, ITERATIONS);
18+
19+
// Use std::sync::Barrier
20+
let barrier = std::sync::Arc::new(std::sync::Barrier::new(thread_count + 1));
21+
22+
let mut senders = Vec::new();
23+
let mut receivers = Vec::new();
24+
25+
// Create a ring topology channels
26+
for _ in 0..thread_count {
27+
let (tx, rx) = channel::<Ptr>();
28+
senders.push(tx);
29+
receivers.push(Some(rx));
30+
}
31+
32+
let mut handles = Vec::new();
33+
34+
// Start timing from here, but actual work starts after barrier
35+
let _start = Instant::now();
36+
37+
for i in 0..thread_count {
38+
let barrier = barrier.clone();
39+
// Thread i sends to (i + 1) % N
40+
let tx = senders[(i + 1) % thread_count].clone();
41+
// Thread i receives from i
42+
let rx = receivers[i].take().unwrap();
43+
44+
handles.push(thread::spawn(move || {
45+
// Pre-allocate some items to fill the pipe
46+
let layout = Layout::from_size_align(BLOCK_SIZE, 8).unwrap();
47+
48+
barrier.wait(); // Synchronize start
49+
50+
for _ in 0..ITERATIONS {
51+
// 1. Allocate a new block
52+
let ptr = unsafe { std::alloc::alloc(layout) };
53+
54+
// 2. Send to next neighbor (who will free it)
55+
tx.send(Ptr(ptr)).unwrap();
56+
57+
// 3. Receive from prev neighbor (who allocated it)
58+
let received = rx.recv().unwrap();
59+
60+
// 4. Free the received block
61+
unsafe { std::alloc::dealloc(received.0, layout) };
62+
}
63+
}));
64+
}
65+
66+
barrier.wait(); // Start timing
67+
let loop_start = Instant::now();
68+
69+
for h in handles {
70+
h.join().unwrap();
71+
}
72+
73+
let duration = loop_start.elapsed();
74+
println!("Benchmark completed in {:.2?}", duration);
75+
println!("Throughput: {:.2} Mops/sec", (thread_count * ITERATIONS) as f64 / duration.as_secs_f64() / 1_000_000.0);
76+
}

snmalloc-rs/Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ readme = "README.md"
1313
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1414

1515
[workspace]
16-
members = ["snmalloc-sys"]
16+
members = ["snmalloc-sys", "xtask"]
1717

1818
[dependencies]
1919
snmalloc-sys = { version = "0.7.4", path = "snmalloc-sys", default-features = false }
@@ -34,3 +34,9 @@ notls = ["snmalloc-sys/notls"]
3434
stats = ["snmalloc-sys/stats"]
3535
usewait-on-address = ["snmalloc-sys/usewait-on-address"]
3636
libc-api = ["snmalloc-sys/libc-api"]
37+
tracing = ["snmalloc-sys/tracing"]
38+
fuzzing = ["snmalloc-sys/fuzzing"]
39+
vendored-stl = ["snmalloc-sys/vendored-stl"]
40+
check-loads = ["snmalloc-sys/check-loads"]
41+
pageid = ["snmalloc-sys/pageid"]
42+
gwp-asan = ["snmalloc-sys/gwp-asan"]

snmalloc-rs/README.md

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,14 @@ a global allocator for rust. snmalloc is a research allocator. Its key design fe
1313
Some old benchmark results are available in
1414
the [`snmalloc` paper](https://github.com/microsoft/snmalloc/blob/master/snmalloc.pdf).
1515

16-
There are three features defined in this crate:
16+
The following features are defined in this crate:
1717

18-
- `debug`: Enable the `Debug` mode in `snmalloc`.
19-
- ~~`1mib`: Use the `1mib` chunk configuration. From `0.2.17`, this is set as a default feature~~ (removed since 0.3.0)
20-
- ~~`16mib`: Use the `16mib` chunk configuration.~~ (removed since 0.3.0)
21-
- ~~`cache-friendly`: Make the allocator more cache friendly (setting `CACHE_FRIENDLY_OFFSET` to `64` in building the
22-
library).~~ (removed since 0.3.0)
18+
- `debug`: Enable the `Debug` mode in `snmalloc`. This is also automatically enabled if Cargo's `DEBUG` environment variable is set to `true`.
2319
- `native-cpu`: Optimize `snmalloc` for the native CPU of the host machine. (this is not a default behavior
2420
since `0.2.14`)
2521
- `qemu`: Workaround `madvise` problem of QEMU environment
26-
- ~~`stats`: Enable statistics~~ (removed since 0.3.0)
2722
- `local_dynamic_tls`: Work around the "cannot allocate memory in static TLS block" error.
2823
- `build_cc`: Use of cc crate instead of cmake (cmake still default) as builder (more platform agnostic)
29-
- ~~`usecxx20`: Enable C++20 standard if available~~ (removed since 0.3.0)
3024
- `usecxx17`: Use C++17 standard
3125
- `check`: Enable extra checks to improve security, see upstream [security docs](https://github.com/microsoft/snmalloc/tree/main/docs/security).
3226
Note that the `memcpy` protection is not enabled in Rust.
@@ -35,6 +29,25 @@ There are three features defined in this crate:
3529
- `notls`: Allows the library to be loaded dynamically by disabling thread-local storage (TLS).
3630
- `stats`: Enables allocation statistics.
3731
- `libc-api`: Enables libc API backed by snmalloc.
32+
- `usewait-on-address`: Enable `WaitOnAddress` support on Windows (enabled by default).
33+
- `tracing`: Enable structured tracing/logging.
34+
- `fuzzing`: Enable fuzzing support.
35+
- `vendored-stl`: Use self-vendored STL.
36+
- `check-loads`: Enable check loads feature.
37+
- `pageid`: Enable page ID feature.
38+
- `gwp-asan`: Enable GWP-ASan integration. Requires `SNMALLOC_GWP_ASAN_INCLUDE_PATH` and `SNMALLOC_GWP_ASAN_LIBRARY_PATH`.
39+
40+
## Build Configuration
41+
42+
The build script keeps the build configuration of the underlying `snmalloc` allocator consistent with the Cargo build profile:
43+
44+
### Environment Variables
45+
The following environment variables are automatically detected and propagated:
46+
- `DEBUG`: Synchronizes the `snmalloc` build type with the Cargo profile. If `true`, `snmalloc` is built in `Debug` mode.
47+
- `OPT_LEVEL`: Propagated to the C++ compiler to ensure optimization parity between Rust and C++ components.
48+
49+
### Windows CRT Consistency
50+
On Windows, the build script enforces static CRT linking (`/MT` or `/MTd`) across both `cc` and `cmake` builders. This prevents linker errors and ensures consistency when `snmalloc` is used as a global allocator.
3851

3952
**To get the crates compiled, you need to choose either `1mib` or `16mib` to determine the chunk configuration**
4053

snmalloc-rs/snmalloc-sys/Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ cc = { version = "1.0", optional = true }
1717
cmake = { version = "0.1", optional = true }
1818

1919
[features]
20-
default = ["build_cmake"]
20+
default = ["build_cmake", "usewait-on-address"]
2121
build_cc = ["cc"]
2222
build_cmake = ["cmake"]
2323
qemu = []
@@ -33,3 +33,9 @@ notls = []
3333
stats = []
3434
usewait-on-address = []
3535
libc-api = []
36+
tracing = []
37+
fuzzing = []
38+
vendored-stl = []
39+
check-loads = []
40+
pageid = []
41+
gwp-asan = []

0 commit comments

Comments
 (0)