Skip to content

Commit 4f6032b

Browse files
committed
samples: bench: Benchmark a "base semaphore"
This is a comparison benchmark of implementing deferred initialization of Zephyr kernel objects from Rust. The idea here is that instead of the current `Fixed` enum, which contains either a reference to a static, or a `Pin<Box<T>>` of the object, we pair the objects with an atomic pointer variable indicating the pointer state. It starts as a null pointer, and will result in the object being initialized when it is first used. The pointer is then replaced with the address of `Self` so that later code can detect an attempt to move the object. Signed-off-by: David Brown <[email protected]>
1 parent 5b883ed commit 4f6032b

File tree

3 files changed

+148
-0
lines changed

3 files changed

+148
-0
lines changed

samples/bench/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ crate-type = ["staticlib"]
1414

1515
[dependencies]
1616
zephyr = "0.1.0"
17+
critical-section = "1.1.2"
1718

1819
# Dependencies that are used by build.rs.
1920
[build-dependencies]

samples/bench/src/basesem.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
//! Base Semaphore
2+
//!
3+
//! This is an experiment into a different approach to Zephyr kernel objects.
4+
//!
5+
//! Currently, these kernel objects are directed through "Fixed", which is an enum holding either
6+
//! a pointer to a statically declared object, or a `Pin<Box<UnsafeCell<T>>>`. This was done in an
7+
//! attempt to keep things performant, but we actually always still end up with both an enum
8+
//! discriminant, as well as an extra indirection for the static one.
9+
//!
10+
//! The deep issue here is that Zephyr objects inherently cannot be moved. Zephyr uses a `dlist`
11+
//! structure in most objects that has a pointer back to itself to indicate the empty list.
12+
//!
13+
//! To work around this, we will implement objects as a pairing of an `AtomicUsize` and a
14+
//! `UnsafeCell<k_sem>` (for whatever underlying type). The atomic will go through a small number
15+
//! of states:
16+
//!
17+
//! - 0: indicates that this object is uninitialized.
18+
//! - ptr: where ptr is the address of Self for an initialized object.
19+
//!
20+
//! On each use, the atomic value is read, and if a 0 is seen, an initialization is performed. A
21+
//! Relaxed load is enough to detect the 0 state, because the value is re-read inside the critical
//! section. The initialization will lock a simple critical section, checking the atomic
22+
//! again, to make sure it didn't get initialized by something intercepting it. If the check sees a
23+
//! 'ptr' value that is not the same as Self, it indicates the object has been moved after
24+
//! initialization, and will simply panic.
25+
26+
// To measure performance, this module implements this for `k_sem` without abstractions around it.
27+
// The idea is to compare performance with the above `Fixed` implementation.
28+
29+
use core::{cell::UnsafeCell, ffi::c_uint, mem, sync::atomic::Ordering};
30+
31+
use zephyr::{error::to_result_void, raw::{k_sem, k_sem_give, k_sem_init, k_sem_take}, sync::atomic::AtomicUsize, time::Timeout};
32+
use zephyr::Result;
33+
34+
pub struct Semaphore {
35+
state: AtomicUsize,
36+
item: UnsafeCell<k_sem>,
37+
}
38+
39+
// SAFETY: These are both Send and Sync. The semaphore itself is safe, and the atomic+critical
40+
// section protects the state.
41+
unsafe impl Send for Semaphore { }
42+
unsafe impl Sync for Semaphore { }
43+
44+
impl Semaphore {
45+
/// Construct a new semaphore, with the given initial_count and limit. There is a bit of
46+
/// trickery to pass the initial values through to the initializer, but otherwise this is just a
47+
/// basic initialization.
48+
pub fn new(initial_count: c_uint, limit: c_uint) -> Semaphore {
49+
let this = Self {
50+
state: AtomicUsize::new(0),
51+
item: unsafe { UnsafeCell::new(mem::zeroed()) },
52+
};
53+
54+
// Set the initial count and limit in the semaphore to use for later initialization.
55+
unsafe {
56+
let ptr = this.item.get();
57+
(*ptr).count = initial_count;
58+
(*ptr).limit = limit;
59+
}
60+
61+
this
62+
}
63+
64+
/// Get the raw pointer, initializing the `k_sem` if needed.
65+
fn get(&self) -> *mut k_sem {
66+
// First load can be relaxed, for performance reasons. If it is seen as uninitialized, the
67+
// below Acquire load will see the correct value.
68+
let state = self.state.load(Ordering::Relaxed);
69+
if state == self as *const Self as usize {
70+
return self.item.get();
71+
} else if state != 0 {
72+
panic!("Semaphore was moved after first use");
73+
}
74+
75+
critical_section::with(|_| {
76+
// Reload, with Acquire ordering to see a determined value.
77+
let state = self.state.load(Ordering::Acquire);
78+
if state == self as *const Self as usize {
79+
return self.item.get();
80+
} else if state != 0 {
81+
panic!("Semaphore was moved after first use");
82+
}
83+
84+
// Perform the initialization. We're within the critical section, and know that nobody
85+
// could be using this.
86+
unsafe {
87+
let ptr = self.item.get();
88+
let initial_count = (*ptr).count;
89+
let limit = (*ptr).limit;
90+
91+
k_sem_init(ptr, initial_count, limit);
92+
}
93+
94+
self.state.store(self as *const Self as usize, Ordering::Release);
95+
self.item.get()
96+
})
97+
}
98+
99+
/// Synchronous take.
100+
pub fn take(&self, timeout: impl Into<Timeout>) -> Result<()> {
101+
let timeout: Timeout = timeout.into();
102+
let ptr = self.get();
103+
let ret = unsafe { k_sem_take(ptr, timeout.0) };
104+
to_result_void(ret)
105+
}
106+
107+
pub fn give(&self) {
108+
let ptr = self.get();
109+
unsafe { k_sem_give(ptr) };
110+
}
111+
}

samples/bench/src/lib.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ use zephyr::{
3333
work::{WorkQueue, WorkQueueBuilder},
3434
};
3535

36+
mod basesem;
37+
3638
/// How many threads to run in the tests.
3739
const NUM_THREADS: usize = 6;
3840

@@ -59,6 +61,7 @@ extern "C" fn rust_main() {
5961
}
6062

6163
tester.run(Command::Empty);
64+
tester.run(Command::BaseSemPingPong(NUM_THREADS, 10_000));
6265
tester.run(Command::SimpleSem(10_000));
6366
tester.run(Command::SimpleSemAsync(10_000));
6467
tester.run(Command::SimpleSemYield(10_000));
@@ -119,6 +122,10 @@ struct ThreadTests {
119122
/// The test also all return their result to the main. The threads Send, the main running
120123
/// receives.
121124
results: ChanPair<Result>,
125+
126+
/// For the base sem test, just use these, which are just shared by reference.
127+
forward: basesem::Semaphore,
128+
reverse: basesem::Semaphore,
122129
}
123130

124131
impl ThreadTests {
@@ -149,6 +156,8 @@ impl ThreadTests {
149156
low_command: low_send,
150157
high_command: high_send,
151158
workq,
159+
forward: basesem::Semaphore::new(0, 1),
160+
reverse: basesem::Semaphore::new(0, 1),
152161
};
153162

154163
let mut thread_commands = Vec::new();
@@ -441,6 +450,11 @@ impl ThreadTests {
441450
continue;
442451
}
443452
}
453+
454+
Command::BaseSemPingPong(_nthread, count) => {
455+
this.base_worker(count);
456+
total = count;
457+
}
444458
}
445459

446460
this.results
@@ -626,6 +640,23 @@ impl ThreadTests {
626640
let _ = this;
627641
}
628642

643+
fn base_worker(&self, count: usize) {
644+
for _ in 0..count {
645+
self.forward.take(Forever).unwrap();
646+
self.reverse.give();
647+
}
648+
}
649+
650+
// In the low runner, does the ping-pong with each.
651+
fn base_replier(&self, nthread: usize, count: usize) {
652+
for _ in 0..count {
653+
for _ in 0..nthread {
654+
self.forward.give();
655+
self.reverse.take(Forever).unwrap();
656+
}
657+
}
658+
}
659+
629660
/// And the low priority worker.
630661
fn low_runner(this: Arc<Self>, command: Receiver<Command>) {
631662
let _ = this;
@@ -655,6 +686,9 @@ impl ThreadTests {
655686
}
656687
Command::SemPingPongAsync(_) => (),
657688
Command::SemOnePingPongAsync(_, _) => (),
689+
Command::BaseSemPingPong(nthread, count) => {
690+
this.base_replier(nthread, count);
691+
}
658692
}
659693
// printkln!("low command: {:?}", cmd);
660694

@@ -687,6 +721,7 @@ impl ThreadTests {
687721
}
688722
Command::SemPingPongAsync(_) => (),
689723
Command::SemOnePingPongAsync(_, _) => (),
724+
Command::BaseSemPingPong(_, _) => (),
690725
}
691726
// printkln!("high command: {:?}", cmd);
692727

@@ -745,6 +780,7 @@ enum Command {
745780
SemOnePingPong(usize),
746781
/// Same as SemOnePingPong, but async. The first parameter is the number of async tasks.
747782
SemOnePingPongAsync(usize, usize),
783+
BaseSemPingPong(usize, usize),
748784
}
749785

750786
enum Result {

0 commit comments

Comments
 (0)