samples: bench: Benchmark a "base semaphore"

d3zd3z · d3zd3z · commit 4f6032bd1298 · 2025-02-11T07:42:24.000-07:00
This is a comparison benchmark of implementing deferred initialization
of Zephyr kernel objects from Rust.  The idea here is that instead of
the current `Fixed` enum, which contains either a reference to a static,
or a Pin<Box<T>> of the object, we pair the objects with an atomic
pointer variable indicating the pointer state.  It starts as a null
pointer, and will result in the object being initialized when it is
first used.  The pointer is then replaced with the address of `Self` so
that later code can detect an attempt to move the object.

Signed-off-by: David Brown <david.brown@linaro.org>
diff --git a/samples/bench/Cargo.toml b/samples/bench/Cargo.toml
@@ -14,6 +14,7 @@ crate-type = ["staticlib"]
 
 [dependencies]
 zephyr = "0.1.0"
+critical-section = "1.1.2"
 
 # Dependencies that are used by build.rs.
 [build-dependencies]
diff --git a/samples/bench/src/basesem.rs b/samples/bench/src/basesem.rs
@@ -0,0 +1,111 @@
+//! Base Semaphore
+//!
+//! This is an experiment into a different approach to Zephyr kernel objects.
+//!
+//! Currently, these kernel objects are accessed through `Fixed`, an enum that holds either a
+//! pointer to a statically declared object or a `Pin<Box<UnsafeCell<T>>>`.  This was done in an
+//! attempt to keep things performant, but we still always end up with both an enum discriminant
+//! and an extra indirection for the static case.
+//!
+//! The deep issue here is that Zephyr objects inherently cannot be moved.  Zephyr uses a `dlist`
+//! structure in most objects that has a pointer back to itself to indicate the empty list.
+//!
+//! To work around this, we will implement objects as a pairing of an `AtomicUsize` and a
+//! `UnsafeCell<k_sem>` (for whatever underlying type).  The atomic will go through a small number
+//! of states:
+//!
+//! - 0: indicates that this object is uninitialized.
+//! - ptr: where ptr is the address of Self for an initialized object.
+//!
+//! On each use, the atomic value can be read (Relaxed is fine here), and if a 0 is seen, perform an
+//! initialization.  The initialization will lock a simple critical section, checking the atomic
+//! again, to make sure it didn't get initialized by something intercepting it.  If the check sees a
+//! 'ptr' value that is not the same as Self, it indicates the object has been moved after
+//! initialization, and will simply panic.
+
+// To measure performance, this module implements this for `k_sem` without abstractions around it.
+// The idea is to compare performance with the above `Fixed` implementation.
+
+use core::{cell::UnsafeCell, ffi::c_uint, mem, sync::atomic::Ordering};
+
+use zephyr::{error::to_result_void, raw::{k_sem, k_sem_give, k_sem_init, k_sem_take}, sync::atomic::AtomicUsize, time::Timeout};
+use zephyr::Result;
+
+/// A semaphore built directly on a raw `k_sem`, initialized lazily on first use.
+pub struct Semaphore {
+    /// 0 while `item` has not yet been passed to `k_sem_init`; afterwards the address of this
+    /// `Semaphore`, which lets `get` detect that the value was moved after its first use.
+    state: AtomicUsize,
+    /// The underlying Zephyr semaphore.  Until it is initialized, its `count` and `limit` fields
+    /// hold the values that were passed to `new`.
+    item: UnsafeCell<k_sem>,
+}
+
+// SAFETY: These are both Send and Sync.  The `k_sem` is only reached through `get`, where the
+// atomic `state` plus a critical section guard the one-time initialization; after that the
+// pointer is only handed to `k_sem_take` and `k_sem_give`.
+// NOTE(review): this relies on those Zephyr calls being safe to invoke concurrently on the same
+// `k_sem` -- confirm against the kernel documentation.
+unsafe impl Send for Semaphore { }
+unsafe impl Sync for Semaphore { }
+
+impl Semaphore {
+    /// Construct a new semaphore with the given `initial_count` and `limit`.
+    ///
+    /// The underlying `k_sem` is not initialized here; `k_sem_init` is deferred until the first
+    /// `take` or `give`.  Until then the value may still be moved, and the requested count and
+    /// limit are parked in the zeroed `k_sem`'s own `count` and `limit` fields.
+    pub fn new(initial_count: c_uint, limit: c_uint) -> Semaphore {
+        let this = Self {
+            state: AtomicUsize::new(0),
+            // SAFETY: NOTE(review): assumes the all-zero bit pattern is a valid `k_sem` value
+            // (it is a C struct) -- confirm.  Nothing treats it as a live semaphore until
+            // `k_sem_init` has run in `get`.
+            item: unsafe { UnsafeCell::new(mem::zeroed()) },
+        };
+
+        // Set the initial count and limit in the semaphore to use for later initialization.
+        // SAFETY: `this` is still exclusively owned by this function, so writing through the
+        // cell's pointer cannot race with any other access.
+        unsafe {
+            let ptr = this.item.get();
+            (*ptr).count = initial_count;
+            (*ptr).limit = limit;
+        }
+
+        this
+    }
+
+    /// Get the raw pointer, initializing the `k_sem` if needed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the address recorded in `state` is neither 0 nor the address of `self`, which
+    /// means the semaphore was moved after its first use.
+    fn get(&self) -> *mut k_sem {
+        let me = self as *const Self as usize;
+
+        // Fast path.  The load itself stays Relaxed, but when it observes our own address we
+        // still have to synchronize with the Release store made by whichever context ran the
+        // initialization.  Without the fence nothing orders our later accesses to the `k_sem`
+        // after the writes performed by `k_sem_init`, so on a weakly ordered multi-core target
+        // the caller could operate on a semaphore whose initialization is not yet visible.
+        let state = self.state.load(Ordering::Relaxed);
+        if state == me {
+            core::sync::atomic::fence(Ordering::Acquire);
+            return self.item.get();
+        } else if state != 0 {
+            panic!("Semaphore was moved after first use");
+        }
+
+        critical_section::with(|_| {
+            // Reload with Acquire ordering: another context may have completed the
+            // initialization between the first load and our entry into the critical section.
+            let state = self.state.load(Ordering::Acquire);
+            if state == me {
+                return self.item.get();
+            } else if state != 0 {
+                panic!("Semaphore was moved after first use");
+            }
+
+            // Perform the initialization.  We're within the critical section, and know that
+            // nobody could be using this.
+            // SAFETY: `state` is still 0, so no pointer to `item` has been handed out yet, and
+            // the critical section keeps any other initializer out.  `count` and `limit` hold
+            // the values stored by `new`.
+            // NOTE(review): the value returned by `k_sem_init` is discarded; presumably it
+            // reports rejected count/limit combinations -- verify whether it should be checked.
+            unsafe {
+                let ptr = self.item.get();
+                let initial_count = (*ptr).count;
+                let limit = (*ptr).limit;
+
+                k_sem_init(ptr, initial_count, limit);
+            }
+
+            // Publish the initialized object; pairs with the Acquire fence/load above.
+            self.state.store(me, Ordering::Release);
+            self.item.get()
+        })
+    }
+
+    /// Synchronous take.
+    ///
+    /// Calls `k_sem_take` with the given timeout and converts its return code with
+    /// `to_result_void`.  Initializes the semaphore first if this is its first use.
+    pub fn take(&self, timeout: impl Into<Timeout>) -> Result<()> {
+        let timeout: Timeout = timeout.into();
+        let ptr = self.get();
+        // SAFETY: `get` returns a pointer to this object's `k_sem`, initialized on first use.
+        let ret = unsafe { k_sem_take(ptr, timeout.0) };
+        to_result_void(ret)
+    }
+
+    /// Give the semaphore by calling `k_sem_give`.  Initializes the semaphore first if this is
+    /// its first use.
+    pub fn give(&self) {
+        let ptr = self.get();
+        // SAFETY: `get` returns a pointer to this object's `k_sem`, initialized on first use.
+        unsafe { k_sem_give(ptr) };
+    }
+}
diff --git a/samples/bench/src/lib.rs b/samples/bench/src/lib.rs
@@ -33,6 +33,8 @@ use zephyr::{
     work::{WorkQueue, WorkQueueBuilder},
 };
 
+mod basesem;
+
 /// How many threads to run in the tests.
 const NUM_THREADS: usize = 6;
 
@@ -59,6 +61,7 @@ extern "C" fn rust_main() {
     }
 
     tester.run(Command::Empty);
+    tester.run(Command::BaseSemPingPong(NUM_THREADS, 10_000));
     tester.run(Command::SimpleSem(10_000));
     tester.run(Command::SimpleSemAsync(10_000));
     tester.run(Command::SimpleSemYield(10_000));
@@ -119,6 +122,10 @@ struct ThreadTests {
     /// The test also all return their result to the main.  The threads Send, the main running
     /// receives.
     results: ChanPair<Result>,
+
+    /// For the base sem test, just use these, which are just shared by reference.
+    forward: basesem::Semaphore,
+    reverse: basesem::Semaphore,
 }
 
 impl ThreadTests {
@@ -149,6 +156,8 @@ impl ThreadTests {
             low_command: low_send,
             high_command: high_send,
             workq,
+            forward: basesem::Semaphore::new(0, 1),
+            reverse: basesem::Semaphore::new(0, 1),
         };
 
         let mut thread_commands = Vec::new();
@@ -441,6 +450,11 @@ impl ThreadTests {
                         continue;
                     }
                 }
+
+                Command::BaseSemPingPong(_nthread, count) => {
+                    this.base_worker(count);
+                    total = count;
+                }
             }
 
             this.results
@@ -626,6 +640,23 @@ impl ThreadTests {
         let _ = this;
     }
 
+    /// Worker side of the base-semaphore ping-pong: `count` times, block on `forward` and then
+    /// signal `reverse`.
+    fn base_worker(&self, count: usize) {
+        let (ping, pong) = (&self.forward, &self.reverse);
+        (0..count).for_each(|_| {
+            ping.take(Forever).unwrap();
+            pong.give();
+        });
+    }
+
+    // Used by the low runner: for each of `count` rounds, performs `nthread` exchanges, each one
+    // giving `forward` and then waiting on `reverse`.
+    fn base_replier(&self, nthread: usize, count: usize) {
+        let (ping, pong) = (&self.forward, &self.reverse);
+        for _round in 0..count {
+            (0..nthread).for_each(|_| {
+                ping.give();
+                pong.take(Forever).unwrap();
+            });
+        }
+    }
+
     /// And the low priority worker.
     fn low_runner(this: Arc<Self>, command: Receiver<Command>) {
         let _ = this;
@@ -655,6 +686,9 @@ impl ThreadTests {
                 }
                 Command::SemPingPongAsync(_) => (),
                 Command::SemOnePingPongAsync(_, _) => (),
+                Command::BaseSemPingPong(nthread, count) => {
+                    this.base_replier(nthread, count);
+                }
             }
             // printkln!("low command: {:?}", cmd);
 
@@ -687,6 +721,7 @@ impl ThreadTests {
                 }
                 Command::SemPingPongAsync(_) => (),
                 Command::SemOnePingPongAsync(_, _) => (),
+                Command::BaseSemPingPong(_, _) => (),
             }
             // printkln!("high command: {:?}", cmd);
 
@@ -745,6 +780,7 @@ enum Command {
     SemOnePingPong(usize),
     /// Same as SemOnePingPong, but async.  The first parameter is the number of async tasks.
     SemOnePingPongAsync(usize, usize),
+    BaseSemPingPong(usize, usize),
 }
 
 enum Result {