Skip to content

Commit e2191c8

Browse files
committed
samples: bench: Add Simple Work benchmark
Create a benchmark that has various work entities that schedule each other to tease out lots of workqueue scheduling. Signed-off-by: David Brown <[email protected]>
1 parent 5f50dc5 commit e2191c8

File tree

1 file changed

+216
-6
lines changed

1 file changed

+216
-6
lines changed

samples/bench/src/lib.rs

Lines changed: 216 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,16 @@
99

1010
extern crate alloc;
1111

12+
use core::mem;
13+
use core::pin::Pin;
14+
15+
use alloc::collections::vec_deque::VecDeque;
1216
use alloc::vec;
1317
use alloc::vec::Vec;
18+
use zephyr::sync::SpinMutex;
1419
use zephyr::time::NoWait;
1520
use zephyr::work::futures::work_size;
21+
use zephyr::work::{SimpleAction, Work};
1622
use zephyr::{
1723
kconfig::CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC,
1824
kio::{spawn, yield_now},
@@ -39,6 +45,19 @@ const WORK_STACK_SIZE: usize = 2048;
3945
#[no_mangle]
4046
extern "C" fn rust_main() {
4147
let tester = ThreadTests::new(NUM_THREADS);
48+
49+
// Some basic benchmarks
50+
arc_bench();
51+
spin_bench();
52+
sem_bench();
53+
54+
let simple = Simple::new(tester.workq.clone());
55+
let mut num = 6;
56+
while num < 500 {
57+
simple.run(num, 10_000 / num);
58+
num = num * 13 / 10;
59+
}
60+
4261
tester.run(Command::Empty);
4362
tester.run(Command::SimpleSem(10_000));
4463
tester.run(Command::SimpleSemAsync(10_000));
@@ -65,8 +84,8 @@ extern "C" fn rust_main() {
6584
num = num * 13 / 10;
6685
}
6786

87+
6888
printkln!("Done with all tests\n");
69-
tester.leak();
7089
}
7190

7291
/// Thread-based tests use this information to manage the test and results.
@@ -118,6 +137,9 @@ impl ThreadTests {
118137
.start(WORK_STACK.init_once(()).unwrap()),
119138
);
120139

140+
// Leak the workqueue so it doesn't get dropped.
141+
let _ = Arc::into_raw(workq.clone());
142+
121143
let mut result = Self {
122144
sems: Vec::new(),
123145
back_sems: Vec::new(),
@@ -196,11 +218,6 @@ impl ThreadTests {
196218
result
197219
}
198220

199-
/// At the end of the tests, leak the work queue.
200-
fn leak(&self) {
201-
let _ = Arc::into_raw(self.workq.clone());
202-
}
203-
204221
fn run(&self, command: Command) {
205222
// printkln!("Running {:?}", command);
206223

@@ -743,11 +760,204 @@ enum Result {
743760
High,
744761
}
745762

763+
/// The Simple test just does a ping pong test using manually submitted work.
764+
struct Simple {
765+
workq: Arc<WorkQueue>,
766+
}
767+
768+
impl Simple {
769+
fn new(workq: Arc<WorkQueue>) -> Self {
770+
Self { workq }
771+
}
772+
773+
fn run(&self, workers: usize, iterations: usize) {
774+
// printkln!("Running Simple");
775+
let main = Work::new(SimpleMain::new(workers * iterations, self.workq.clone()));
776+
777+
let children: VecDeque<_> = (0..workers)
778+
.map(|n| Work::new(SimpleWorker::new(main.clone(), self.workq.clone(), n)))
779+
.collect();
780+
781+
let mut locked = main.action().locked.lock().unwrap();
782+
let _ = mem::replace(&mut locked.works, children);
783+
drop(locked);
784+
785+
let start = now();
786+
// Fire off main, which will run everything.
787+
Work::submit_to_queue(main.clone(), &self.workq).unwrap();
788+
789+
// And wait for the completion semaphore.
790+
main.action().done.take(Forever).unwrap();
791+
792+
let stop = now();
793+
let time = (stop - start) as f64 / (CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC as f64) * 1000.0;
794+
795+
let total = workers * iterations;
796+
let time = if total > 0 {
797+
time / (total as f64) * 1000.0
798+
} else {
799+
0.0
800+
};
801+
802+
printkln!(" {:8.3} us, {} of {} workers {} times", time, total, workers, iterations);
803+
}
804+
}
805+
806+
/// A simple worker. When run, it submits the main worker to do the next work.
807+
struct SimpleWorker {
808+
main: Pin<Arc<Work<SimpleMain>>>,
809+
workq: Arc<WorkQueue>,
810+
_id: usize,
811+
}
812+
813+
impl SimpleWorker {
814+
fn new(main: Pin<Arc<Work<SimpleMain>>>, workq: Arc<WorkQueue>, id: usize) -> Self {
815+
Self { main, workq, _id: id }
816+
}
817+
}
818+
819+
impl SimpleAction for SimpleWorker {
820+
fn act(self: Pin<&Self>) {
821+
// Each time we are run, fire the main worker back up.
822+
Work::submit_to_queue(self.main.clone(), &self.workq).unwrap();
823+
}
824+
}
825+
826+
/// This is the main worker.
827+
///
828+
/// Each time it is run, it submits the next worker from the queue and exits.
829+
struct SimpleMain {
830+
/// All of the work items.
831+
locked: SpinMutex<Locked>,
832+
workq: Arc<WorkQueue>,
833+
done: Semaphore,
834+
}
835+
836+
impl SimpleAction for SimpleMain {
837+
fn act(self: Pin<&Self>) {
838+
// Each time, take a worker from the queue, and submit it.
839+
let mut lock = self.locked.lock().unwrap();
840+
841+
if lock.count == 0 {
842+
// The last time, indicate we are done.
843+
self.done.give();
844+
return;
845+
}
846+
847+
let worker = lock.works.pop_front().unwrap();
848+
lock.works.push_back(worker.clone());
849+
lock.count -= 1;
850+
drop(lock);
851+
852+
Work::submit_to_queue(worker.clone(), &self.workq).unwrap();
853+
}
854+
}
855+
856+
impl SimpleMain {
857+
fn new(count: usize, workq: Arc<WorkQueue>) -> Self {
858+
Self {
859+
locked: SpinMutex::new(Locked::new(count)),
860+
done: Semaphore::new(0, 1).unwrap(),
861+
workq,
862+
}
863+
}
864+
}
865+
866+
struct Locked {
867+
works: VecDeque<Pin<Arc<Work<SimpleWorker>>>>,
868+
count: usize,
869+
}
870+
871+
impl Locked {
872+
fn new(count: usize) -> Self {
873+
Self {
874+
works: VecDeque::new(),
875+
count,
876+
}
877+
}
878+
}
879+
880+
/// Benchmark the performance of Arc.
881+
fn arc_bench() {
882+
let thing = Arc::new(123);
883+
let timer = BenchTimer::new("Arc clone+drop", 10_000);
884+
for _ in 0..10_000 {
885+
let _ = thing.clone();
886+
}
887+
timer.stop();
888+
}
889+
890+
/// Benchmark SpinMutex.
891+
#[inline(never)]
892+
#[no_mangle]
893+
fn spin_bench() {
894+
let iters = 10_000;
895+
let thing = SpinMutex::new(123);
896+
let timer = BenchTimer::new("SpinMutex lock", iters);
897+
for _ in 0..iters {
898+
*thing.lock().unwrap() += 1;
899+
}
900+
timer.stop();
901+
}
902+
903+
/// Semaphore benchmark.
904+
///
905+
/// This benchmarks a single thread with a semaphore that is always ready. This is pretty close to
906+
/// just syscall with spinlock time.
907+
#[inline(never)]
908+
#[no_mangle]
909+
fn sem_bench() {
910+
let iters = 10_000;
911+
let sem = Semaphore::new(iters as u32, iters as u32).unwrap();
912+
let timer = BenchTimer::new("Semaphore take", iters);
913+
for _ in 0..iters {
914+
sem.take(Forever).unwrap();
915+
}
916+
timer.stop();
917+
}
918+
746919
// For accurate timing, use the cycle counter.
747920
fn now() -> u64 {
748921
unsafe { k_cycle_get_64() }
749922
}
750923

924+
/// Times a batch of operations and reports the average cost of one of them.
///
/// The label and the iteration count are both given to `new`; `stop` takes no arguments and
/// prints the result.
///
/// To use:
/// ```ignore
/// // 500 is the number of iterations happening.
/// let timer = BenchTimer::new("My thing", 500);
/// // operations
/// timer.stop();
/// ```
pub struct BenchTimer<'a> {
    /// Label printed with the result.
    what: &'a str,
    /// Cycle counter reading taken when the timer was created.
    start: u64,
    /// Number of operations the measured time is divided by.
    count: usize,
}
938+
939+
impl<'a> BenchTimer<'a> {
940+
pub fn new(what: &'a str, count: usize) -> Self {
941+
Self {
942+
what,
943+
start: now(),
944+
count,
945+
}
946+
}
947+
948+
pub fn stop(self) {
949+
let stop = now();
950+
let time = (stop - self.start) as f64 / (CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC as f64) * 1000.0;
951+
let time = if self.count > 0 {
952+
time / (self.count as f64) * 1000.0
953+
} else {
954+
0.0
955+
};
956+
957+
printkln!(" {:8.3} us, {} of {}", time, self.count, self.what);
958+
}
959+
}
960+
751961
kobj_define! {
752962
static TEST_THREADS: [StaticThread; NUM_THREADS];
753963
static TEST_STACKS: [ThreadStack<THREAD_STACK_SIZE>; NUM_THREADS];

0 commit comments

Comments
 (0)