@@ -35,12 +35,18 @@ pub struct MLFShardedRingBuf<T> {
3535 /// Total capacity of the buffer
3636 capacity : AtomicUsize ,
3737 /// Multiple InnerRingBuffer structure based on num of shards
38- /// CachePadded to prevent false sharing
39- // inner_rb: Box<[CachePadded<InnerRingBuffer<T>>]>,
4038 inner_rb : Box < [ InnerRingBuffer < T > ] > ,
4139
42- // pub(crate) job_space_shard_notifs: Box<[Notify]>,
43- // pub(crate) job_post_shard_notifs: Box<[Notify]>,
40+ /// Used as a monotonically increasing counter to determine which
41+ /// shard an enqueuer task should place an item in
42+ /// Cache Padded to prevent false sharing if spawning an enqueuer
43+ /// task happens across multiple threads
44+ shard_enq : CachePadded < AtomicUsize > ,
45+ /// Used as a monotonically increasing counter to determine which
46+ /// shard a dequeuer task should retrieve an item from
47+ /// Cache Padded to prevent false sharing if spawning a dequeuer
48+ /// task happens across multiple threads
49+ shard_deq : CachePadded < AtomicUsize > ,
4450}
4551
4652// An inner ring buffer to contain the items, enqueue and dequeue index for ShardedRingBuf struct
@@ -49,10 +55,13 @@ struct InnerRingBuffer<T> {
4955 /// Box of Slots containing the content of the buffer
5056 /// Cache Padded to avoid false sharing
5157 items : Box < [ CachePadded < Slot < T > > ] > ,
52- /// Where to enqueue at in the Box
58+ /// Where to enqueue at in the items Box
59+ /// Cache Padded to avoid false sharing
5360 enqueue_index : CachePadded < AtomicUsize > ,
54- /// Where to dequeue at in the Box
55- // dequeue_index: AtomicUsize,
61+ /// Where to dequeue at in the items Box
62+ /// Cache Padded to avoid false sharing (though since this
63+ /// is the last item, cache padding doesn't really matter
64+ /// too much)
5665 dequeue_index : CachePadded < AtomicUsize > ,
5766}
5867
@@ -63,7 +72,9 @@ struct Slot<T> {
6372 item : UnsafeCell < MaybeUninit < Option < T > > > ,
6473 /// 0: empty, 1: full, 2: in progress (deq), 3: in progress (enq)
6574 state : AtomicU8 ,
75+ /// used to notify an enqueuer task that's waiting on this slot
6676 enq_notify : Notify ,
77+ /// used to notify a dequeuer task that's waiting on this slot
6778 deq_notify : Notify
6879}
6980
@@ -94,7 +105,6 @@ impl<T> InnerRingBuffer<T> {
94105 vec. into_boxed_slice ( )
95106 } ,
96107 enqueue_index : CachePadded :: new ( AtomicUsize :: new ( 0 ) ) ,
97- // dequeue_index: AtomicUsize::new(0),
98108 dequeue_index : CachePadded :: new ( AtomicUsize :: new ( 0 ) ) ,
99109 }
100110 }
@@ -122,12 +132,8 @@ impl<T> MLFShardedRingBuf<T> {
122132 let mut vec = Vec :: with_capacity ( shards) ;
123133 for _ in 0 ..shards {
124134 if remainder == 0 {
125- // vec.push(CachePadded::new(InnerRingBuffer::new(capacity_per_shard)));
126135 vec. push ( InnerRingBuffer :: new ( capacity_per_shard) ) ;
127136 } else {
128- // vec.push(CachePadded::new(InnerRingBuffer::new(
129- // capacity_per_shard + 1,
130- // )));
131137 vec. push ( InnerRingBuffer :: new (
132138 capacity_per_shard + 1 ,
133139 ) ) ;
@@ -136,22 +142,8 @@ impl<T> MLFShardedRingBuf<T> {
136142 }
137143 vec. into_boxed_slice ( )
138144 } ,
139- // job_post_shard_notifs: {
140- // let mut vec = Vec::with_capacity(shards);
141- // for _ in 0..shards {
142- // vec.push(Notify::new());
143- // }
144- // vec.into_boxed_slice()
145- // },
146-
147- // job_space_shard_notifs: {
148- // let mut vec = Vec::with_capacity(shards);
149-
150- // for _ in 0..shards {
151- // vec.push(Notify::new());
152- // }
153- // vec.into_boxed_slice()
154- // },
145+ shard_enq : CachePadded :: new ( AtomicUsize :: new ( 0 ) ) ,
146+ shard_deq : CachePadded :: new ( AtomicUsize :: new ( 0 ) )
155147 }
156148 }
157149
@@ -190,6 +182,7 @@ impl<T> MLFShardedRingBuf<T> {
190182 self . capacity . load ( Ordering :: Relaxed )
191183 }
192184
185+ /// Helper function to take a slot at a specific shard in the ring buffer
193186 #[ inline]
194187 async fn take_slot ( & self , acquire : Acquire , shard_ind : usize ) -> ( usize , usize ) {
195188 let inner = & self . inner_rb [ shard_ind] ;
@@ -280,16 +273,37 @@ impl<T> MLFShardedRingBuf<T> {
280273 self . release_slot ( current. 0 , current. 1 , Acquire :: Enqueue ) ;
281274 }
282275
283- /// Adds an item of type T to the RingBuffer, *blocking* the thread until there is space to add the item.
284- ///
285- /// Time Complexity: O(s_t) where s_t is the time it takes to acquire a shard
276+ /// Adds an item of type T to the ring buffer at a provided shard. If the user
277+ /// provides a shard index greater than the existing number of shards in the
278+ /// buffer, it will perform wrap around (% number of existing shards).
286279 ///
280+ /// Time Complexity: O(s_t) where s_t is the time it takes to acquire a slot in a shard
281+ /// (this is usually pretty fast)
282+ ///
287283 /// Space complexity: O(1)
288- pub ( crate ) async fn enqueue ( & self , item : T , shard_ind : usize ) {
284+ pub async fn enqueue_in_shard ( & self , item : T , shard_ind : usize ) {
289285 let shard_ind = shard_ind % self . get_num_of_shards ( ) ;
290286 self . enqueue_item ( Some ( item) , shard_ind) . await ;
291287 }
292288
289+ /// Adds an item of type T to the ring buffer. It uses the ring buffer's shard_enq
290+ /// field and mods it with the number of existing shards for the buffer to determine
291+ /// which shard this enqueue operation will occur at. As a result, if you have multiple
292+ /// shards and one enqueuer task repeatedly using enqueue(), it will sweep across the
293+ /// shards and place an item in each.
294+ ///
295+ /// If you intend to have an enqueuer task map to a specific shard, use enqueue_in_shard()
296+ /// for more control.
297+ ///
298+ /// Time Complexity: O(s_t) where s_t is the time it takes to acquire a slot in the shard.
299+ /// (this is usually pretty fast)
300+ ///
301+ /// Space complexity: O(1)
302+ pub async fn enqueue ( & self , item : T ) {
303+ let shard_ind = self . shard_enq . fetch_add ( 1 , Ordering :: Relaxed ) % self . get_num_of_shards ( ) ;
304+ self . enqueue_item ( Some ( item) , shard_ind) . await ;
305+ }
306+
293307 /// Grab the inner ring buffer shard, dequeue the item, update the dequeue index
294308 #[ inline( always) ]
295309 fn dequeue_in_slot ( & self , shard_ind : usize , slot_ind : usize ) -> Option < T > {
@@ -303,26 +317,49 @@ impl<T> MLFShardedRingBuf<T> {
303317 unsafe { ( * item_cell) . assume_init_read ( ) }
304318 }
305319
306- /// Retrieves an item of type T from the RingBuffer if an item exists in the buffer.
307- /// If the ring buffer is set with a poisoned flag or received a poison pill,
320+ /// Retrieves an item of type T from the ring buffer. If the user
321+ /// provides a shard index greater than the existing number of shards in the
322+ /// buffer, it will perform wrap around (% number of existing shards). On a poison pill,
308323 /// this method will return None.
309324 ///
310- /// Time Complexity: O(s_t) where s_t is the time it takes to acquire a shard
325+ /// Time Complexity: O(s_t) where s_t is the time it takes to acquire a slot in the shard
311326 ///
312327 /// Space Complexity: O(1)
313328 #[ inline( always) ]
314- pub ( crate ) async fn dequeue ( & self , shard_ind : usize ) -> Option < T > {
329+ pub async fn dequeue_in_shard ( & self , shard_ind : usize ) -> Option < T > {
315330 let shard_ind = shard_ind % self . get_num_of_shards ( ) ;
316331 let current = self . take_slot ( Acquire :: Dequeue , shard_ind) . await ;
317332 let item = self . dequeue_in_slot ( current. 0 , current. 1 ) ;
318333 self . release_slot ( current. 0 , current. 1 , Acquire :: Dequeue ) ;
319334 item
320335 }
321336
322- /// Sets the poison flag of the ring buffer to true. This will prevent enqueuers
323- /// from enqueuing anymore jobs if this method is called while enqueues are occuring.
324- /// However you can use this if you want graceful exit of dequeuers tasks completing
325- /// all available jobs enqueued first before exiting.
337+ /// Retrieves an item of type T from the ring buffer. It uses the ring buffer's shard_deq
338+ /// field and mods it with the number of existing shards for the buffer to determine
339+ /// which shard this dequeue operation will occur at. As a result, if you have multiple
340+ /// shards and one dequeuer task repeatedly using dequeue(), it will sweep across the
341+ /// shards and retrieve an item from each.
342+ ///
343+ /// On a poison pill, this method will return None.
344+ ///
345+ /// If you intend to have a dequeuer task map to a specific shard, use dequeue_in_shard()
346+ /// for more control.
347+ ///
348+ /// Time Complexity: O(s_t) where s_t is the time it takes to acquire a slot in the shard
349+ ///
350+ /// Space Complexity: O(1)
351+ #[ inline( always) ]
352+ pub async fn dequeue ( & self ) -> Option < T > {
353+ let shard_ind = self . shard_deq . fetch_add ( 1 , Ordering :: Relaxed ) % self . get_num_of_shards ( ) ;
354+ let current = self . take_slot ( Acquire :: Dequeue , shard_ind) . await ;
355+ let item = self . dequeue_in_slot ( current. 0 , current. 1 ) ;
356+ self . release_slot ( current. 0 , current. 1 , Acquire :: Dequeue ) ;
357+ item
358+ }
359+
360+ /// Enqueues a poison pill at a specific shard. If you intend to make your dequeuer
361+ /// task unbounded with its dequeue operations, this should be the method you use
362+ /// to break out of the infinite loop.
326363 ///
327364 /// Time Complexity: O(1)
328365 ///
0 commit comments