meta-pytorch
diff --git a/hyperactor/Cargo.toml b/hyperactor/Cargo.toml
Lines changed: 1 addition & 0 deletions
diff --git a/hyperactor/src/mailbox.rs b/hyperactor/src/mailbox.rs
Lines changed: 1 addition & 1 deletion
diff --git a/hyperactor/src/ordering.rs b/hyperactor/src/ordering.rs
Lines changed: 149 additions & 66 deletions
@@ -84,6 +84,7 @@ tokio-stream = { version = "0.1.17", features = ["fs", "io-util", "net", "signal
 tokio-util = { version = "0.7.15", features = ["full"] }
 tracing = { version = "0.1.41", features = ["attributes", "valuable"] }
 unicode-ident = "1.0.12"
+uuid = { version = "1.2", features = ["serde", "v4", "v5", "v6", "v7", "v8"] }
 
 [dev-dependencies]
 buck-resources = "1"
 
@@ -3282,7 +3282,7 @@ mod tests {
 
     fn post(cx: &impl context::Actor, port_id: PortId, msg: u64) {
         let serialized = Serialized::serialize(&msg).unwrap();
-        port_id.send(cx, &serialized);
+        port_id.send(cx, serialized);
     }
 
     #[async_timed_test(timeout_secs = 30)]
 
@@ -14,21 +14,23 @@ use std::ops::DerefMut;
 use std::sync::Arc;
 use std::sync::Mutex;
 
+use dashmap::DashMap;
 use tokio::sync::mpsc;
 use tokio::sync::mpsc::error::SendError;
+use uuid::Uuid;
 
-use crate::dashmap::DashMap;
+use crate::ActorId;
 
 /// A client's re-ordering buffer state.
 struct BufferState<T> {
     /// The last sequence number sent to the receiver for this client. Sequence
     /// numbers start at 1; 0 means no message has been sent yet.
-    last_seq: usize,
+    last_seq: u64,
     /// Buffers out-of-order messages to ensure that messages are delivered
     /// strictly in per-client sequence order.
     ///
     /// Map's key is seq_no, value is msg.
-    buffer: HashMap<usize, T>,
+    buffer: HashMap<u64, T>,
 }
 
 impl<T> Default for BufferState<T> {
@@ -43,9 +45,8 @@ impl<T> Default for BufferState<T> {
 /// A sender that ensures messages are delivered in per-client sequence order.
 pub(crate) struct OrderedSender<T> {
     tx: mpsc::UnboundedSender<T>,
-    // map's key is name client which sens messages through this channel. Map's
-    // value is the buffer state of that client.
-    states: Arc<DashMap<String, Arc<Mutex<BufferState<T>>>>>,
+    /// Map's key is session ID, and value is the buffer state of that session.
+    states: Arc<DashMap<Uuid, Arc<Mutex<BufferState<T>>>>>,
     pub(crate) enable_buffering: bool,
     /// The identity of this object, which is used to distinguish it in debugging.
     log_id: String,
@@ -98,8 +99,8 @@ impl<T> OrderedSender<T> {
     /// * calls from different clients will be executed concurrently.
     pub(crate) fn send(
         &self,
-        client: String,
-        seq_no: usize,
+        session_id: Uuid,
+        seq_no: u64,
         msg: T,
     ) -> Result<(), OrderedSenderError<T>> {
         use std::cmp::Ordering;
@@ -109,25 +110,17 @@ impl<T> OrderedSender<T> {
             return Err(OrderedSenderError::InvalidZeroSeq(msg));
         }
 
-        // Make sure only this client's state is locked, not all states.
-        let state = match self.states.get(&client) {
-            Some(state) => state.value().clone(),
-            None => self
-                .states
-                .entry(client.clone())
-                .or_default()
-                .value()
-                .clone(),
-        };
+        // Make sure only this session's state is locked, not all states.
+        let state = self.states.entry(session_id).or_default().value().clone();
         let mut state_guard = state.lock().unwrap();
         let BufferState { last_seq, buffer } = state_guard.deref_mut();
 
         match seq_no.cmp(&(*last_seq + 1)) {
             Ordering::Less => {
                 tracing::warn!(
-                    "{} duplicate message from {} with seq no: {}",
+                    "{} duplicate message from session {} with seq no: {}",
                     self.log_id,
-                    client,
+                    session_id,
                     seq_no,
                 );
             }
@@ -176,9 +169,49 @@ impl<T> OrderedSender<T> {
     }
 }
 
+/// Used by a sender to track the message sequence numbers it sends to each actor.
+/// Each [`Sequencer`] object has a session ID; sequence numbers are scoped by
+/// the (session_id, destination_actor) pair.
+#[derive(Clone, Debug)]
+pub struct Sequencer {
+    session_id: Uuid,
+    // Map's key is the destination actor's ID; value is the last sequence
+    // number sent to that actor.
+    last_seqs: Arc<Mutex<HashMap<ActorId, u64>>>,
+}
+
+impl Sequencer {
+    pub(crate) fn new(session_id: Uuid) -> Self {
+        Self {
+            session_id,
+            last_seqs: Arc::new(Mutex::new(HashMap::new())),
+        }
+    }
+
+    /// Assigns the next sequence number for the given actor ID, records it in
+    /// the sequencer, and returns it.
+    pub fn assign_seq(&self, actor_id: &ActorId) -> u64 {
+        let mut guard = self.last_seqs.lock().unwrap();
+        let mut_ref = match guard.get_mut(actor_id) {
+            Some(m) => m,
+            None => guard.entry(actor_id.clone()).or_default(),
+        };
+        *mut_ref += 1;
+        *mut_ref
+    }
+
+    /// Id of the session this sequencer belongs to.
+    pub fn session_id(&self) -> Uuid {
+        self.session_id
+    }
+}
+
 #[cfg(test)]
 mod tests {
+    use std::sync::Arc;
+
     use super::*;
+    use crate::id;
 
     fn drain_try_recv<T: std::fmt::Debug + Clone>(rx: &mut mpsc::UnboundedReceiver<T>) -> Vec<T> {
         let mut out = Vec::new();
@@ -190,26 +223,28 @@ mod tests {
 
     #[test]
     fn test_ordered_channel_single_client_send_in_order() {
-        let (tx, mut rx) = ordered_channel::<usize>("test".to_string(), true);
+        let session_id_a = Uuid::now_v7();
+        let (tx, mut rx) = ordered_channel::<u64>("test".to_string(), true);
         for s in 1..=10 {
-            tx.send("A".into(), s, s).unwrap();
+            tx.send(session_id_a, s, s).unwrap();
             let got = drain_try_recv(&mut rx);
             assert_eq!(got, vec![s]);
         }
     }
 
     #[test]
     fn test_ordered_channel_single_client_send_out_of_order() {
-        let (tx, mut rx) = ordered_channel::<usize>("test".to_string(), true);
+        let session_id_a = Uuid::now_v7();
+        let (tx, mut rx) = ordered_channel::<u64>("test".to_string(), true);
 
         // Send 2 to 4 in descending order: all should buffer until 1 arrives.
         for s in (2..=4).rev() {
-            tx.send("A".into(), s, s).unwrap();
+            tx.send(session_id_a, s, s).unwrap();
         }
 
         // Send 7 to 9 in descending order: all should buffer until 1 - 6 arrive.
         for s in (7..=9).rev() {
-            tx.send("A".into(), s, s).unwrap();
+            tx.send(session_id_a, s, s).unwrap();
         }
 
         assert!(
@@ -218,127 +253,175 @@ mod tests {
         );
 
         // Now send 1: should deliver 1 then flush 2 - 4.
-        tx.send("A".into(), 1, 1).unwrap();
+        tx.send(session_id_a, 1, 1).unwrap();
         assert_eq!(drain_try_recv(&mut rx), vec![1, 2, 3, 4]);
 
         // Now send 5: should deliver immediately but not flush 7 - 9.
-        tx.send("A".into(), 5, 5).unwrap();
+        tx.send(session_id_a, 5, 5).unwrap();
         assert_eq!(drain_try_recv(&mut rx), vec![5]);
 
         // Now send 6: should deliver 6 then flush 7 - 9.
-        tx.send("A".into(), 6, 6).unwrap();
+        tx.send(session_id_a, 6, 6).unwrap();
         assert_eq!(drain_try_recv(&mut rx), vec![6, 7, 8, 9]);
 
         // Send 10: should deliver immediately.
-        tx.send("A".into(), 10, 10).unwrap();
+        tx.send(session_id_a, 10, 10).unwrap();
         let got = drain_try_recv(&mut rx);
         assert_eq!(got, vec![10]);
     }
 
     #[test]
     fn test_ordered_channel_multi_clients() {
-        let (tx, mut rx) = ordered_channel::<(String, usize)>("test".to_string(), true);
+        let session_id_a = Uuid::now_v7();
+        let session_id_b = Uuid::now_v7();
+        let (tx, mut rx) = ordered_channel::<(Uuid, u64)>("test".to_string(), true);
 
         // A1 -> deliver
-        tx.send("A".into(), 1, ("A".into(), 1)).unwrap();
-        assert_eq!(drain_try_recv(&mut rx), vec![("A".into(), 1)]);
+        tx.send(session_id_a, 1, (session_id_a, 1)).unwrap();
+        assert_eq!(drain_try_recv(&mut rx), vec![(session_id_a, 1)]);
         // B1 -> deliver
-        tx.send("B".into(), 1, ("B".into(), 1)).unwrap();
-        assert_eq!(drain_try_recv(&mut rx), vec![("B".into(), 1)]);
+        tx.send(session_id_b, 1, (session_id_b, 1)).unwrap();
+        assert_eq!(drain_try_recv(&mut rx), vec![(session_id_b, 1)]);
         for s in (3..=5).rev() {
             // A3-5 -> buffer (waiting for A2)
-            tx.send("A".into(), s, ("A".into(), s)).unwrap();
+            tx.send(session_id_a, s, (session_id_a, s)).unwrap();
             // B3-5 -> buffer (waiting for B2)
-            tx.send("B".into(), s, ("B".into(), s)).unwrap();
+            tx.send(session_id_b, s, (session_id_b, s)).unwrap();
         }
         for s in (7..=9).rev() {
             // A7-9 -> buffer (waiting for A1-6)
-            tx.send("A".into(), s, ("A".into(), s)).unwrap();
+            tx.send(session_id_a, s, (session_id_a, s)).unwrap();
             // B7-9 -> buffer (waiting for B1-6)
-            tx.send("B".into(), s, ("B".into(), s)).unwrap();
+            tx.send(session_id_b, s, (session_id_b, s)).unwrap();
         }
         assert!(
             drain_try_recv(&mut rx).is_empty(),
             "nothing should be delivered yet"
         );
 
         // A2 -> deliver A2 then flush A3-5
-        tx.send("A".into(), 2, ("A".into(), 2)).unwrap();
+        tx.send(session_id_a, 2, (session_id_a, 2)).unwrap();
         assert_eq!(
             drain_try_recv(&mut rx),
             vec![
-                ("A".into(), 2),
-                ("A".into(), 3),
-                ("A".into(), 4),
-                ("A".into(), 5),
+                (session_id_a, 2),
+                (session_id_a, 3),
+                (session_id_a, 4),
+                (session_id_a, 5),
             ]
         );
         // B2 -> deliver B2 then flush B3-5
-        tx.send("B".into(), 2, ("B".into(), 2)).unwrap();
+        tx.send(session_id_b, 2, (session_id_b, 2)).unwrap();
         assert_eq!(
             drain_try_recv(&mut rx),
             vec![
-                ("B".into(), 2),
-                ("B".into(), 3),
-                ("B".into(), 4),
-                ("B".into(), 5),
+                (session_id_b, 2),
+                (session_id_b, 3),
+                (session_id_b, 4),
+                (session_id_b, 5),
             ]
         );
 
         // A6 -> should deliver immediately and flush A7-9
-        tx.send("A".into(), 6, ("A".into(), 6)).unwrap();
+        tx.send(session_id_a, 6, (session_id_a, 6)).unwrap();
         assert_eq!(
             drain_try_recv(&mut rx),
             vec![
-                ("A".into(), 6),
-                ("A".into(), 7),
-                ("A".into(), 8),
-                ("A".into(), 9)
+                (session_id_a, 6),
+                (session_id_a, 7),
+                (session_id_a, 8),
+                (session_id_a, 9)
             ]
         );
         // B6 -> should deliver immediately and flush B7-9
-        tx.send("B".into(), 6, ("B".into(), 6)).unwrap();
+        tx.send(session_id_b, 6, (session_id_b, 6)).unwrap();
         assert_eq!(
             drain_try_recv(&mut rx),
             vec![
-                ("B".into(), 6),
-                ("B".into(), 7),
-                ("B".into(), 8),
-                ("B".into(), 9)
+                (session_id_b, 6),
+                (session_id_b, 7),
+                (session_id_b, 8),
+                (session_id_b, 9)
             ]
         );
     }
 
     #[test]
     fn test_ordered_channel_duplicates() {
-        fn verify_empty_buffers<T>(states: &DashMap<String, Arc<Mutex<BufferState<T>>>>) {
+        let session_id_a = Uuid::now_v7();
+        fn verify_empty_buffers<T>(states: &DashMap<Uuid, Arc<Mutex<BufferState<T>>>>) {
             for entry in states.iter() {
                 assert!(entry.value().lock().unwrap().buffer.is_empty());
             }
         }
 
-        let (tx, mut rx) = ordered_channel::<(String, usize)>("test".to_string(), true);
+        let (tx, mut rx) = ordered_channel::<(Uuid, u64)>("test".to_string(), true);
         // A1 -> deliver
-        tx.send("A".into(), 1, ("A".into(), 1)).unwrap();
-        assert_eq!(drain_try_recv(&mut rx), vec![("A".into(), 1)]);
+        tx.send(session_id_a, 1, (session_id_a, 1)).unwrap();
+        assert_eq!(drain_try_recv(&mut rx), vec![(session_id_a, 1)]);
         verify_empty_buffers(&tx.states);
         // duplicate A1 -> drop even if the message is different.
-        tx.send("A".into(), 1, ("A".into(), 1_000)).unwrap();
+        tx.send(session_id_a, 1, (session_id_a, 1_000)).unwrap();
         assert!(
             drain_try_recv(&mut rx).is_empty(),
             "nothing should be delivered yet"
         );
         verify_empty_buffers(&tx.states);
         // A2 -> deliver
-        tx.send("A".into(), 2, ("A".into(), 2)).unwrap();
-        assert_eq!(drain_try_recv(&mut rx), vec![("A".into(), 2)]);
+        tx.send(session_id_a, 2, (session_id_a, 2)).unwrap();
+        assert_eq!(drain_try_recv(&mut rx), vec![(session_id_a, 2)]);
         verify_empty_buffers(&tx.states);
         // late A1 duplicate -> drop
-        tx.send("A".into(), 1, ("A".into(), 1_001)).unwrap();
+        tx.send(session_id_a, 1, (session_id_a, 1_001)).unwrap();
         assert!(
             drain_try_recv(&mut rx).is_empty(),
             "nothing should be delivered yet"
         );
         verify_empty_buffers(&tx.states);
     }
+
+    #[test]
+    fn test_sequencer_clone() {
+        let sequencer = Sequencer {
+            session_id: Uuid::now_v7(),
+            last_seqs: Arc::new(Mutex::new(HashMap::new())),
+        };
+
+        let actor_id = id!(test[0].test);
+
+        // Modify original sequencer
+        sequencer.assign_seq(&actor_id);
+        sequencer.assign_seq(&actor_id);
+
+        // Clone should share the same state
+        let cloned_sequencer = sequencer.clone();
+        assert_eq!(sequencer.session_id(), cloned_sequencer.session_id(),);
+        assert_eq!(cloned_sequencer.assign_seq(&actor_id), 3);
+    }
+
+    #[test]
+    fn test_sequencer_assign_seq() {
+        let sequencer = Sequencer {
+            session_id: Uuid::now_v7(),
+            last_seqs: Arc::new(Mutex::new(HashMap::new())),
+        };
+
+        let actor_id_0 = id!(worker[0].worker);
+        let actor_id_1 = id!(worker[1].worker);
+
+        // Both actors should start with next_seq = 1
+        assert_eq!(sequencer.assign_seq(&actor_id_0), 1);
+        assert_eq!(sequencer.assign_seq(&actor_id_1), 1);
+
+        // Increment actor_0 twice
+        sequencer.assign_seq(&actor_id_0);
+        sequencer.assign_seq(&actor_id_0);
+
+        // Increment actor_1 once
+        sequencer.assign_seq(&actor_id_1);
+
+        // Check independent sequences
+        assert_eq!(sequencer.assign_seq(&actor_id_0), 4);
+        assert_eq!(sequencer.assign_seq(&actor_id_1), 3);
+    }
 }
Original file line number	Diff line number	Diff line change
`@@ -3282,7 +3282,7 @@ mod tests {`
`3282`	`3282`
`3283`	`3283`	`fn post(cx: &impl context::Actor, port_id: PortId, msg: u64) {`
`3284`	`3284`	`let serialized = Serialized::serialize(&msg).unwrap();`
`3285`		`- port_id.send(cx, &serialized);`
	`3285`	`+ port_id.send(cx, serialized);`
`3286`	`3286`	`}`
`3287`	`3287`
`3288`	`3288`	`#[async_timed_test(timeout_secs = 30)]`