Actor throughput regression test (#749)

pablorfb-meta · facebook-github-bot · commit 3105c568c04c · 2025-08-04T19:29:16.000-07:00
Summary: Pull Request resolved: #749 Measures how long it takes to cast a 1 KB message to N hosts with 8 actors each via local transport and to receive a reply from every actor. Each actor adds an artificial processing time of 100 ms before replying. ``` trainer_mesh .cast( all(true_()), StepMessage { step: i as usize, reply: tx.bind(), payload, }, ) .unwrap(); let mut msg_rcv = 0; while msg_rcv < actor_count { let _ = rx.recv().await.unwrap(); msg_rcv += 1; } ``` Reviewed By: moonli Differential Revision: D79406183 fbshipit-source-id: dad719a0fd2e4288341c0fd8f6bbea607cd123ac
diff --git a/hyperactor_mesh/Cargo.toml b/hyperactor_mesh/Cargo.toml
@@ -1,4 +1,4 @@
-# @generated by autocargo from //monarch/hyperactor_mesh:[hyperactor_mesh,hyperactor_mesh_test_bootstrap,hyperactor_mesh_test_remote_process_alloc,hyperactor_mesh_test_remote_process_allocator,process_allocator_cleanup,process_allocator_test_bin,process_allocator_test_bootstrap]
+# @generated by autocargo from //monarch/hyperactor_mesh:[benchmarks,hyperactor_mesh,hyperactor_mesh_test_bootstrap,hyperactor_mesh_test_remote_process_alloc,hyperactor_mesh_test_remote_process_allocator,process_allocator_cleanup,process_allocator_test_bin,process_allocator_test_bootstrap]
 
 [package]
 name = "hyperactor_mesh"
@@ -7,6 +7,10 @@ authors = ["Meta"]
 edition = "2021"
 license = "BSD-3-Clause"
 
+[[bin]]
+name = "benchmarks"
+path = "benches/main.rs"
+
 [[bin]]
 name = "hyperactor_mesh_test_bootstrap"
 path = "test/bootstrap.rs"
@@ -39,6 +43,7 @@ bitmaps = "3.2.1"
 buck-resources = "1"
 chrono = { version = "0.4.41", features = ["clock", "serde", "std"], default-features = false }
 clap = { version = "4.5.41", features = ["derive", "env", "string", "unicode", "wrap_help"] }
+criterion = { version = "0.5.1", features = ["async_tokio", "csv_output"] }
 dashmap = { version = "5.5.3", features = ["rayon", "serde"] }
 enum-as-inner = "0.6.0"
 erased-serde = "0.3.27"
diff --git a/hyperactor_mesh/benches/bench_actor.rs b/hyperactor_mesh/benches/bench_actor.rs
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+use std::time::Duration;
+
+use anyhow::Result;
+use async_trait::async_trait;
+use hyperactor::Actor;
+use hyperactor::Bind;
+use hyperactor::Context;
+use hyperactor::Handler;
+use hyperactor::Named;
+use hyperactor::PortRef;
+use hyperactor::Unbind;
+use hyperactor::clock::Clock;
+use serde::Deserialize;
+use serde::Serialize;
+
+/// Message handled by `BenchActor`: carries a step number, a reply port and
+/// a byte payload.
+#[derive(Debug, Clone, Serialize, Deserialize, Named, Bind, Unbind)]
+pub struct BenchMessage {
+    /// Step number; the handler sends this value back on `reply`.
+    pub step: usize,
+    /// Port on which the handler sends its `usize` reply.
+    pub reply: PortRef<usize>,
+    /// Payload bytes, (de)serialized through `serde_bytes`. The handler in
+    /// this file never reads them.
+    #[serde(with = "serde_bytes")]
+    pub payload: Vec<u8>,
+}
+
+/// Field-less actor used by the benchmark; it holds no state of its own.
+///
+/// The `hyperactor::export` attribute lists `BenchMessage` as a handled
+/// message with `cast = true`.
+// NOTE(review): `spawn = true` presumably makes the actor spawnable on a
+// mesh — confirm against the `hyperactor::export` documentation.
+#[derive(Debug)]
+#[hyperactor::export(
+    spawn = true,
+    handlers = [
+        BenchMessage { cast = true },
+    ],
+)]
+pub struct BenchActor {}
+
+#[async_trait]
+impl Actor for BenchActor {
+    /// The actor takes no construction parameters.
+    type Params = ();
+
+    /// Builds the (field-less) actor; this never fails.
+    async fn new(_params: Self::Params) -> Result<Self> {
+        Ok(BenchActor {})
+    }
+}
+
+#[async_trait]
+impl Handler<BenchMessage> for BenchActor {
+    /// Sleeps for 100 ms on the default clock, then sends the message's
+    /// `step` back on its `reply` port.
+    ///
+    /// The result of the send is deliberately ignored, so a failed reply
+    /// does not turn into a handler error; the handler always returns `Ok`.
+    async fn handle(
+        &mut self,
+        ctx: &Context<Self>,
+        msg: BenchMessage,
+    ) -> Result<(), anyhow::Error> {
+        // Only the step and the reply port are needed; the payload is unused.
+        let BenchMessage { step, reply, .. } = msg;
+
+        // Artificial processing time.
+        let simulated_work = Duration::from_millis(100);
+        let clock = hyperactor::clock::ClockKind::default();
+        clock.sleep(simulated_work).await;
+
+        // Best-effort reply: errors are intentionally dropped.
+        let _ = reply.send(ctx, step);
+        Ok(())
+    }
+}
diff --git a/hyperactor_mesh/benches/main.rs b/hyperactor_mesh/benches/main.rs
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+use std::time::Instant;
+
+use criterion::BenchmarkId;
+use criterion::Criterion;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use hyperactor_mesh::ProcMesh;
+use hyperactor_mesh::actor_mesh::ActorMesh;
+use hyperactor_mesh::actor_mesh::RootActorMesh;
+use hyperactor_mesh::alloc::AllocSpec;
+use hyperactor_mesh::alloc::Allocator;
+use hyperactor_mesh::alloc::LocalAllocator;
+use hyperactor_mesh::selection::dsl::all;
+use hyperactor_mesh::selection::dsl::true_;
+use hyperactor_mesh::shape;
+
+mod bench_actor;
+use bench_actor::BenchActor;
+use bench_actor::BenchMessage;
+use tokio::runtime::Runtime;
+
+/// Benchmarks how long it takes to cast a 1 KB message to every actor of a
+/// local mesh and receive a reply from each of them.
+///
+/// The benchmark runs once per host count (1, 10, 100 and 1000 hosts, with
+/// 8 GPUs per host). Mesh allocation and actor spawning happen before the
+/// timer starts, so the reported time covers only the casts and the wait
+/// for their replies.
+///
+/// # Panics
+///
+/// Panics if the Tokio runtime cannot be created, or if allocation,
+/// spawning, casting or receiving a reply fails.
+fn bench_actor_scaling(c: &mut Criterion) {
+    let mut group = c.benchmark_group("actor_scaling");
+    let host_counts = vec![1, 10, 100, 1000];
+    let gpus_per_host = 8;
+    let message_size = 1024; // Fixed message size (1KB)
+    group.sample_size(10);
+    group.sampling_mode(criterion::SamplingMode::Flat);
+
+    for host_count in host_counts {
+        group.bench_function(BenchmarkId::from_parameter(host_count), |b| {
+            let mut b = b.to_async(Runtime::new().unwrap());
+            b.iter_custom(|iters| async move {
+                let shape = shape! { hosts = host_count, gpus = gpus_per_host };
+                // The message is cast to the whole mesh, so the expected
+                // number of replies per cast is hosts * gpus (assuming one
+                // actor per proc), not one per host.
+                let actor_count = host_count * gpus_per_host;
+
+                let alloc = LocalAllocator
+                    .allocate(AllocSpec {
+                        shape,
+                        constraints: Default::default(),
+                    })
+                    .await
+                    .unwrap();
+
+                let proc_mesh = ProcMesh::allocate(alloc).await.unwrap();
+                let trainer_mesh: RootActorMesh<BenchActor> =
+                    proc_mesh.spawn("trainer", &()).await.unwrap();
+                let client = proc_mesh.client();
+
+                // Everything above is setup; only the cast/reply rounds
+                // below are timed.
+                let start = Instant::now();
+                for i in 0..iters {
+                    // A fresh port per round keeps replies of different
+                    // rounds apart.
+                    let (tx, mut rx) = client.open_port();
+                    let payload = vec![0u8; message_size];
+
+                    trainer_mesh
+                        .cast(
+                            all(true_()),
+                            BenchMessage {
+                                step: i as usize,
+                                reply: tx.bind(),
+                                payload,
+                            },
+                        )
+                        .unwrap();
+
+                    // Block until every actor has answered this round.
+                    for _ in 0..actor_count {
+                        rx.recv().await.unwrap();
+                    }
+                }
+
+                start.elapsed()
+            });
+        });
+    }
+
+    group.finish();
+}
+
+// Register `bench_actor_scaling` in the `benches` group and hand that group
+// to Criterion's `criterion_main!` macro.
+criterion_group!(benches, bench_actor_scaling);
+criterion_main!(benches);