Skip to content

Commit 3d7895d

Browse files
pablorfb-meta and facebook-github-bot
authored and committed
Fix number of messages being waited on (#855)
Summary: Pull Request resolved: #855. Previously we were not waiting for all of the messages being cast (the GPU dimension was missing from the expected count). Unsurprisingly, this causes the job to hang. Reviewed By: pzhan9, vidhyav. Differential Revision: D80187064. fbshipit-source-id: 2ea9da39645b4c21d5e37ccd0126b4fc4d1ee3d4
1 parent b0b68af commit 3d7895d

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

hyperactor_mesh/benches/main.rs

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ use tokio::runtime::Runtime;
3333
fn bench_actor_scaling(c: &mut Criterion) {
3434
let mut group = c.benchmark_group("actor_scaling");
3535
let host_counts = vec![1, 10, 100];
36+
let gpus = 1;
3637
let message_size = 1024; // Fixed message size (1KB)
3738
group.sample_size(10);
3839
group.sampling_mode(criterion::SamplingMode::Flat);
@@ -43,7 +44,7 @@ fn bench_actor_scaling(c: &mut Criterion) {
4344
b.iter_custom(|iters| async move {
4445
let alloc = LocalAllocator
4546
.allocate(AllocSpec {
46-
extent: extent!(hosts = host_count, gpus = 8),
47+
extent: extent!(hosts = host_count, gpus = gpus),
4748
constraints: Default::default(),
4849
})
4950
.await
@@ -74,13 +75,17 @@ fn bench_actor_scaling(c: &mut Criterion) {
7475
.unwrap();
7576

7677
let mut msg_rcv = 0;
77-
while msg_rcv < host_count {
78-
let _ = rx.recv().await.unwrap();
78+
while msg_rcv < host_count * gpus {
79+
let _ = tokio::time::timeout(Duration::from_secs(10), rx.recv())
80+
.await
81+
.unwrap();
82+
7983
msg_rcv += 1;
8084
}
8185
}
8286

8387
let elapsed = start.elapsed();
88+
println!("Elapsed: {:?} on iters {}", elapsed, iters);
8489
proc_mesh
8590
.events()
8691
.unwrap()
@@ -89,7 +94,7 @@ fn bench_actor_scaling(c: &mut Criterion) {
8994
.await
9095
.expect("Failed to stop allocator");
9196
elapsed
92-
});
97+
})
9398
});
9499
}
95100

@@ -167,7 +172,9 @@ fn bench_actor_mesh_message_sizes(c: &mut Criterion) {
167172

168173
let mut msg_rcv = 0;
169174
while msg_rcv < actor_count {
170-
let _ = rx.recv().await.unwrap();
175+
let _ = tokio::time::timeout(Duration::from_secs(10), rx.recv())
176+
.await
177+
.unwrap();
171178
msg_rcv += 1;
172179
}
173180
}

0 commit comments

Comments
 (0)