Skip to content

Commit 06e0c36

Browse files
committed
[monarch][supervision] Increase GetState::<ActorState> default timeout and make it configurable
The 1-second timeout on `GetState::<ActorState>` used for supervision was too short and caused at least one test to fail. This diff makes the timeout configurable through the new `GET_ACTOR_STATE_MAX_IDLE` attribute (environment variable `HYPERACTOR_MESH_GET_ACTOR_STATE_MAX_IDLE`) and raises the default to 30 seconds. The previously failing test now passes.

Example test failure that shows the supervision timeout: P1984316048

Differential Revision: [D84232284](https://our.internmc.facebook.com/intern/diff/D84232284/)

**NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments; please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D84232284/)!

ghstack-source-id: 315062680
Pull Request resolved: #1474
1 parent 9700834 commit 06e0c36

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

hyperactor_mesh/src/v1/proc_mesh.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ declare_attrs! {
7272
/// The maximum idle time between updates while spawning actor meshes.
7373
@meta(CONFIG_ENV_VAR = "HYPERACTOR_MESH_ACTOR_SPAWN_MAX_IDLE".to_string())
7474
pub attr ACTOR_SPAWN_MAX_IDLE: Duration = Duration::from_secs(30);
75+
76+
@meta(CONFIG_ENV_VAR = "HYPERACTOR_MESH_GET_ACTOR_STATE_MAX_IDLE".to_string())
77+
pub attr GET_ACTOR_STATE_MAX_IDLE: Duration = Duration::from_secs(30);
7578
}
7679

7780
/// A reference to a single [`hyperactor::Proc`].
@@ -533,7 +536,9 @@ impl ProcMeshRef {
533536
// the agent will be unresponsive.
534537
// We handle this by setting a timeout on the recv, and if we don't get a
535538
// message we assume the agent is dead and return a failed state.
536-
let state = RealClock.timeout(Duration::from_secs(1), rx.recv()).await;
539+
let state = RealClock
540+
.timeout(config::global::get(GET_ACTOR_STATE_MAX_IDLE), rx.recv())
541+
.await;
537542
if let Ok(state) = state {
538543
// Handle non-timeout receiver error.
539544
let state = state?;

0 commit comments

Comments
 (0)