@@ -164,21 +164,29 @@ impl State {
164
164
}
165
165
}
166
166
167
+ /// Actor state used for v1 API.
168
+ #[ derive( Debug ) ]
169
+ struct ActorInstanceState {
170
+ create_rank : usize ,
171
+ spawn : Result < ActorId , anyhow:: Error > ,
172
+ }
173
+
167
174
/// A mesh agent is responsible for managing procs in a [`ProcMesh`].
168
175
#[ derive( Debug ) ]
169
176
#[ hyperactor:: export(
170
177
handlers=[
171
178
MeshAgentMessage ,
172
- resource:: CreateOrUpdate <ActorSpec >,
179
+ resource:: CreateOrUpdate <ActorSpec > { cast = true } ,
173
180
resource:: GetState <ActorState > { cast = true } ,
181
+ resource:: GetRankStatus { cast = true } ,
174
182
]
175
183
) ]
176
184
pub struct ProcMeshAgent {
177
185
proc : Proc ,
178
186
remote : Remote ,
179
187
state : State ,
180
188
/// Actors created and tracked through the resource behavior.
181
- created : HashMap < Name , Result < ActorId , anyhow :: Error > > ,
189
+ actor_states : HashMap < Name , ActorInstanceState > ,
182
190
/// If true, and supervisor is None, record supervision events to be reported
183
191
/// to owning actors later.
184
192
record_supervision_events : bool ,
@@ -203,7 +211,7 @@ impl ProcMeshAgent {
203
211
proc : proc. clone ( ) ,
204
212
remote : Remote :: collect ( ) ,
205
213
state : State :: UnconfiguredV0 { sender } ,
206
- created : HashMap :: new ( ) ,
214
+ actor_states : HashMap :: new ( ) ,
207
215
record_supervision_events : false ,
208
216
supervision_events : HashMap :: new ( ) ,
209
217
} ;
@@ -216,7 +224,7 @@ impl ProcMeshAgent {
216
224
proc : proc. clone ( ) ,
217
225
remote : Remote :: collect ( ) ,
218
226
state : State :: V1 ,
219
- created : HashMap :: new ( ) ,
227
+ actor_states : HashMap :: new ( ) ,
220
228
record_supervision_events : true ,
221
229
supervision_events : HashMap :: new ( ) ,
222
230
} ;
@@ -442,10 +450,10 @@ pub struct ActorState {
442
450
impl Handler < resource:: CreateOrUpdate < ActorSpec > > for ProcMeshAgent {
443
451
async fn handle (
444
452
& mut self ,
445
- cx : & Context < Self > ,
453
+ _cx : & Context < Self > ,
446
454
create_or_update : resource:: CreateOrUpdate < ActorSpec > ,
447
455
) -> anyhow:: Result < ( ) > {
448
- if self . created . contains_key ( & create_or_update. name ) {
456
+ if self . actor_states . contains_key ( & create_or_update. name ) {
449
457
// There is no update.
450
458
return Ok ( ( ) ) ;
451
459
}
@@ -454,19 +462,63 @@ impl Handler<resource::CreateOrUpdate<ActorSpec>> for ProcMeshAgent {
454
462
actor_type,
455
463
params_data,
456
464
} = create_or_update. spec ;
457
- self . created . insert (
465
+ self . actor_states . insert (
458
466
create_or_update. name . clone ( ) ,
459
- self . remote
460
- . gspawn (
461
- & self . proc ,
462
- & actor_type,
463
- & create_or_update. name . to_string ( ) ,
464
- params_data,
465
- )
466
- . await ,
467
+ ActorInstanceState {
468
+ create_rank : create_or_update. rank . unwrap ( ) ,
469
+ spawn : self
470
+ . remote
471
+ . gspawn (
472
+ & self . proc ,
473
+ & actor_type,
474
+ & create_or_update. name . to_string ( ) ,
475
+ params_data,
476
+ )
477
+ . await ,
478
+ } ,
467
479
) ;
468
480
469
- create_or_update. reply . send ( cx, true ) ?;
481
+ Ok ( ( ) )
482
+ }
483
+ }
484
+
485
+ #[ async_trait]
486
+ impl Handler < resource:: GetRankStatus > for ProcMeshAgent {
487
+ async fn handle (
488
+ & mut self ,
489
+ cx : & Context < Self > ,
490
+ get_rank_status : resource:: GetRankStatus ,
491
+ ) -> anyhow:: Result < ( ) > {
492
+ let ( rank, status) = match self . actor_states . get ( & get_rank_status. name ) {
493
+ Some ( ActorInstanceState {
494
+ spawn : Ok ( actor_id) ,
495
+ create_rank,
496
+ } ) => {
497
+ let supervision_events = self
498
+ . supervision_events
499
+ . get ( actor_id)
500
+ . map_or_else ( Vec :: new, |a| a. clone ( ) ) ;
501
+ (
502
+ * create_rank,
503
+ if supervision_events. is_empty ( ) {
504
+ resource:: Status :: Running
505
+ } else {
506
+ resource:: Status :: Failed ( format ! (
507
+ "because of supervision events: {:?}" ,
508
+ supervision_events
509
+ ) )
510
+ } ,
511
+ )
512
+ }
513
+ Some ( ActorInstanceState {
514
+ spawn : Err ( e) ,
515
+ create_rank,
516
+ } ) => ( * create_rank, resource:: Status :: Failed ( e. to_string ( ) ) ) ,
517
+ // TODO: represent unknown rank
518
+ None => ( usize:: MAX , resource:: Status :: NotExist ) ,
519
+ } ;
520
+
521
+ get_rank_status. reply . send ( cx, ( rank, status) . into ( ) ) ?;
470
522
Ok ( ( ) )
471
523
}
472
524
}
@@ -478,12 +530,11 @@ impl Handler<resource::GetState<ActorState>> for ProcMeshAgent {
478
530
cx : & Context < Self > ,
479
531
get_state : resource:: GetState < ActorState > ,
480
532
) -> anyhow:: Result < ( ) > {
481
- let rank = self
482
- . state
483
- . rank ( )
484
- . ok_or_else ( || anyhow:: anyhow!( "tried to get status of unconfigured proc" ) ) ?;
485
- let state = match self . created . get ( & get_state. name ) {
486
- Some ( Ok ( actor_id) ) => {
533
+ let state = match self . actor_states . get ( & get_state. name ) {
534
+ Some ( ActorInstanceState {
535
+ create_rank,
536
+ spawn : Ok ( actor_id) ,
537
+ } ) => {
487
538
let supervision_events = self
488
539
. supervision_events
489
540
. get ( actor_id)
@@ -501,12 +552,12 @@ impl Handler<resource::GetState<ActorState>> for ProcMeshAgent {
501
552
status,
502
553
state : Some ( ActorState {
503
554
actor_id : actor_id. clone ( ) ,
504
- create_rank : rank ,
555
+ create_rank : * create_rank ,
505
556
supervision_events,
506
557
} ) ,
507
558
}
508
559
}
509
- Some ( Err ( e) ) => resource:: State {
560
+ Some ( ActorInstanceState { spawn : Err ( e) , .. } ) => resource:: State {
510
561
name : get_state. name . clone ( ) ,
511
562
status : resource:: Status :: Failed ( e. to_string ( ) ) ,
512
563
state : None ,
0 commit comments