@@ -359,22 +359,37 @@ def from_dict(cls, d, **kwargs):
359
359
if attr in d :
360
360
setattr (self , attr , d [attr ])
361
361
362
+ cluster_key = ClusterManager ._cluster_key (self )
363
+
362
364
if d .get ("controller" ):
363
365
controller_info = d ["controller" ]
364
366
cls = self .controller_launcher_class = import_item (controller_info ["class" ])
365
367
if controller_info ["state" ]:
366
- self .controller = cls .from_dict (controller_info ["state" ], parent = self )
368
+ try :
369
+ self .controller = cls .from_dict (
370
+ controller_info ["state" ], parent = self
371
+ )
372
+ except launcher .NotRunning as e :
373
+ self .log .error (f"Controller for { cluster_key } not running: { e } " )
367
374
368
375
engine_info = d .get ("engines" )
369
376
if engine_info :
370
377
cls = self .engine_launcher_class = import_item (engine_info ["class" ])
371
378
for engine_set_id , engine_state in engine_info .get ("sets" , {}).items ():
372
- self .engines [engine_set_id ] = cls .from_dict (
373
- engine_state ,
374
- engine_set_id = engine_set_id ,
375
- parent = self ,
376
- )
377
-
379
+ try :
380
+ self .engines [engine_set_id ] = cls .from_dict (
381
+ engine_state ,
382
+ engine_set_id = engine_set_id ,
383
+ parent = self ,
384
+ )
385
+ except launcher .NotRunning as e :
386
+ self .log .error (
387
+ f"Engine set { cluster_key } { engine_set_id } not running: { e } "
388
+ )
389
+ # check if state changed
390
+ if self .to_dict () != d :
391
+ # if so, update our cluster file
392
+ self .update_cluster_file ()
378
393
return self
379
394
380
395
@classmethod
@@ -703,7 +718,8 @@ class ClusterManager(LoggingConfigurable):
703
718
704
719
_clusters = Dict (help = "My cluster objects" )
705
720
706
- def _cluster_key (self , cluster ):
721
+ @staticmethod
722
+ def _cluster_key (cluster ):
707
723
"""Return a unique cluster key for a cluster
708
724
709
725
Default is {profile}:{cluster_id}
0 commit comments