@@ -275,6 +275,10 @@ func (d *defaultManager) RunWorkloadDetached(ctx context.Context, runConfig *run
275
275
276
276
// Start the detached process
277
277
if err := detachedCmd .Start (); err != nil {
278
+ // If the start failed, we need to set the status to error before returning.
279
+ if err := d .statuses .SetWorkloadStatus (ctx , runConfig .BaseName , rt .WorkloadStatusError , "" ); err != nil {
280
+ logger .Warnf ("Failed to set workload %s status to error: %v" , runConfig .BaseName , err )
281
+ }
278
282
return fmt .Errorf ("failed to start detached process: %v" , err )
279
283
}
280
284
@@ -444,79 +448,130 @@ func (d *defaultManager) RestartWorkloads(ctx context.Context, names []string, f
444
448
445
449
for _ , name := range names {
446
450
group .Go (func () error {
447
- // Create a child context with a longer timeout
448
- childCtx , cancel := context . WithTimeout ( context . Background (), AsyncOperationTimeout )
449
- defer cancel ()
451
+ return d . restartSingleWorkload ( ctx , name , foreground )
452
+ } )
453
+ }
450
454
451
- // NOTE: Once we have the status manager implemented, we can use it
452
- // to ensure that the workload exists and is in the `stopped` state
453
- // before restarting.
454
- var containerBaseName string
455
- var running bool
456
- // Try to find the container.
457
- container , err := d .runtime .GetWorkloadInfo (childCtx , name )
458
- if err != nil {
459
- if errors .Is (err , rt .ErrWorkloadNotFound ) {
460
- logger .Warnf ("Warning: Failed to find container: %v" , err )
461
- logger .Warnf ("Trying to find state with name %s directly..." , name )
462
-
463
- // Try to use the provided name as the base name
464
- containerBaseName = name
465
- running = false
466
- } else {
467
- return fmt .Errorf ("failed to find workload %s: %v" , name , err )
468
- }
469
- } else {
470
- // Container found, check if it's running and get the base name,
471
- running = container .IsRunning ()
472
- containerBaseName = labels .GetContainerBaseName (container .Labels )
473
- }
455
+ return group , nil
456
+ }
474
457
475
- // Check if the proxy process is running
476
- proxyRunning := proxy .IsRunning (containerBaseName )
458
+ // restartSingleWorkload handles the restart logic for a single workload
459
+ func (d * defaultManager ) restartSingleWorkload (ctx context.Context , name string , foreground bool ) error {
460
+ // Create a child context with a longer timeout
461
+ childCtx , cancel := context .WithTimeout (context .Background (), AsyncOperationTimeout )
462
+ defer cancel ()
477
463
478
- if running && proxyRunning {
479
- logger .Infof ("Container %s and proxy are already running" , name )
480
- return nil
481
- }
464
+ // Get workload state information
465
+ workloadState , err := d .getWorkloadState (childCtx , name )
466
+ if err != nil {
467
+ return err
468
+ }
482
469
483
- // Load the configuration from the state store
484
- // This is done synchronously since it is relatively inexpensive operation
485
- // and it allows for better error handling.
486
- mcpRunner , err := d .loadRunnerFromState (childCtx , containerBaseName )
487
- if err != nil {
488
- // TODO: If the state file has gone missing, we should delete
489
- // the workload - since there is no chance of recovery.
490
- return fmt .Errorf ("failed to load state for %s: %v" , containerBaseName , err )
491
- }
470
+ // Check if already running
471
+ if d .isWorkloadAlreadyRunning (name , workloadState ) {
472
+ return nil
473
+ }
492
474
493
- // At this point we're sure that the workload exists but is not running.
494
- // Transition workload to `starting` state.
495
- if err := d .statuses .SetWorkloadStatus (ctx , name , rt .WorkloadStatusStarting , "" ); err != nil {
496
- logger .Warnf ("Failed to set workload %s status to starting: %v" , name , err )
497
- }
498
- logger .Infof ("Loaded configuration from state for %s" , containerBaseName )
475
+ // Load runner configuration from state
476
+ mcpRunner , err := d .loadRunnerFromState (childCtx , workloadState .BaseName )
477
+ if err != nil {
478
+ return fmt .Errorf ("failed to load state for %s: %v" , workloadState .BaseName , err )
479
+ }
499
480
500
- // Run the tooling server inside a detached process.
501
- logger .Infof ("Starting tooling server %s..." , name )
481
+ // Set workload status to starting
482
+ if err := d .statuses .SetWorkloadStatus (ctx , name , rt .WorkloadStatusStarting , "" ); err != nil {
483
+ logger .Warnf ("Failed to set workload %s status to starting: %v" , name , err )
484
+ }
485
+ logger .Infof ("Loaded configuration from state for %s" , workloadState .BaseName )
502
486
503
- // If the container is running but the proxy is not, stop the container first
504
- if running { // && !proxyRunning was previously here but is implied by previous if statement.
505
- logger .Infof ("Container %s is running but proxy is not. Stopping container..." , name )
506
- if err = d .runtime .StopWorkload (childCtx , name ); err != nil {
507
- return fmt .Errorf ("failed to stop container %s: %v" , name , err )
508
- }
509
- logger .Infof ("Container %s stopped" , name )
510
- }
487
+ // Stop container if running but proxy is not
488
+ if err := d .stopContainerIfNeeded (childCtx , ctx , name , workloadState ); err != nil {
489
+ return err
490
+ }
511
491
512
- if foreground {
513
- return d .RunWorkload (ctx , mcpRunner .Config )
514
- }
515
- return d .RunWorkloadDetached (ctx , mcpRunner .Config )
516
- })
492
+ // Start the workload
493
+ return d .startWorkload (ctx , name , mcpRunner , foreground )
494
+ }
495
+
496
+ // workloadState holds the current state of a workload for restart operations
497
+ type workloadState struct {
498
+ BaseName string
499
+ Running bool
500
+ ProxyRunning bool
501
+ }
502
+
503
+ // getWorkloadState retrieves the current state of a workload
504
+ func (d * defaultManager ) getWorkloadState (ctx context.Context , name string ) (* workloadState , error ) {
505
+ workloadSt := & workloadState {}
506
+
507
+ // Try to find the container
508
+ container , err := d .runtime .GetWorkloadInfo (ctx , name )
509
+ if err != nil {
510
+ if errors .Is (err , rt .ErrWorkloadNotFound ) {
511
+ logger .Warnf ("Warning: Failed to find container: %v" , err )
512
+ logger .Warnf ("Trying to find state with name %s directly..." , name )
513
+ // Try to use the provided name as the base name
514
+ workloadSt .BaseName = name
515
+ workloadSt .Running = false
516
+ } else {
517
+ return nil , fmt .Errorf ("failed to find workload %s: %v" , name , err )
518
+ }
519
+ } else {
520
+ // Container found, check if it's running and get the base name
521
+ workloadSt .Running = container .IsRunning ()
522
+ workloadSt .BaseName = labels .GetContainerBaseName (container .Labels )
517
523
}
518
524
519
- return group , nil
525
+ // Check if the proxy process is running
526
+ workloadSt .ProxyRunning = proxy .IsRunning (workloadSt .BaseName )
527
+
528
+ return workloadSt , nil
529
+ }
530
+
531
+ // isWorkloadAlreadyRunning checks if the workload is already fully running
532
+ func (* defaultManager ) isWorkloadAlreadyRunning (name string , workloadSt * workloadState ) bool {
533
+ if workloadSt .Running && workloadSt .ProxyRunning {
534
+ logger .Infof ("Container %s and proxy are already running" , name )
535
+ return true
536
+ }
537
+ return false
538
+ }
539
+
540
+ // stopContainerIfNeeded stops the container if it's running but proxy is not
541
+ func (d * defaultManager ) stopContainerIfNeeded (childCtx , ctx context.Context , name string , workloadSt * workloadState ) error {
542
+ if ! workloadSt .Running {
543
+ return nil
544
+ }
545
+
546
+ logger .Infof ("Container %s is running but proxy is not. Stopping container..." , name )
547
+ if err := d .runtime .StopWorkload (childCtx , name ); err != nil {
548
+ if statusErr := d .statuses .SetWorkloadStatus (ctx , name , rt .WorkloadStatusError , "" ); statusErr != nil {
549
+ logger .Warnf ("Failed to set workload %s status to error: %v" , name , statusErr )
550
+ }
551
+ return fmt .Errorf ("failed to stop container %s: %v" , name , err )
552
+ }
553
+ logger .Infof ("Container %s stopped" , name )
554
+ return nil
555
+ }
556
+
557
+ // startWorkload starts the workload in either foreground or background mode
558
+ func (d * defaultManager ) startWorkload (ctx context.Context , name string , mcpRunner * runner.Runner , foreground bool ) error {
559
+ logger .Infof ("Starting tooling server %s..." , name )
560
+
561
+ var err error
562
+ if foreground {
563
+ err = d .RunWorkload (ctx , mcpRunner .Config )
564
+ } else {
565
+ err = d .RunWorkloadDetached (ctx , mcpRunner .Config )
566
+ }
567
+
568
+ if err != nil {
569
+ // If we could not start the workload, set the status to error before returning
570
+ if statusErr := d .statuses .SetWorkloadStatus (ctx , name , rt .WorkloadStatusError , "" ); statusErr != nil {
571
+ logger .Warnf ("Failed to set workload %s status to error: %v" , name , statusErr )
572
+ }
573
+ }
574
+ return err
520
575
}
521
576
522
577
// TODO: Move to dedicated config management interface.
0 commit comments