Skip to content

Commit f80e528

Browse files
authored
Fix listing and removing workloads in inconsistent state (#1503)
1 parent b3ef9e8 commit f80e528

File tree

2 files changed

+88
-55
lines changed

2 files changed

+88
-55
lines changed

pkg/workloads/manager.go

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -336,30 +336,29 @@ func (d *defaultManager) deleteWorkload(ctx context.Context, name string) error
336336
if err != nil {
337337
return err
338338
}
339-
if container == nil {
340-
return nil // Container not found, but operation should succeed
341-
}
342339

343340
// Set status to removing
344341
if err := d.statuses.SetWorkloadStatus(ctx, name, rt.WorkloadStatusRemoving, ""); err != nil {
345342
logger.Warnf("Failed to set workload %s status to removing: %v", name, err)
346343
}
347344

348-
containerLabels := container.Labels
349-
baseName := labels.GetContainerBaseName(containerLabels)
345+
if container != nil {
346+
containerLabels := container.Labels
347+
baseName := labels.GetContainerBaseName(containerLabels)
350348

351-
// Stop proxy if running
352-
if container.IsRunning() {
353-
d.stopProxyIfNeeded(name, baseName)
354-
}
349+
// Stop proxy if running
350+
if container.IsRunning() {
351+
d.stopProxyIfNeeded(name, baseName)
352+
}
355353

356-
// Remove the container
357-
if err := d.removeContainer(childCtx, ctx, name); err != nil {
358-
return err
359-
}
354+
// Remove the container
355+
if err := d.removeContainer(childCtx, ctx, name); err != nil {
356+
return err
357+
}
360358

361-
// Clean up associated resources
362-
d.cleanupWorkloadResources(childCtx, name, baseName)
359+
// Clean up associated resources
360+
d.cleanupWorkloadResources(childCtx, name, baseName)
361+
}
363362

364363
// Remove the workload status from the status store
365364
if err := d.statuses.DeleteWorkloadStatus(ctx, name); err != nil {
@@ -375,7 +374,7 @@ func (d *defaultManager) getWorkloadContainer(childCtx, ctx context.Context, nam
375374
if err != nil {
376375
if errors.Is(err, rt.ErrWorkloadNotFound) {
377376
// Log but don't fail the entire operation for not found containers
378-
logger.Warnf("Warning: Failed to delete workload %s: %v", name, err)
377+
logger.Warnf("Warning: Failed to get workload %s: %v", name, err)
379378
return nil, nil
380379
}
381380
if statusErr := d.statuses.SetWorkloadStatus(ctx, name, rt.WorkloadStatusError, err.Error()); statusErr != nil {

pkg/workloads/statuses/file_status.go

Lines changed: 73 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -128,45 +128,7 @@ func (f *fileStatusManager) ListWorkloads(ctx context.Context, listAll bool, lab
128128
}
129129

130130
// Create a map of runtime workloads by name for easy lookup
131-
runtimeWorkloadMap := make(map[string]rt.ContainerInfo)
132-
for _, container := range runtimeContainers {
133-
runtimeWorkloadMap[container.Name] = container
134-
}
135-
136-
// Create result map to avoid duplicates and merge data
137-
workloadMap := make(map[string]core.Workload)
138-
139-
// First, add all runtime workloads
140-
for _, container := range runtimeContainers {
141-
workload, err := types.WorkloadFromContainerInfo(&container)
142-
if err != nil {
143-
logger.Warnf("failed to convert container info for workload %s: %v", container.Name, err)
144-
continue
145-
}
146-
workloadMap[container.Name] = workload
147-
}
148-
149-
// Then, merge with file workloads, validating running workloads
150-
for name, fileWorkload := range fileWorkloads {
151-
if runtimeContainer, exists := runtimeWorkloadMap[name]; exists {
152-
// Validate running workloads similar to GetWorkload
153-
validatedWorkload, err := f.validateWorkloadInList(ctx, name, fileWorkload, runtimeContainer)
154-
if err != nil {
155-
logger.Warnf("failed to validate workload %s in list: %v", name, err)
156-
// Fall back to basic merge without validation
157-
runtimeWorkload := workloadMap[name]
158-
runtimeWorkload.Status = fileWorkload.Status
159-
runtimeWorkload.StatusContext = fileWorkload.StatusContext
160-
runtimeWorkload.CreatedAt = fileWorkload.CreatedAt
161-
workloadMap[name] = runtimeWorkload
162-
} else {
163-
workloadMap[name] = validatedWorkload
164-
}
165-
} else {
166-
// File-only workload (runtime not available)
167-
workloadMap[name] = fileWorkload
168-
}
169-
}
131+
workloadMap := f.mergeRuntimeAndFileWorkloads(ctx, runtimeContainers, fileWorkloads)
170132

171133
// Convert map to slice and apply filters
172134
var workloads []core.Workload
@@ -510,6 +472,25 @@ func (f *fileStatusManager) handleRuntimeMismatch(
510472
return runtimeResult, nil
511473
}
512474

475+
// handleRuntimeMissing handles the case where the file indicates running or stopped but the runtime
476+
// does not have the workload running. This can happen if using different versions of ToolHive, for example
477+
// the CLI and UI have different versions.
478+
func (f *fileStatusManager) handleRuntimeMissing(
479+
ctx context.Context, workloadName string, fileWorkload core.Workload,
480+
) (core.Workload, error) {
481+
if fileWorkload.Status == rt.WorkloadStatusRunning || fileWorkload.Status == rt.WorkloadStatusStopped {
482+
// The workload cannot be running or stopped if the runtime container is not found
483+
contextMsg := fmt.Sprintf("workload %s not found in runtime, marking as unhealthy", workloadName)
484+
if err := f.SetWorkloadStatus(ctx, workloadName, rt.WorkloadStatusUnhealthy, contextMsg); err != nil {
485+
return core.Workload{}, err
486+
}
487+
fileWorkload.Status = rt.WorkloadStatusUnhealthy
488+
}
489+
490+
// If the workload has another status, like starting or stopping, we can keep it as is
491+
return fileWorkload, nil
492+
}
493+
513494
// checkProxyHealth checks if the proxy process is running for the workload.
514495
// Returns (unhealthyWorkload, true) if proxy is not running, (emptyWorkload, false) if proxy is healthy or not applicable.
515496
func (f *fileStatusManager) checkProxyHealth(
@@ -592,3 +573,56 @@ func (f *fileStatusManager) validateWorkloadInList(
592573
// Runtime and proxy confirm workload is healthy - merge runtime data with file status
593574
return f.mergeHealthyWorkloadData(containerInfo, fileWorkload)
594575
}
576+
577+
// mergeRuntimeAndFileWorkloads returns a map of workloads that combines runtime containers and file-based workloads.
578+
func (f *fileStatusManager) mergeRuntimeAndFileWorkloads(
579+
ctx context.Context,
580+
runtimeContainers []rt.ContainerInfo,
581+
fileWorkloads map[string]core.Workload,
582+
) map[string]core.Workload {
583+
runtimeWorkloadMap := make(map[string]rt.ContainerInfo)
584+
for _, container := range runtimeContainers {
585+
runtimeWorkloadMap[container.Name] = container
586+
}
587+
588+
// Create result map to avoid duplicates and merge data
589+
workloadMap := make(map[string]core.Workload)
590+
591+
// First, add all runtime workloads
592+
for _, container := range runtimeContainers {
593+
workload, err := types.WorkloadFromContainerInfo(&container)
594+
if err != nil {
595+
logger.Warnf("failed to convert container info for workload %s: %v", container.Name, err)
596+
continue
597+
}
598+
workloadMap[container.Name] = workload
599+
}
600+
601+
// Then, merge with file workloads, validating running workloads
602+
for name, fileWorkload := range fileWorkloads {
603+
if runtimeContainer, exists := runtimeWorkloadMap[name]; exists {
604+
// Validate running workloads similar to GetWorkload
605+
validatedWorkload, err := f.validateWorkloadInList(ctx, name, fileWorkload, runtimeContainer)
606+
if err != nil {
607+
logger.Warnf("failed to validate workload %s in list: %v", name, err)
608+
// Fall back to basic merge without validation
609+
runtimeWorkload := workloadMap[name]
610+
runtimeWorkload.Status = fileWorkload.Status
611+
runtimeWorkload.StatusContext = fileWorkload.StatusContext
612+
runtimeWorkload.CreatedAt = fileWorkload.CreatedAt
613+
workloadMap[name] = runtimeWorkload
614+
} else {
615+
workloadMap[name] = validatedWorkload
616+
}
617+
} else {
618+
// File-only workload (runtime not available)
619+
updatedWorkload, err := f.handleRuntimeMissing(ctx, name, fileWorkload)
620+
if err != nil {
621+
logger.Warnf("failed to handle missing runtime for workload %s: %v", name, err)
622+
workloadMap[name] = fileWorkload
623+
}
624+
workloadMap[name] = updatedWorkload
625+
}
626+
}
627+
return workloadMap
628+
}

0 commit comments

Comments
 (0)