Skip to content

Commit fcf408c

Browse files
authored
Make workload deletion asynchronous (#738)
This copies the same behaviour added recently for stop. This change also fixes a bug where the deletion operation did not clean up the ingress/egress containers. While looking at the runtime code, I discovered that the runtime does a force delete of containers, so there is no need to explicitly stop the container before force deleting.
1 parent 0bdd54f commit fcf408c

File tree

3 files changed

+51
-66
lines changed

3 files changed

+51
-66
lines changed

cmd/thv/app/rm.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@ func rmCmdFunc(cmd *cobra.Command, args []string) error {
2929
}
3030

3131
// Delete container.
32-
if err := manager.DeleteWorkload(ctx, containerName); err != nil {
32+
group, err := manager.DeleteWorkload(ctx, containerName)
33+
if err != nil {
34+
return fmt.Errorf("failed to delete container: %v", err)
35+
}
36+
37+
// Wait for the deletion to complete.
38+
if err := group.Wait(); err != nil {
3339
return fmt.Errorf("failed to delete container: %v", err)
3440
}
3541

pkg/api/v1/workloads.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ func (s *WorkloadRoutes) stopWorkload(w http.ResponseWriter, r *http.Request) {
124124
ctx := r.Context()
125125
name := chi.URLParam(r, "name")
126126
// Note that this is an asynchronous operation.
127-
// In the API, we do not wait for the operation to complete.
127+
// The request is not blocked on completion.
128128
_, err := s.manager.StopWorkload(ctx, name)
129129
if err != nil {
130130
if errors.Is(err, workloads.ErrContainerNotFound) {
@@ -174,7 +174,9 @@ func (s *WorkloadRoutes) stopAllWorkloads(w http.ResponseWriter, r *http.Request
174174
func (s *WorkloadRoutes) deleteWorkload(w http.ResponseWriter, r *http.Request) {
175175
ctx := r.Context()
176176
name := chi.URLParam(r, "name")
177-
err := s.manager.DeleteWorkload(ctx, name)
177+
// Note that this is an asynchronous operation.
178+
// The request is not blocked on completion.
179+
_, err := s.manager.DeleteWorkload(ctx, name)
178180
if err != nil {
179181
if errors.Is(err, workloads.ErrContainerNotFound) {
180182
http.Error(w, "Workload not found", http.StatusNotFound)

pkg/workloads/manager.go

Lines changed: 40 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ type Manager interface {
2929
GetWorkload(ctx context.Context, name string) (Workload, error)
3030
// ListWorkloads lists all ToolHive-managed containers.
3131
ListWorkloads(ctx context.Context, listAll bool) ([]Workload, error)
32-
// DeleteWorkload deletes a container and its associated proxy process.
33-
// The container will be stopped if it is still running.
34-
DeleteWorkload(ctx context.Context, name string) error
32+
// DeleteWorkload deletes a container and all associated processes/containers.
33+
// It is implemented as an asynchronous operation which returns an errgroup.Group
34+
DeleteWorkload(ctx context.Context, name string) (*errgroup.Group, error)
3535
// StopWorkload stops the named workload.
36-
// It is implemented as an asynchronous operation which returns a errgroup.Group
36+
// It is implemented as an asynchronous operation which returns an errgroup.Group
3737
StopWorkload(ctx context.Context, name string) (*errgroup.Group, error)
3838
// StopAllWorkloads stops all running workloads.
3939
// It is implemented as an asynchronous operation which returns an errgroup.Group
@@ -101,66 +101,55 @@ func (d *defaultManager) ListWorkloads(ctx context.Context, listAll bool) ([]Wor
101101
return workloads, nil
102102
}
103103

104-
func (d *defaultManager) DeleteWorkload(ctx context.Context, name string) error {
104+
func (d *defaultManager) DeleteWorkload(ctx context.Context, name string) (*errgroup.Group, error) {
105105
// We need several fields from the container struct for deletion.
106106
container, err := d.findContainerByName(ctx, name)
107107
if err != nil {
108-
return err
108+
return nil, err
109109
}
110110

111111
containerID := container.ID
112-
isRunning := isContainerRunning(container)
113112
containerLabels := container.Labels
114-
115-
// Check if the container is running
116-
if isRunning {
117-
// Get the base container name for proxy stopping
118-
containerBaseName := labels.GetContainerBaseName(containerLabels)
119-
120-
// Stop the proxy process first (like StopWorkload does)
121-
if containerBaseName != "" {
122-
proxy.StopProcess(containerBaseName)
123-
}
124-
125-
// Stop the container if it's running
126-
if err := d.stopContainer(ctx, containerID, name); err != nil {
127-
return fmt.Errorf("failed to stop container: %v", err)
128-
}
129-
}
130-
131-
// Remove the container
132-
logger.Infof("Removing container %s...", name)
133-
if err := d.runtime.RemoveWorkload(ctx, containerID); err != nil {
134-
return fmt.Errorf("failed to remove container: %v", err)
135-
}
136-
137-
// Get the base name from the container labels
138113
baseName := labels.GetContainerBaseName(containerLabels)
139-
if baseName != "" {
140-
// Clean up temporary permission profile before deleting saved state
141-
if err := d.cleanupTempPermissionProfile(ctx, baseName); err != nil {
142-
logger.Warnf("Warning: Failed to cleanup temporary permission profile: %v", err)
143-
}
144114

145-
// Delete the saved state if it exists
146-
if err := runner.DeleteSavedConfig(ctx, baseName); err != nil {
147-
logger.Warnf("Warning: Failed to delete saved state: %v", err)
148-
} else {
149-
logger.Infof("Saved state for %s removed", baseName)
115+
// Create second errorgroup for deletion.
116+
deleteGroup := &errgroup.Group{}
117+
deleteGroup.Go(func() error {
118+
// Remove the container
119+
logger.Infof("Removing container %s...", name)
120+
if err := d.runtime.RemoveWorkload(ctx, containerID); err != nil {
121+
return fmt.Errorf("failed to remove container: %v", err)
150122
}
151123

152-
logger.Infof("Container %s removed", name)
124+
// Get the base name from the container labels
125+
if baseName != "" {
126+
// Clean up temporary permission profile before deleting saved state
127+
if err := d.cleanupTempPermissionProfile(ctx, baseName); err != nil {
128+
logger.Warnf("Warning: Failed to cleanup temporary permission profile: %v", err)
129+
}
153130

154-
if shouldRemoveClientConfig() {
155-
if err := removeClientConfigurations(name); err != nil {
156-
logger.Warnf("Warning: Failed to remove client configurations: %v", err)
131+
// Delete the saved state if it exists
132+
if err := runner.DeleteSavedConfig(ctx, baseName); err != nil {
133+
logger.Warnf("Warning: Failed to delete saved state: %v", err)
157134
} else {
158-
logger.Infof("Client configurations for %s removed", name)
135+
logger.Infof("Saved state for %s removed", baseName)
136+
}
137+
138+
logger.Infof("Container %s removed", name)
139+
140+
if shouldRemoveClientConfig() {
141+
if err := removeClientConfigurations(name); err != nil {
142+
logger.Warnf("Warning: Failed to remove client configurations: %v", err)
143+
} else {
144+
logger.Infof("Client configurations for %s removed", name)
145+
}
159146
}
160147
}
161-
}
162148

163-
return nil
149+
return nil
150+
})
151+
152+
return deleteGroup, nil
164153
}
165154

166155
func (d *defaultManager) StopWorkload(ctx context.Context, name string) (*errgroup.Group, error) {
@@ -484,17 +473,6 @@ func (d *defaultManager) findContainerByName(ctx context.Context, name string) (
484473
return nil, fmt.Errorf("%w: %s", ErrContainerNotFound, name)
485474
}
486475

487-
// stopContainer stops the container
488-
func (d *defaultManager) stopContainer(ctx context.Context, containerID, containerName string) error {
489-
logger.Infof("Stopping container %s...", containerName)
490-
if err := d.runtime.StopWorkload(ctx, containerID); err != nil {
491-
return fmt.Errorf("failed to stop container: %w", err)
492-
}
493-
494-
logger.Infof("Container %s stopped", containerName)
495-
return nil
496-
}
497-
498476
func shouldRemoveClientConfig() bool {
499477
c := config.GetConfig()
500478
return len(c.Clients.RegisteredClients) > 0 || c.Clients.AutoDiscovery
@@ -589,14 +567,13 @@ func (d *defaultManager) stopWorkloads(ctx context.Context, workloads []stopWork
589567
group := errgroup.Group{}
590568
for _, workload := range workloads {
591569
group.Go(func() error {
592-
logger.Infof("Stopping workload %s...", workload.Name)
593570
// Stop the proxy process
594571
proxy.StopProcess(workload.Name)
595572

573+
logger.Infof("Stopping containers for %s...", workload.Name)
596574
// Stop the container
597-
err := d.stopContainer(ctx, workload.ID, workload.Name)
598-
if err != nil {
599-
return err
575+
if err := d.runtime.StopWorkload(ctx, workload.ID); err != nil {
576+
return fmt.Errorf("failed to stop container: %w", err)
600577
}
601578

602579
if shouldRemoveClientConfig() {

0 commit comments

Comments
 (0)