Skip to content

Commit da40734

Browse files
committed
force kill first
1 parent f32997a commit da40734

File tree

4 files changed

+48
-8
lines changed

4 files changed

+48
-8
lines changed

components/ws-daemon/pkg/container/container.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ type Runtime interface {
5757
DisposeContainer(ctx context.Context, workspaceInstanceID string)
5858

5959
GetContainerTaskInfo(ctx context.Context, id ID) (*task.Process, error)
60+
61+
ForceKillContainerTask(ctx context.Context, id ID) error
6062
}
6163

6264
var (

components/ws-daemon/pkg/container/containerd.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -587,15 +587,16 @@ func (s *Containerd) GetContainerTaskInfo(ctx context.Context, id ID) (*task.Pro
587587
if task.Process == nil {
588588
return nil, fmt.Errorf("task has no process")
589589
}
590+
return task.Process, nil
591+
}
590592

591-
// try send last kill signal to the task
592-
_, _ = s.Client.TaskService().Kill(ctx, &tasks.KillRequest{
593+
func (s *Containerd) ForceKillContainerTask(ctx context.Context, id ID) error {
594+
_, err := s.Client.TaskService().Kill(ctx, &tasks.KillRequest{
593595
ContainerID: string(id),
594596
Signal: 9,
595597
All: true,
596598
})
597-
598-
return task.Process, nil
599+
return err
599600
}
600601

601602
var kubepodsQoSRegexp = regexp.MustCompile(`([^/]+)-([^/]+)-pod`)

components/ws-daemon/pkg/controller/workspace_controller.go

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -358,12 +358,30 @@ func (wsc *WorkspaceController) doWorkspaceContentBackup(ctx context.Context, sp
358358
// We should get an event when the condition changes, but requeue
359359
// anyway to make sure we act on it in time.
360360
return ctrl.Result{RequeueAfter: 500 * time.Millisecond}, nil
361-
} else {
362-
glog.WithFields(ws.OWI()).WithField("workspace", req.NamespacedName).Warn("workspace container is still running after 5 minutes of deletion, starting backup anyway")
363-
err = wsc.dumpWorkspaceContainerInfo(ctx, ws)
361+
}
362+
363+
if !ws.IsConditionTrue(workspacev1.WorkspaceConditionForceKilledTask) {
364+
err = wsc.forceKillContainerTask(ctx, ws)
365+
if err != nil {
366+
return ctrl.Result{}, fmt.Errorf("failed to force kill container task: %w", err)
367+
}
368+
err = retry.RetryOnConflict(retryParams, func() error {
369+
if err := wsc.Get(ctx, req.NamespacedName, ws); err != nil {
370+
return err
371+
}
372+
ws.Status.SetCondition(workspacev1.NewWorkspaceConditionForceKilledTask())
373+
return wsc.Client.Status().Update(ctx, ws)
374+
})
364375
if err != nil {
365-
glog.WithFields(ws.OWI()).WithField("workspace", req.NamespacedName).Errorf("failed to dump container info: %v", err)
376+
return ctrl.Result{}, fmt.Errorf("failed to set force killed task condition: %w", err)
366377
}
378+
return ctrl.Result{Requeue: true, RequeueAfter: 2 * time.Second}, nil
379+
}
380+
381+
glog.WithFields(ws.OWI()).WithField("workspace", req.NamespacedName).Warn("workspace container is still running after 5 minutes of deletion, starting backup anyway")
382+
err = wsc.dumpWorkspaceContainerInfo(ctx, ws)
383+
if err != nil {
384+
glog.WithFields(ws.OWI()).WithField("workspace", req.NamespacedName).Errorf("failed to dump container info: %v", err)
367385
}
368386
}
369387

@@ -474,6 +492,14 @@ func (wsc *WorkspaceController) dumpWorkspaceContainerInfo(ctx context.Context,
474492
return nil
475493
}
476494

495+
func (wsc *WorkspaceController) forceKillContainerTask(ctx context.Context, ws *workspacev1.Workspace) error {
496+
id, err := wsc.runtime.WaitForContainer(ctx, ws.Name)
497+
if err != nil {
498+
return fmt.Errorf("failed to wait for container: %w", err)
499+
}
500+
return wsc.runtime.ForceKillContainerTask(ctx, id)
501+
}
502+
477503
func (wsc *WorkspaceController) prepareInitializer(ctx context.Context, ws *workspacev1.Workspace) (*csapi.WorkspaceInitializer, error) {
478504
var init csapi.WorkspaceInitializer
479505
err := proto.Unmarshal(ws.Spec.Initializer, &init)

components/ws-manager-api/go/crd/v1/workspace_types.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,9 @@ const (
286286

287287
// WorkspaceConditionStateWiped is true once all state has successfully been wiped by ws-daemon. This is only set if PodRejected=true, and the rejected workspace has been deleted.
288288
WorkspaceConditionStateWiped WorkspaceCondition = "StateWiped"
289+
290+
// WorkspaceConditionForceKilledTask is true once a SIGKILL has been sent to the workspace's container task.
291+
WorkspaceConditionForceKilledTask WorkspaceCondition = "ForceKilledTask"
289292
)
290293

291294
func NewWorkspaceConditionDeployed() metav1.Condition {
@@ -440,6 +443,14 @@ func NewWorkspaceConditionContainerRunning(status metav1.ConditionStatus) metav1
440443
}
441444
}
442445

446+
func NewWorkspaceConditionForceKilledTask() metav1.Condition {
447+
return metav1.Condition{
448+
Type: string(WorkspaceConditionForceKilledTask),
449+
LastTransitionTime: metav1.Now(),
450+
Status: metav1.ConditionTrue,
451+
}
452+
}
453+
443454
// +kubebuilder:validation:Enum:=Unknown;Pending;Imagebuild;Creating;Initializing;Running;Stopping;Stopped
444455
type WorkspacePhase string
445456

0 commit comments

Comments
 (0)