[ws-manager] Sketch of re-creating workspace pods

geropl · geropl · commit b1e5d07cca1e · 2024-09-19T15:43:05.000Z
diff --git a/components/ws-manager-api/go/config/config.go b/components/ws-manager-api/go/config/config.go
@@ -142,6 +142,11 @@ type Configuration struct {
 
 	// SSHGatewayCAPublicKey is a CA public key
 	SSHGatewayCAPublicKey string
+
+	// PodRecreationMaxRetries limits how often a workspace pod that was rejected by its node is re-created before we give up
+	PodRecreationMaxRetries int `json:"podRecreationMaxRetries,omitempty"`
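+	// PodRecreationBackoff is how long the reconciler waits before requeueing a workspace whose pod is to be re-created (5s is used if unset)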
+	PodRecreationBackoff util.Duration `json:"podRecreationBackoff,omitempty"`
 }
 
 type WorkspaceClass struct {
diff --git a/components/ws-manager-api/go/crd/v1/workspace_types.go b/components/ws-manager-api/go/crd/v1/workspace_types.go
@@ -170,9 +170,10 @@ func (ps PortSpec) Equal(other PortSpec) bool {
 
 // WorkspaceStatus defines the observed state of Workspace
 type WorkspaceStatus struct {
-	PodStarts  int    `json:"podStarts"`
-	URL        string `json:"url,omitempty" scrub:"redact"`
-	OwnerToken string `json:"ownerToken,omitempty" scrub:"redact"`
+	PodStarts    int    `json:"podStarts"`
+	PodRecreated int    `json:"podRecreated,omitempty"`
+	URL          string `json:"url,omitempty" scrub:"redact"`
+	OwnerToken   string `json:"ownerToken,omitempty" scrub:"redact"`
 
 	// +kubebuilder:default=Unknown
 	Phase WorkspacePhase `json:"phase,omitempty"`
@@ -263,6 +264,9 @@ const (
 	// WorkspaceContainerRunning is true if the workspace container is running.
 	// Used to determine if a backup can be taken, only once the container is stopped.
 	WorkspaceConditionContainerRunning WorkspaceCondition = "WorkspaceContainerRunning"
+
+	// WorkspaceConditionPodRejected is true if we detected that the pod was rejected by the node
+	WorkspaceConditionPodRejected WorkspaceCondition = "PodRejected"
 )
 
 func NewWorkspaceConditionDeployed() metav1.Condition {
@@ -291,6 +295,15 @@ func NewWorkspaceConditionFailed(message string) metav1.Condition {
 	}
 }
 
+// NewWorkspaceConditionPodRejected builds a PodRejected condition that carries the
+// given status and message.
+func NewWorkspaceConditionPodRejected(message string, status metav1.ConditionStatus) metav1.Condition {
+	cond := metav1.Condition{Type: string(WorkspaceConditionPodRejected), Status: status, Message: message}
+	// The transition time is stamped when the condition is constructed.
+	cond.LastTransitionTime = metav1.Now()
+	return cond
+}
+
 func NewWorkspaceConditionTimeout(message string) metav1.Condition {
 	return metav1.Condition{
 		Type:               string(WorkspaceConditionTimeout),
diff --git a/components/ws-manager-mk2/controllers/status.go b/components/ws-manager-mk2/controllers/status.go
@@ -123,6 +123,16 @@ func (r *WorkspaceReconciler) updateWorkspaceStatus(ctx context.Context, workspa
 		workspace.Status.Phase = *phase
 	}
 
+	if failure != "" && !workspace.IsConditionTrue(workspacev1.WorkspaceConditionFailed) {
+		// Check: A situation where we want to retry?
+		if pod.Status.Phase == corev1.PodFailed && (pod.Status.Reason == "NodeAffinity" || pod.Status.Reason == "OutOfCPU") && strings.HasPrefix(pod.Status.Message, "Pod was rejected") {
+			// This is a situation where we want to re-create the pod!
+			log.Info("workspace scheduling failed", "workspace", workspace.Name, "reason", failure)
+			workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionPodRejected(failure, metav1.ConditionTrue))
+			r.Recorder.Event(workspace, corev1.EventTypeWarning, "PodRejected", failure)
+		}
+	}
+
 	if failure != "" && !workspace.IsConditionTrue(workspacev1.WorkspaceConditionFailed) {
 		// workspaces can fail only once - once there is a failed condition set, stick with it
 		log.Info("workspace failed", "workspace", workspace.Name, "reason", failure)
diff --git a/components/ws-manager-mk2/controllers/subscriber_controller.go b/components/ws-manager-mk2/controllers/subscriber_controller.go
@@ -61,6 +61,12 @@ func (r *SubscriberReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		workspace.Status.Conditions = []metav1.Condition{}
 	}
 
+	if workspace.IsConditionTrue(workspacev1.WorkspaceConditionPodRejected) {
+		// In this situation, we are about to re-create the pod. We don't want clients to see all the "stopping, stopped, starting" chatter, so we hide it here.
+		// TODO(gpl) Is this a sane approach?
+		return ctrl.Result{}, nil
+	}
+
 	if r.OnReconcile != nil {
 		r.OnReconcile(ctx, &workspace)
 	}
diff --git a/components/ws-manager-mk2/controllers/workspace_controller.go b/components/ws-manager-mk2/controllers/workspace_controller.go
@@ -204,8 +204,6 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
 				log.Error(err, "unable to create Pod for Workspace", "pod", pod)
 				return ctrl.Result{Requeue: true}, err
 			} else {
-				// TODO(cw): replicate the startup mechanism where pods can fail to be scheduled,
-				//			 need to be deleted and re-created
 				// Must increment and persist the pod starts, and ensure we retry on conflict.
 				// If we fail to persist this value, it's possible that the Pod gets recreated
 				// when the workspace stops, due to PodStarts still being 0 when the original Pod
@@ -221,6 +219,34 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
 				r.Recorder.Event(workspace, corev1.EventTypeNormal, "Creating", "")
 			}
 
+		case workspace.Status.Phase == workspacev1.WorkspacePhaseStopped && workspace.IsConditionTrue(workspacev1.WorkspaceConditionPodRejected):
+			if workspace.Status.PodRecreated > r.Config.PodRecreationMaxRetries {
+				workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionPodRejected(fmt.Sprintf("Pod reached maximum recreations %d, failing", workspace.Status.PodRecreated), metav1.ConditionFalse))
+				return ctrl.Result{Requeue: true}, nil // requeue so we end up in the "Stopped" case below
+			}
+
+			// Must persist the PodStarts reset and the PodRecreated increment, and bail out with the
+			// error if that fails. If these values were not persisted, PodRecreated would never
+			// exceed PodRecreationMaxRetries and the Pod could presumably get recreated endlessly
+			// whenever it is rejected again.
+			// Use a Patch instead of an Update, to prevent conflicts.
+			patch := client.MergeFrom(workspace.DeepCopy())
+			workspace.Status.PodStarts = 0
+			workspace.Status.PodRecreated++
+			workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionPodRejected(fmt.Sprintf("Recreating pod... (%d retry)", workspace.Status.PodRecreated), metav1.ConditionFalse))
+			if err := r.Status().Patch(ctx, workspace, patch); err != nil {
+				log.Error(err, "Failed to patch PodStarts=0,PodRecreated++ in workspace status")
+				return ctrl.Result{}, err
+			}
+
+			requeueAfter := 5 * time.Second
+			if r.Config.PodRecreationBackoff != 0 {
+				requeueAfter = time.Duration(r.Config.PodRecreationBackoff)
+			}
+
+			r.Recorder.Event(workspace, corev1.EventTypeNormal, "Recreating", "")
+			return ctrl.Result{Requeue: true, RequeueAfter: requeueAfter}, nil
+
 		case workspace.Status.Phase == workspacev1.WorkspacePhaseStopped:
 			if err := r.deleteWorkspaceSecrets(ctx, workspace); err != nil {
 				return ctrl.Result{}, err
@@ -403,7 +429,9 @@ func isStartFailure(ws *workspacev1.Workspace) bool {
 	isAborted := ws.IsConditionTrue(workspacev1.WorkspaceConditionAborted)
 	// Also ignore workspaces that are requested to be stopped before they became ready.
 	isStoppedByRequest := ws.IsConditionTrue(workspacev1.WorkspaceConditionStoppedByRequest)
-	return !everReady && !isAborted && !isStoppedByRequest
+	// Also ignore pods that got rejected by the node
+	isPodRejected := ws.IsConditionTrue(workspacev1.WorkspaceConditionPodRejected)
+	return !everReady && !isAborted && !isStoppedByRequest && !isPodRejected
 }
 
 func (r *WorkspaceReconciler) emitPhaseEvents(ctx context.Context, ws *workspacev1.Workspace, old *workspacev1.WorkspaceStatus) {

Original file line number	Diff line number	Diff line change
`@@ -61,6 +61,12 @@ func (r *SubscriberReconciler) Reconcile(ctx context.Context, req ctrl.Request)`
`61`	`61`	`workspace.Status.Conditions = []metav1.Condition{}`
`62`	`62`	`}`
`63`	`63`
	`64`	`+ if workspace.IsConditionTrue(workspacev1.WorkspaceConditionPodRejected) {`
	`65`	`+ // In this situation, we are about to re-create the pod. We don't want clients to see all the "stopping, stopped, starting" chatter, so we hide it here.`
	`66`	`+ // TODO(gpl) Is this a sane approach?`
	`67`	`+ return ctrl.Result{}, nil`
	`68`	`+ }`
	`69`	`+`
`64`	`70`	`if r.OnReconcile != nil {`
`65`	`71`	`r.OnReconcile(ctx, &workspace)`
`66`	`72`	`}`