Skip to content

Commit d5512c7

Browse files
committed
WIP: test
1 parent b1e5d07 commit d5512c7

File tree

3 files changed

+144
-5
lines changed

3 files changed

+144
-5
lines changed

components/ws-manager-api/go/crd/v1/workspace_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ func (ps PortSpec) Equal(other PortSpec) bool {
171171
// WorkspaceStatus defines the observed state of Workspace
172172
type WorkspaceStatus struct {
173173
PodStarts int `json:"podStarts"`
174-
PodRecreated int `json:"podRecreated,omitempty"`
174+
PodRecreated int `json:"podRecreated"`
175175
URL string `json:"url,omitempty" scrub:"redact"`
176176
OwnerToken string `json:"ownerToken,omitempty" scrub:"redact"`
177177

components/ws-manager-mk2/controllers/workspace_controller.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,22 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
230230
// when the workspace stops, due to PodStarts still being 0 when the original Pod
231231
// disappears.
232232
// Use a Patch instead of an Update, to prevent conflicts.
233-
patch := client.MergeFrom(workspace.DeepCopy())
234-
workspace.Status.PodStarts = 0
235-
workspace.Status.PodRecreated++
233+
dc := workspace.DeepCopy()
234+
patch := client.MergeFrom(dc)
235+
236+
// Reset status
237+
sc := workspace.Status.DeepCopy()
238+
workspace.Status = workspacev1.WorkspaceStatus{}
239+
workspace.Status.OwnerToken = sc.OwnerToken
240+
workspace.Status.PodRecreated = sc.PodRecreated + 1
241+
log.WithValues("ws", workspace.Status.PodRecreated, "oldWs", sc.PodRecreated, "patch", dc.Status.PodRecreated).Info("PATCH")
242+
236243
workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionPodRejected(fmt.Sprintf("Recreating pod... (%d retry)", workspace.Status.PodRecreated), metav1.ConditionFalse))
237244
if err := r.Status().Patch(ctx, workspace, patch); err != nil {
238-
log.Error(err, "Failed to patch PodStarts=0,PodRecreated++ in workspace status")
245+
log.Error(err, "Failed to patch workspace status-reset")
239246
return ctrl.Result{}, err
240247
}
248+
log.WithValues("ws", workspace.Status.PodRecreated, "oldWs", sc.PodRecreated, "patch", dc.Status.PodRecreated).Info("AFTER PATCH")
241249

242250
requeueAfter := 5 * time.Second
243251
if r.Config.PodRecreationBackoff != 0 {

components/ws-manager-mk2/controllers/workspace_controller_test.go

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,127 @@ var _ = Describe("WorkspaceController", func() {
389389
})
390390
})
391391

392+
It("pod rejection should result in a retry", func() {
393+
ws := newWorkspace(uuid.NewString(), "default")
394+
395+
// ### prepare block start
396+
By("creating workspace")
397+
// Simulate pod getting scheduled to a node.
398+
var node corev1.Node
399+
node.Name = uuid.NewString()
400+
Expect(k8sClient.Create(ctx, &node)).To(Succeed())
401+
// Manually create the workspace pod with the node name.
402+
// We can't update the pod with the node name, as this operation
403+
// is only allowed for the scheduler. So as a hack, we manually
404+
// create the workspace's pod.
405+
pod := &corev1.Pod{
406+
ObjectMeta: metav1.ObjectMeta{
407+
Name: fmt.Sprintf("ws-%s", ws.Name),
408+
Namespace: ws.Namespace,
409+
Finalizers: []string{workspacev1.GitpodFinalizerName},
410+
Labels: map[string]string{
411+
wsk8s.WorkspaceManagedByLabel: constants.ManagedBy,
412+
},
413+
},
414+
Spec: corev1.PodSpec{
415+
NodeName: node.Name,
416+
Containers: []corev1.Container{{
417+
Name: "workspace",
418+
Image: "someimage",
419+
}},
420+
},
421+
}
422+
423+
Expect(k8sClient.Create(ctx, pod)).To(Succeed())
424+
pod = createWorkspaceExpectPod(ws)
425+
updateObjWithRetries(k8sClient, pod, false, func(pod *corev1.Pod) {
426+
Expect(ctrl.SetControllerReference(ws, pod, k8sClient.Scheme())).To(Succeed())
427+
})
428+
// mimic the regular "start" phase
429+
updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
430+
ws.Status.PodStarts = 1
431+
ws.Status.PodRecreated = 0
432+
})
433+
434+
// Wait until controller has reconciled at least once (by waiting for the runtime status to get updated).
435+
// This is necessary for the metrics to get recorded correctly. If we don't wait, the first reconciliation
436+
// might be once the Pod is already in a running state, and hence the metric state might not record e.g. content
437+
// restore.
438+
// This is only necessary because we manually created the pod, normally the Pod creation is the controller's
439+
// first reconciliation which ensures the metrics are recorded from the workspace's initial state.
440+
441+
Eventually(func(g Gomega) {
442+
g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
443+
g.Expect(ws.Status.Runtime).ToNot(BeNil())
444+
g.Expect(ws.Status.Runtime.PodName).To(Equal(pod.Name))
445+
}, timeout, interval).Should(Succeed())
446+
447+
// Await "deployed" condition, and check we are good
448+
expectConditionEventually(ws, string(workspacev1.WorkspaceConditionDeployed), metav1.ConditionTrue, "")
449+
Eventually(func(g Gomega) {
450+
g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
451+
g.Expect(ws.Status.PodStarts).To(Equal(1))
452+
g.Expect(ws.Status.PodRecreated).To(Equal(0))
453+
}, timeout, interval).Should(Succeed())
454+
455+
// ### prepare block end
456+
457+
// ### trigger block start
458+
// Make pod be rejected 🪄
459+
By("rejecting pod")
460+
rejectPod(pod)
461+
462+
// TODO(gpl): how to check for transient states like:
463+
// - pod deletion
464+
// - PodRejected condition
465+
// By("await pod deleted")
466+
// expectWorkspaceCleanup(ws, pod)
467+
// Eventually(func(g Gomega) {
468+
// g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, pod)).To(MatchError(ContainSubstring("not found")))
469+
// }, timeout, interval).Should(Succeed())
470+
471+
By("await pod recreation")
472+
Eventually(func(g Gomega) {
473+
g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
474+
g.Expect(ws.Status.PodRecreated).To(Equal(1))
475+
g.Expect(ws.Status.Phase).To(Equal(workspacev1.WorkspacePhasePending))
476+
}, timeout, interval).Should(Succeed())
477+
// ### trigger block end
478+
479+
// ### retry block start
480+
// Transition Pod to pending, and expect workspace to reach Creating phase.
481+
// This should also cause create time metrics to be recorded.
482+
updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
483+
pod.Status.Phase = corev1.PodPending
484+
pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
485+
State: corev1.ContainerState{
486+
Waiting: &corev1.ContainerStateWaiting{
487+
Reason: "ContainerCreating",
488+
},
489+
},
490+
Name: "workspace",
491+
}}
492+
})
493+
494+
expectPhaseEventually(ws, workspacev1.WorkspacePhaseCreating)
495+
496+
// Transition Pod to running, and expect workspace to reach Running phase.
497+
// This should also cause e.g. startup time metrics to be recorded.
498+
updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
499+
pod.Status.Phase = corev1.PodRunning
500+
pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
501+
Name: "workspace",
502+
Ready: true,
503+
}}
504+
})
505+
506+
updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
507+
ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, workspacev1.ReasonInitializationSuccess, ""))
508+
})
509+
510+
expectPhaseEventually(ws, workspacev1.WorkspacePhaseRunning)
511+
// ### retry block end
512+
})
392513
})
393514

394515
Context("with headless workspaces", func() {
@@ -634,6 +755,16 @@ func requestStop(ws *workspacev1.Workspace) {
634755
})
635756
}
636757

758+
// rejectPod is a test helper that rewrites the given pod's status so that it
// looks like a failed pod: phase PodFailed, reason "NodeAffinity" and message
// "Pod was rejected". The change is applied through updateObjWithRetries using
// the suite-wide k8sClient.
//
// NOTE(review): the By() step text speaks of a "pod rejected condition", but
// no pod condition is touched here; only Status.Phase/Reason/Message are set.
// NOTE(review): presumably this status triple is what the workspace controller
// treats as a rejected pod — confirm against the controller's detection logic.
func rejectPod(pod *corev1.Pod) {
759+
// Mark this function as a Ginkgo helper so that failures are attributed to
// the calling spec rather than to this function.
GinkgoHelper()
760+
By("adding pod rejected condition")
761+
// NOTE(review): the `true` argument presumably selects a status update
// rather than a regular object update — verify against updateObjWithRetries.
updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
762+
pod.Status.Phase = corev1.PodFailed
763+
pod.Status.Reason = "NodeAffinity"
764+
pod.Status.Message = "Pod was rejected"
765+
})
766+
}
767+
637768
func markReady(ws *workspacev1.Workspace) {
638769
GinkgoHelper()
639770
By("adding content ready condition")

0 commit comments

Comments
 (0)