@@ -389,6 +389,127 @@ var _ = Describe("WorkspaceController", func() {
389389 })
390390 })
391391
392+ It ("pod rejection should result in a retry" , func () {
393+ ws := newWorkspace (uuid .NewString (), "default" )
394+
395+ // ### prepare block start
396+ By ("creating workspace" )
397+ // Simulate pod getting scheduled to a node.
398+ var node corev1.Node
399+ node .Name = uuid .NewString ()
400+ Expect (k8sClient .Create (ctx , & node )).To (Succeed ())
401+ // Manually create the workspace pod with the node name.
402+ // We can't update the pod with the node name, as this operation
403+ // is only allowed for the scheduler. So as a hack, we manually
404+ // create the workspace's pod.
405+ pod := & corev1.Pod {
406+ ObjectMeta : metav1.ObjectMeta {
407+ Name : fmt .Sprintf ("ws-%s" , ws .Name ),
408+ Namespace : ws .Namespace ,
409+ Finalizers : []string {workspacev1 .GitpodFinalizerName },
410+ Labels : map [string ]string {
411+ wsk8s .WorkspaceManagedByLabel : constants .ManagedBy ,
412+ },
413+ },
414+ Spec : corev1.PodSpec {
415+ NodeName : node .Name ,
416+ Containers : []corev1.Container {{
417+ Name : "workspace" ,
418+ Image : "someimage" ,
419+ }},
420+ },
421+ }
422+
423+ Expect (k8sClient .Create (ctx , pod )).To (Succeed ())
424+ pod = createWorkspaceExpectPod (ws )
425+ updateObjWithRetries (k8sClient , pod , false , func (pod * corev1.Pod ) {
426+ Expect (ctrl .SetControllerReference (ws , pod , k8sClient .Scheme ())).To (Succeed ())
427+ })
428+ // mimic the regular "start" phase
429+ updateObjWithRetries (k8sClient , ws , true , func (ws * workspacev1.Workspace ) {
430+ ws .Status .PodStarts = 1
431+ ws .Status .PodRecreated = 0
432+ })
433+
434+ // Wait until controller has reconciled at least once (by waiting for the runtime status to get updated).
435+ // This is necessary for the metrics to get recorded correctly. If we don't wait, the first reconciliation
436+ // might be once the Pod is already in a running state, and hence the metric state might not record e.g. content
437+ // restore.
438+ // This is only necessary because we manually created the pod, normally the Pod creation is the controller's
439+ // first reconciliation which ensures the metrics are recorded from the workspace's initial state.
440+
441+ Eventually (func (g Gomega ) {
442+ g .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : ws .Name , Namespace : ws .Namespace }, ws )).To (Succeed ())
443+ g .Expect (ws .Status .Runtime ).ToNot (BeNil ())
444+ g .Expect (ws .Status .Runtime .PodName ).To (Equal (pod .Name ))
445+ }, timeout , interval ).Should (Succeed ())
446+
447+ // Await "deployed" condition, and check we are good
448+ expectConditionEventually (ws , string (workspacev1 .WorkspaceConditionDeployed ), metav1 .ConditionTrue , "" )
449+ Eventually (func (g Gomega ) {
450+ g .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : ws .Name , Namespace : ws .Namespace }, ws )).To (Succeed ())
451+ g .Expect (ws .Status .PodStarts ).To (Equal (1 ))
452+ g .Expect (ws .Status .PodRecreated ).To (Equal (0 ))
453+ }, timeout , interval ).Should (Succeed ())
454+
455+ // ### prepare block end
456+
457+ // ### trigger block start
458+ // Make pod be rejected 🪄
459+ By ("rejecting pod" )
460+ rejectPod (pod )
461+
462+ // TODO(gpl): how to check for transient states like:
463+ // - pod deletion
464+ // - PodRejected condition
465+ // By("await pod deleted")
466+ // expectWorkspaceCleanup(ws, pod)
467+ // Eventually(func(g Gomega) {
468+ // g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, pod)).To(MatchError(ContainSubstring("not found")))
469+ // }, timeout, interval).Should(Succeed())
470+
471+ By ("await pod recreation" )
472+ Eventually (func (g Gomega ) {
473+ g .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : ws .Name , Namespace : ws .Namespace }, ws )).To (Succeed ())
474+ g .Expect (ws .Status .PodRecreated ).To (Equal (1 ))
475+ g .Expect (ws .Status .Phase ).To (Equal (workspacev1 .WorkspacePhasePending ))
476+ }, timeout , interval ).Should (Succeed ())
477+ // ### trigger block end
478+
479+ // ### retry block start
480+ // Transition Pod to pending, and expect workspace to reach Creating phase.
481+ // This should also cause create time metrics to be recorded.
482+ updateObjWithRetries (k8sClient , pod , true , func (pod * corev1.Pod ) {
483+ pod .Status .Phase = corev1 .PodPending
484+ pod .Status .ContainerStatuses = []corev1.ContainerStatus {{
485+ State : corev1.ContainerState {
486+ Waiting : & corev1.ContainerStateWaiting {
487+ Reason : "ContainerCreating" ,
488+ },
489+ },
490+ Name : "workspace" ,
491+ }}
492+ })
493+
494+ expectPhaseEventually (ws , workspacev1 .WorkspacePhaseCreating )
495+
496+ // Transition Pod to running, and expect workspace to reach Running phase.
497+ // This should also cause e.g. startup time metrics to be recorded.
498+ updateObjWithRetries (k8sClient , pod , true , func (pod * corev1.Pod ) {
499+ pod .Status .Phase = corev1 .PodRunning
500+ pod .Status .ContainerStatuses = []corev1.ContainerStatus {{
501+ Name : "workspace" ,
502+ Ready : true ,
503+ }}
504+ })
505+
506+ updateObjWithRetries (k8sClient , ws , true , func (ws * workspacev1.Workspace ) {
507+ ws .Status .SetCondition (workspacev1 .NewWorkspaceConditionContentReady (metav1 .ConditionTrue , workspacev1 .ReasonInitializationSuccess , "" ))
508+ })
509+
510+ expectPhaseEventually (ws , workspacev1 .WorkspacePhaseRunning )
511+ // ### retry block end
512+ })
392513 })
393514
394515 Context ("with headless workspaces" , func () {
@@ -634,6 +755,16 @@ func requestStop(ws *workspacev1.Workspace) {
634755 })
635756}
636757
758+ func rejectPod (pod * corev1.Pod ) {
759+ GinkgoHelper ()
760+ By ("adding pod rejected condition" )
761+ updateObjWithRetries (k8sClient , pod , true , func (pod * corev1.Pod ) {
762+ pod .Status .Phase = corev1 .PodFailed
763+ pod .Status .Reason = "NodeAffinity"
764+ pod .Status .Message = "Pod was rejected"
765+ })
766+ }
767+
637768func markReady (ws * workspacev1.Workspace ) {
638769 GinkgoHelper ()
639770 By ("adding content ready condition" )
0 commit comments