@@ -538,6 +538,154 @@ var _ = Describe("WorkspaceController", func() {
538538 expectPhaseTransitions (su , []workspacev1.WorkspacePhase {workspacev1 .WorkspacePhasePending , workspacev1 .WorkspacePhaseCreating , workspacev1 .WorkspacePhaseInitializing , workspacev1 .WorkspacePhaseRunning })
539539 // ### validate end
540540 })
541+
542+ It ("pod recreation is only tried until limit" , func () {
543+ ws := newWorkspace (uuid .NewString (), "default" )
544+ m := collectMetricCounts (wsMetrics , ws )
545+ su := collectSubscriberUpdates ()
546+
547+ // ### prepare block start
548+ By ("creating workspace" )
549+ // Simulate pod getting scheduled to a node.
550+ var node corev1.Node
551+ node .Name = uuid .NewString ()
552+ Expect (k8sClient .Create (ctx , & node )).To (Succeed ())
553+ // Manually create the workspace pod with the node name.
554+ // We can't update the pod with the node name, as this operation
555+ // is only allowed for the scheduler. So as a hack, we manually
556+ // create the workspace's pod.
557+ pod := & corev1.Pod {
558+ ObjectMeta : metav1.ObjectMeta {
559+ Name : fmt .Sprintf ("ws-%s" , ws .Name ),
560+ Namespace : ws .Namespace ,
561+ Finalizers : []string {workspacev1 .GitpodFinalizerName },
562+ Labels : map [string ]string {
563+ wsk8s .WorkspaceManagedByLabel : constants .ManagedBy ,
564+ },
565+ },
566+ Spec : corev1.PodSpec {
567+ NodeName : node .Name ,
568+ Containers : []corev1.Container {{
569+ Name : "workspace" ,
570+ Image : "someimage" ,
571+ }},
572+ },
573+ }
574+
575+ Expect (k8sClient .Create (ctx , pod )).To (Succeed ())
576+ pod = createWorkspaceExpectPod (ws )
577+ updateObjWithRetries (k8sClient , pod , false , func (pod * corev1.Pod ) {
578+ Expect (ctrl .SetControllerReference (ws , pod , k8sClient .Scheme ())).To (Succeed ())
579+ })
580+
581+ podStarts := 1
582+ podRecreated := 0
583+ // mimic the regular "start" phase
584+ updateObjWithRetries (k8sClient , ws , true , func (ws * workspacev1.Workspace ) {
585+ ws .Status .PodStarts = podStarts
586+ ws .Status .PodRecreated = podRecreated
587+ })
588+
589+ startAndRejectWorkspace := func () {
590+
591+ // Wait until controller has reconciled at least once (by waiting for the runtime status to get updated).
592+ // This is necessary for the metrics to get recorded correctly. If we don't wait, the first reconciliation
593+ // might be once the Pod is already in a running state, and hence the metric state might not record e.g. content
594+ // restore.
595+ // This is only necessary because we manually created the pod, normally the Pod creation is the controller's
596+ // first reconciliation which ensures the metrics are recorded from the workspace's initial state.
597+
598+ Eventually (func (g Gomega ) {
599+ g .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : ws .Name , Namespace : ws .Namespace }, ws )).To (Succeed ())
600+ g .Expect (ws .Status .Runtime ).ToNot (BeNil ())
601+ g .Expect (ws .Status .Runtime .PodName ).To (Equal (pod .Name ))
602+ }, timeout , interval ).Should (Succeed ())
603+
604+ // Await "deployed" condition, and check we are good
605+ expectConditionEventually (ws , string (workspacev1 .WorkspaceConditionDeployed ), metav1 .ConditionTrue , "" )
606+ Eventually (func (g Gomega ) {
607+ g .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : ws .Name , Namespace : ws .Namespace }, ws )).To (Succeed ())
608+ g .Expect (ws .Status .PodStarts ).To (Equal (podStarts ))
609+ g .Expect (ws .Status .PodRecreated ).To (Equal (podRecreated ))
610+ }, timeout , interval ).Should (Succeed ())
611+
612+ // ### prepare block end
613+
614+ // ### trigger block start
615+ // Make pod be rejected 🪄
616+ By ("rejecting pod" )
617+ rejectPod (pod )
618+
619+ By ("await pod being in stopping" )
620+ Eventually (func (g Gomega ) {
621+ g .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : ws .Name , Namespace : ws .Namespace }, ws )).To (Succeed ())
622+ g .Expect (ws .Status .Phase ).To (Equal (workspacev1 .WorkspacePhaseStopping ))
623+ }, timeout , interval ).Should (Succeed ())
624+
625+ // when a rejected workspace pod is in stopping, ws-daemon wipes the state before it's moved to "stopped"
626+ // mimic this ws-daemon behavior
627+ updateObjWithRetries (k8sClient , ws , true , func (ws * workspacev1.Workspace ) {
628+ ws .Status .SetCondition (workspacev1 .NewWorkspaceConditionStateWiped ("" , metav1 .ConditionTrue ))
629+ })
630+
631+ podStarts ++
632+ podRecreated ++
633+
634+ By ("await pod recreation" )
635+ Eventually (func (g Gomega ) {
636+ g .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : ws .Name , Namespace : ws .Namespace }, ws )).To (Succeed ())
637+ g .Expect (ws .Status .PodRecreated ).To (Equal (podRecreated ))
638+ g .Expect (ws .Status .Phase ).To (Equal (workspacev1 .WorkspacePhasePending ))
639+ }, timeout , interval ).Should (Succeed ())
640+ // ### trigger block end
641+
642+ // ### retry block start
643+ // Transition Pod to pending, and expect workspace to reach Creating phase.
644+ // This should also cause create time metrics to be recorded.
645+ updateObjWithRetries (k8sClient , pod , true , func (pod * corev1.Pod ) {
646+ pod .Status .Phase = corev1 .PodPending
647+ pod .Status .ContainerStatuses = []corev1.ContainerStatus {{
648+ State : corev1.ContainerState {
649+ Waiting : & corev1.ContainerStateWaiting {
650+ Reason : "ContainerCreating" ,
651+ },
652+ },
653+ Name : "workspace" ,
654+ }}
655+ })
656+
657+ expectPhaseEventually (ws , workspacev1 .WorkspacePhaseCreating )
658+ // ### retry block end
659+ }
660+
661+ for j := 0 ; j < 4 ; j ++ {
662+ startAndRejectWorkspace ()
663+ }
664+
665+ expectWorkspaceCleanup (ws , pod )
666+
667+ // ### validate result
668+ Eventually (func (g Gomega ) {
669+ g .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : ws .Name , Namespace : ws .Namespace }, ws )).To (Succeed ())
670+ g .Expect (ws .Status .PodStarts ).To (Equal (4 ))
671+ g .Expect (ws .Status .PodRecreated ).To (Equal (3 ))
672+ }, timeout , interval ).Should (Succeed ())
673+ expectConditionEventually (ws , string (workspacev1 .WorkspaceConditionFailed ), metav1 .ConditionTrue , "" )
674+
675+ expectMetricsDelta (m , collectMetricCounts (wsMetrics , ws ), metricCounts {
676+ restores : 1 ,
677+ backups : 0 ,
678+ backupFailures : 0 ,
679+ failures : 1 ,
680+ creatingCounts : 1 ,
681+ stops : map [StopReason ]int {StopReasonStartFailure : 1 },
682+ starts : 1 , // this is NOT PodStarts, but merely an artifact of how we count it in the tests
683+ recreations : map [int ]int {1 : 1 },
684+ })
685+
686+ expectPhaseTransitions (su , []workspacev1.WorkspacePhase {workspacev1 .WorkspacePhasePending , workspacev1 .WorkspacePhaseCreating , workspacev1 .WorkspacePhaseInitializing , workspacev1 .WorkspacePhaseStopping , workspacev1 .WorkspacePhaseStopped })
687+ // ### validate end
688+ })
541689 })
542690
543691 Context ("with headless workspaces" , func () {
0 commit comments