Skip to content

Commit 118b393

Browse files
committed
pkg/operator: deactivate azure path fix job
Removes the job if it exists, and does nothing when it doesn't. We added this job to help users work around a breaking change in the upstream distribution Azure storage driver that changed the root storage path. This is no longer needed at this point, so we remove it.
1 parent 705f032 commit 118b393

File tree

2 files changed

+18
-109
lines changed

2 files changed

+18
-109
lines changed

pkg/operator/azurepathfixcontroller.go

Lines changed: 16 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,12 @@
11
package operator
22

33
import (
4-
"context"
54
"fmt"
65
"strings"
76
"time"
87

9-
batchv1 "k8s.io/api/batch/v1"
10-
corev1 "k8s.io/api/core/v1"
118
"k8s.io/apimachinery/pkg/api/errors"
12-
"k8s.io/apimachinery/pkg/labels"
13-
"k8s.io/apimachinery/pkg/selection"
14-
utilerrors "k8s.io/apimachinery/pkg/util/errors"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1510
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
1611
"k8s.io/apimachinery/pkg/util/wait"
1712
batchv1informers "k8s.io/client-go/informers/batch/v1"
@@ -25,7 +20,6 @@ import (
2520
"k8s.io/klog/v2"
2621

2722
configapiv1 "github.com/openshift/api/config/v1"
28-
operatorv1 "github.com/openshift/api/operator/v1"
2923
configv1informers "github.com/openshift/client-go/config/informers/externalversions/config/v1"
3024
configlisters "github.com/openshift/client-go/config/listers/config/v1"
3125
imageregistryv1informers "github.com/openshift/client-go/imageregistry/informers/externalversions/imageregistry/v1"
@@ -172,7 +166,6 @@ func (c *AzurePathFixController) sync() error {
172166
return nil
173167
}
174168

175-
ctx := context.TODO()
176169
imageRegistryConfig, err := c.imageRegistryConfigLister.Get("cluster")
177170
if err != nil {
178171
return err
@@ -203,110 +196,27 @@ func (c *AzurePathFixController) sync() error {
203196
c.kubeconfig,
204197
)
205198

206-
progressingCondition := operatorv1.OperatorCondition{
207-
Type: "AzurePathFixProgressing",
208-
Status: operatorv1.ConditionUnknown,
209-
}
210-
degradedCondition := operatorv1.OperatorCondition{
211-
Type: "AzurePathFixControllerDegraded",
212-
Status: operatorv1.ConditionFalse,
213-
Reason: "AsExpected",
214-
}
215-
216-
jobObj, err := gen.Get()
199+
// this controller was created to aid users migrating from 4.13.z to >=4.14.z.
200+
// once users have migrated to an OCP version and have run this job at least once,
201+
// this job is no longer needed. on OCP versions >=4.17 we can be certain that
202+
// this has already migrated the blobs to the correct place, and we can now
203+
// safely remove the job. see OCPBUGS-29003 for details.
204+
_, err = gen.Get()
217205
if errors.IsNotFound(err) {
218-
progressingCondition.Status = operatorv1.ConditionTrue
219-
progressingCondition.Reason = "NotFound"
220-
progressingCondition.Message = "The job does not exist"
206+
return nil
221207
} else if err != nil {
222-
progressingCondition.Reason = "Unknown"
223-
progressingCondition.Message = fmt.Sprintf("Unable to check job progress: %s", err)
208+
return err
224209
} else {
225-
job := jobObj.(*batchv1.Job)
226-
jobProgressing := true
227-
var jobCondition batchv1.JobConditionType
228-
for _, cond := range job.Status.Conditions {
229-
if (cond.Type == batchv1.JobComplete || cond.Type == batchv1.JobFailed) && cond.Status == corev1.ConditionTrue {
230-
jobProgressing = false
231-
jobCondition = cond.Type
232-
break
233-
}
234-
}
235-
236-
if jobProgressing {
237-
progressingCondition.Reason = "Migrating"
238-
progressingCondition.Message = fmt.Sprintf("Azure path fix job is progressing: %d pods active; %d pods failed", job.Status.Active, job.Status.Failed)
239-
progressingCondition.Status = operatorv1.ConditionTrue
210+
gracePeriod := int64(0)
211+
propagationPolicy := metav1.DeletePropagationForeground
212+
opts := metav1.DeleteOptions{
213+
GracePeriodSeconds: &gracePeriod,
214+
PropagationPolicy: &propagationPolicy,
240215
}
241-
242-
if jobCondition == batchv1.JobComplete {
243-
progressingCondition.Reason = "AsExpected"
244-
progressingCondition.Status = operatorv1.ConditionFalse
245-
}
246-
247-
if jobCondition == batchv1.JobFailed {
248-
progressingCondition.Reason = "Failed"
249-
progressingCondition.Status = operatorv1.ConditionFalse
250-
degradedCondition.Reason = "Failed"
251-
degradedCondition.Status = operatorv1.ConditionTrue
252-
253-
// if the job still executing (i.e there are attempts left before backoff),
254-
// we don't want to report degraded, but we let users know that some attempt(s)
255-
// failed, and the job is still progressing.
256-
257-
requirement, err := labels.NewRequirement("batch.kubernetes.io/job-name", selection.Equals, []string{gen.GetName()})
258-
if err != nil {
259-
// this is extremely unlikely to happen
260-
return err
261-
}
262-
pods, err := c.podLister.List(labels.NewSelector().Add(*requirement))
263-
if err != nil {
264-
// there's not much that can be done about an error here,
265-
// the next reconciliation(s) are likely to succeed.
266-
return err
267-
}
268-
269-
if len(pods) == 0 {
270-
msg := "Migration failed but no job pods are left to inspect"
271-
progressingCondition.Message = msg
272-
degradedCondition.Message = msg
273-
}
274-
275-
if len(pods) > 0 {
276-
mostRecentPod := pods[0]
277-
for _, pod := range pods {
278-
if mostRecentPod.CreationTimestamp.Before(&pod.CreationTimestamp) {
279-
mostRecentPod = pod
280-
}
281-
}
282-
283-
if len(mostRecentPod.Status.ContainerStatuses) > 0 {
284-
status := mostRecentPod.Status.ContainerStatuses[0]
285-
msg := fmt.Sprintf("Migration failed: %s", status.State.Terminated.Message)
286-
progressingCondition.Message = msg
287-
degradedCondition.Message = msg
288-
}
289-
}
216+
if err := gen.Delete(opts); err != nil {
217+
return err
290218
}
291219
}
292-
293-
err = resource.ApplyMutator(gen)
294-
if err != nil {
295-
_, _, updateError := v1helpers.UpdateStatus(
296-
ctx,
297-
c.operatorClient,
298-
v1helpers.UpdateConditionFn(progressingCondition),
299-
v1helpers.UpdateConditionFn(degradedCondition),
300-
)
301-
return utilerrors.NewAggregate([]error{err, updateError})
302-
}
303-
304-
_, _, err = v1helpers.UpdateStatus(
305-
ctx,
306-
c.operatorClient,
307-
v1helpers.UpdateConditionFn(progressingCondition),
308-
v1helpers.UpdateConditionFn(degradedCondition),
309-
)
310220
return err
311221
}
312222

pkg/resource/azurepathfixjob.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
kcorev1 "k8s.io/api/core/v1"
1212
"k8s.io/apimachinery/pkg/api/errors"
1313
"k8s.io/apimachinery/pkg/api/resource"
14-
metaapi "k8s.io/apimachinery/pkg/apis/meta/v1"
1514
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1615
"k8s.io/apimachinery/pkg/runtime"
1716
batchset "k8s.io/client-go/kubernetes/typed/batch/v1"
@@ -305,8 +304,8 @@ func (gapfj *generatorAzurePathFixJob) Update(o runtime.Object) (runtime.Object,
305304
// if we are here it means the expected container envs differed from
306305
// the actual container envs, so we recreate the job.
307306
gracePeriod := int64(0)
308-
propagationPolicy := metaapi.DeletePropagationForeground
309-
opts := metaapi.DeleteOptions{
307+
propagationPolicy := metav1.DeletePropagationForeground
308+
opts := metav1.DeleteOptions{
310309
GracePeriodSeconds: &gracePeriod,
311310
PropagationPolicy: &propagationPolicy,
312311
}

0 commit comments

Comments
 (0)