Skip to content

Commit 96c8292

Browse files
Merge pull request #1061 from flavianmissi/deactivate-azure-path-fix
IR-477: pkg/operator: deactivate azure path fix job
2 parents e6c8c4c + 527120f commit 96c8292

File tree

2 files changed

+43
-105
lines changed

2 files changed

+43
-105
lines changed

pkg/operator/azurepathfixcontroller.go

Lines changed: 41 additions & 102 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,9 @@ import (
66
"strings"
77
"time"
88

9-
batchv1 "k8s.io/api/batch/v1"
10-
corev1 "k8s.io/api/core/v1"
9+
operatorv1 "github.com/openshift/api/operator/v1"
1110
"k8s.io/apimachinery/pkg/api/errors"
12-
"k8s.io/apimachinery/pkg/labels"
13-
"k8s.io/apimachinery/pkg/selection"
14-
utilerrors "k8s.io/apimachinery/pkg/util/errors"
11+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1512
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
1613
"k8s.io/apimachinery/pkg/util/wait"
1714
batchv1informers "k8s.io/client-go/informers/batch/v1"
@@ -25,7 +22,6 @@ import (
2522
"k8s.io/klog/v2"
2623

2724
configapiv1 "github.com/openshift/api/config/v1"
28-
operatorv1 "github.com/openshift/api/operator/v1"
2925
configv1informers "github.com/openshift/client-go/config/informers/externalversions/config/v1"
3026
configlisters "github.com/openshift/client-go/config/listers/config/v1"
3127
imageregistryv1informers "github.com/openshift/client-go/imageregistry/informers/externalversions/imageregistry/v1"
@@ -177,7 +173,6 @@ func (c *AzurePathFixController) sync() error {
177173
return nil
178174
}
179175

180-
ctx := context.TODO()
181176
imageRegistryConfig, err := c.imageRegistryConfigLister.Get("cluster")
182177
if err != nil {
183178
return err
@@ -208,110 +203,54 @@ func (c *AzurePathFixController) sync() error {
208203
c.kubeconfig,
209204
)
210205

211-
progressingCondition := operatorv1.OperatorCondition{
212-
Type: "AzurePathFixProgressing",
213-
Status: operatorv1.ConditionUnknown,
206+
// this controller was created to aid users migrating from 4.13.z to >=4.14.z.
207+
// once users have migrated to an OCP version and have run this job at least once,
208+
// this job is no longer needed. on OCP versions >=4.17 we can be certain that
209+
// this has already migrated the blobs to the correct place, and we can now
210+
// safely remove the job. see OCPBUGS-29003 for details.
211+
progressing := "AzurePathFixProgressing"
212+
degraded := "AzurePathFixControllerDegraded"
213+
removeConditionFn := func(conditionType string) v1helpers.UpdateStatusFunc {
214+
return func(oldStatus *operatorv1.OperatorStatus) error {
215+
v1helpers.RemoveOperatorCondition(&oldStatus.Conditions, conditionType)
216+
return nil
217+
}
218+
}
219+
removeConditionFns := []v1helpers.UpdateStatusFunc{}
220+
progressingConditionFound := v1helpers.FindOperatorCondition(imageRegistryConfig.Status.Conditions, progressing) != nil
221+
if progressingConditionFound {
222+
removeConditionFns = append(removeConditionFns, removeConditionFn(progressing))
223+
}
224+
degradedConditionFound := v1helpers.FindOperatorCondition(imageRegistryConfig.Status.Conditions, degraded) != nil
225+
if degradedConditionFound {
226+
removeConditionFns = append(removeConditionFns, removeConditionFn(degraded))
214227
}
215-
degradedCondition := operatorv1.OperatorCondition{
216-
Type: "AzurePathFixControllerDegraded",
217-
Status: operatorv1.ConditionFalse,
218-
Reason: "AsExpected",
228+
if len(removeConditionFns) > 0 {
229+
if _, _, err := v1helpers.UpdateStatus(
230+
context.TODO(),
231+
c.operatorClient,
232+
removeConditionFns...,
233+
); err != nil {
234+
return err
235+
}
219236
}
220237

221-
jobObj, err := gen.Get()
238+
_, err = gen.Get()
222239
if errors.IsNotFound(err) {
223-
progressingCondition.Status = operatorv1.ConditionTrue
224-
progressingCondition.Reason = "NotFound"
225-
progressingCondition.Message = "The job does not exist"
240+
return nil
226241
} else if err != nil {
227-
progressingCondition.Reason = "Unknown"
228-
progressingCondition.Message = fmt.Sprintf("Unable to check job progress: %s", err)
242+
return err
229243
} else {
230-
job := jobObj.(*batchv1.Job)
231-
jobProgressing := true
232-
var jobCondition batchv1.JobConditionType
233-
for _, cond := range job.Status.Conditions {
234-
if (cond.Type == batchv1.JobComplete || cond.Type == batchv1.JobFailed) && cond.Status == corev1.ConditionTrue {
235-
jobProgressing = false
236-
jobCondition = cond.Type
237-
break
238-
}
239-
}
240-
241-
if jobProgressing {
242-
progressingCondition.Reason = "Migrating"
243-
progressingCondition.Message = fmt.Sprintf("Azure path fix job is progressing: %d pods active; %d pods failed", job.Status.Active, job.Status.Failed)
244-
progressingCondition.Status = operatorv1.ConditionTrue
244+
gracePeriod := int64(0)
245+
propagationPolicy := metav1.DeletePropagationForeground
246+
opts := metav1.DeleteOptions{
247+
GracePeriodSeconds: &gracePeriod,
248+
PropagationPolicy: &propagationPolicy,
245249
}
246-
247-
if jobCondition == batchv1.JobComplete {
248-
progressingCondition.Reason = "AsExpected"
249-
progressingCondition.Status = operatorv1.ConditionFalse
250+
if err := gen.Delete(opts); err != nil {
251+
return err
250252
}
251-
252-
if jobCondition == batchv1.JobFailed {
253-
progressingCondition.Reason = "Failed"
254-
progressingCondition.Status = operatorv1.ConditionFalse
255-
degradedCondition.Reason = "Failed"
256-
degradedCondition.Status = operatorv1.ConditionTrue
257-
258-
// if the job still executing (i.e there are attempts left before backoff),
259-
// we don't want to report degraded, but we let users know that some attempt(s)
260-
// failed, and the job is still progressing.
261-
262-
requirement, err := labels.NewRequirement("batch.kubernetes.io/job-name", selection.Equals, []string{gen.GetName()})
263-
if err != nil {
264-
// this is extremely unlikely to happen
265-
return err
266-
}
267-
pods, err := c.podLister.List(labels.NewSelector().Add(*requirement))
268-
if err != nil {
269-
// there's not much that can be done about an error here,
270-
// the next reconciliation(s) are likely to succeed.
271-
return err
272-
}
273-
274-
if len(pods) == 0 {
275-
msg := "Migration failed but no job pods are left to inspect"
276-
progressingCondition.Message = msg
277-
degradedCondition.Message = msg
278-
}
279-
280-
if len(pods) > 0 {
281-
mostRecentPod := pods[0]
282-
for _, pod := range pods {
283-
if mostRecentPod.CreationTimestamp.Before(&pod.CreationTimestamp) {
284-
mostRecentPod = pod
285-
}
286-
}
287-
288-
if len(mostRecentPod.Status.ContainerStatuses) > 0 {
289-
status := mostRecentPod.Status.ContainerStatuses[0]
290-
msg := fmt.Sprintf("Migration failed: %s", status.State.Terminated.Message)
291-
progressingCondition.Message = msg
292-
degradedCondition.Message = msg
293-
}
294-
}
295-
}
296-
}
297-
298-
err = resource.ApplyMutator(gen)
299-
if err != nil {
300-
_, _, updateError := v1helpers.UpdateStatus(
301-
ctx,
302-
c.operatorClient,
303-
v1helpers.UpdateConditionFn(progressingCondition),
304-
v1helpers.UpdateConditionFn(degradedCondition),
305-
)
306-
return utilerrors.NewAggregate([]error{err, updateError})
307253
}
308-
309-
_, _, err = v1helpers.UpdateStatus(
310-
ctx,
311-
c.operatorClient,
312-
v1helpers.UpdateConditionFn(progressingCondition),
313-
v1helpers.UpdateConditionFn(degradedCondition),
314-
)
315254
return err
316255
}
317256

pkg/resource/azurepathfixjob.go

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ import (
1111
kcorev1 "k8s.io/api/core/v1"
1212
"k8s.io/apimachinery/pkg/api/errors"
1313
"k8s.io/apimachinery/pkg/api/resource"
14-
metaapi "k8s.io/apimachinery/pkg/apis/meta/v1"
1514
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1615
"k8s.io/apimachinery/pkg/runtime"
1716
batchset "k8s.io/client-go/kubernetes/typed/batch/v1"
@@ -305,8 +304,8 @@ func (gapfj *generatorAzurePathFixJob) Update(o runtime.Object) (runtime.Object,
305304
// if we are here it means the expected container envs differed from
306305
// the actual container envs, so we recreate the job.
307306
gracePeriod := int64(0)
308-
propagationPolicy := metaapi.DeletePropagationForeground
309-
opts := metaapi.DeleteOptions{
307+
propagationPolicy := metav1.DeletePropagationForeground
308+
opts := metav1.DeleteOptions{
310309
GracePeriodSeconds: &gracePeriod,
311310
PropagationPolicy: &propagationPolicy,
312311
}

0 commit comments

Comments
 (0)