Skip to content

Commit 4650923

Browse files
committed
Do not retry pods that are not scheduled
In our pod handlers we have code that checks if a pod is scheduled. If the pod is not scheduled, then we do not add the pod to our retry framework. However, there are times where we automatically enqueue all pods on a node or across the cluster. This can be from when a new node comes up (pre-IC) and need to add all pods on that node. Another use case is when a new UDN/NAD is created and the controller spins up...we add all pods then. We shouldn't be queuing pods to the retry framework that are not scheduled. It's a waste of operations. However, even if we do enqueue them, we definitely should not be treating a non-scheduled resource as an error and retrying it again later. This commit changes the retry framework to detect the above case, and log an error. It does not trigger retrying of the resource, which may perpetually fail and then then cause OVNKubernetesResourceRetryFailure. Signed-off-by: Tim Rozet <[email protected]>
1 parent 75fe04c commit 4650923

File tree

1 file changed

+14
-13
lines changed

1 file changed

+14
-13
lines changed

go-controller/pkg/retry/obj_retry.go

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,11 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) {
320320
}
321321
if r.ResourceHandler.NeedsUpdateDuringRetry && entry.config != nil && entry.newObj != nil {
322322
klog.Infof("%v retry: updating object %s", r.ResourceHandler.ObjType, objKey)
323-
if err := r.ResourceHandler.UpdateResource(entry.config, entry.newObj, true); err != nil {
323+
if !r.ResourceHandler.IsResourceScheduled(entry.newObj) {
324+
// unscheduled resources (pods) will be retried again later we do not track these as failures, and should not retry.
325+
// we should avoid queuing objects to the retry handler that are not scheduled. Thus treat this as an error.
326+
klog.Errorf("%v retry: cannot update object that is not scheduled: %s", r.ResourceHandler.ObjType, objKey)
327+
} else if err := r.ResourceHandler.UpdateResource(entry.config, entry.newObj, true); err != nil {
324328
entry.timeStamp = time.Now()
325329
entry.failedAttempts++
326330
if entry.failedAttempts >= MaxFailedAttempts {
@@ -336,14 +340,12 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) {
336340
} else {
337341
// delete old object if needed
338342
if entry.oldObj != nil {
339-
klog.Infof("Removing old object: %s %s (failed: %v)",
340-
r.ResourceHandler.ObjType, objKey, entry.failedAttempts)
343+
klog.Infof("Removing old object: %s %s (failed: %v)", r.ResourceHandler.ObjType, objKey, entry.failedAttempts)
341344
if !r.ResourceHandler.IsResourceScheduled(entry.oldObj) {
342-
klog.V(5).Infof("Retry: %s %s not scheduled", r.ResourceHandler.ObjType, objKey)
343-
entry.failedAttempts++
344-
return
345-
}
346-
if err := r.ResourceHandler.DeleteResource(entry.oldObj, entry.config); err != nil {
345+
// unscheduled resources (pods) will be retried again later we do not track these as failures, and should not retry.
346+
// we should avoid queuing objects to the retry handler that are not scheduled. Thus treat this as an error.
347+
klog.Errorf("%v retry: cannot delete object that was not scheduled %s", r.ResourceHandler.ObjType, objKey)
348+
} else if err := r.ResourceHandler.DeleteResource(entry.oldObj, entry.config); err != nil {
347349
entry.timeStamp = time.Now()
348350
entry.failedAttempts++
349351
if entry.failedAttempts >= MaxFailedAttempts {
@@ -363,11 +365,10 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) {
363365
if entry.newObj != nil {
364366
klog.Infof("Adding new object: %s %s", r.ResourceHandler.ObjType, objKey)
365367
if !r.ResourceHandler.IsResourceScheduled(entry.newObj) {
366-
klog.V(5).Infof("Retry: %s %s not scheduled", r.ResourceHandler.ObjType, objKey)
367-
entry.failedAttempts++
368-
return
369-
}
370-
if err := r.ResourceHandler.AddResource(entry.newObj, true); err != nil {
368+
// unscheduled resources (pods) will be retried again later we do not track these as failures, and should not retry.
369+
// we should avoid queuing objects to the retry handler that are not scheduled. Thus treat this as an error.
370+
klog.Errorf("%v retry: cannot create object that is not scheduled %s", r.ResourceHandler.ObjType, objKey)
371+
} else if err := r.ResourceHandler.AddResource(entry.newObj, true); err != nil {
371372
entry.timeStamp = time.Now()
372373
entry.failedAttempts++
373374
if entry.failedAttempts >= MaxFailedAttempts {

0 commit comments

Comments
 (0)