Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions go-controller/pkg/clustermanager/egressip_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors"
"k8s.io/apimachinery/pkg/api/meta"
)

const (
Expand Down Expand Up @@ -211,6 +212,26 @@ func (eIPC *egressIPClusterController) executeCloudPrivateIPConfigOps(egressIPNa
if cloudPrivateIPConfig.GetDeletionTimestamp() != nil && !cloudPrivateIPConfig.GetDeletionTimestamp().IsZero() {
return fmt.Errorf("cloud update request failed, CloudPrivateIPConfig: %s is being deleted", cloudPrivateIPConfigName)
}

// Handle a scenario in which the object exists in a failed state by removing it
assignedCondition := meta.FindStatusCondition(cloudPrivateIPConfig.Status.Conditions, string(ocpcloudnetworkapi.Assigned))
if assignedCondition != nil && assignedCondition.Status == metav1.ConditionFalse {
klog.Warningf("CloudPrivateIPConfig: %s is in Failed state (reason: %s), deleting to allow retry", cloudPrivateIPConfigName, assignedCondition.Message)
eIPRef := corev1.ObjectReference{
Kind: "EgressIP",
Name: egressIPName,
}
eIPC.recorder.Eventf(&eIPRef, corev1.EventTypeWarning, "CloudAssignmentRetry",
"egress IP: %s previously failed on node %s (reason: %s), will retry assignment",
egressIP, cloudPrivateIPConfig.Spec.Node, assignedCondition.Message)
if err := eIPC.kube.DeleteCloudPrivateIPConfig(cloudPrivateIPConfigName); err != nil {
return fmt.Errorf("failed to delete failed CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err)
}

// Return an error to trigger retry
return fmt.Errorf("deleted failed CloudPrivateIPConfig: %s, will retry creation in next reconciliation", cloudPrivateIPConfigName)
}

if op.toAdd == cloudPrivateIPConfig.Spec.Node {
klog.Infof("CloudPrivateIPConfig: %s already assigned to node: %s", cloudPrivateIPConfigName, cloudPrivateIPConfig.Spec.Node)
continue
Expand Down Expand Up @@ -1498,18 +1519,20 @@ func (eIPC *egressIPClusterController) reconcileCloudPrivateIPConfig(old, new *o
}
if new != nil {
newCloudPrivateIPConfig = new
assignedCondition := meta.FindStatusCondition(newCloudPrivateIPConfig.Status.Conditions, string(ocpcloudnetworkapi.Assigned))
// We should only proceed to setting things up for objects where the new
// object has the same .spec.node and .status.node, and assignment
// condition being true. This is how the cloud-network-config-controller
// indicates a successful cloud assignment.
shouldAdd = newCloudPrivateIPConfig.Status.Node == newCloudPrivateIPConfig.Spec.Node &&
ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned &&
corev1.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == corev1.ConditionTrue
shouldAdd = newCloudPrivateIPConfig.Status.Node == newCloudPrivateIPConfig.Spec.Node && assignedCondition != nil && assignedCondition.Status == metav1.ConditionTrue
// See above explanation for the delete
shouldDelete = shouldDelete &&
(newCloudPrivateIPConfig.Status.Node == "" || newCloudPrivateIPConfig.Status.Node != oldCloudPrivateIPConfig.Status.Node) &&
ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned &&
corev1.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == corev1.ConditionTrue
assignedCondition != nil && assignedCondition.Status == metav1.ConditionTrue

// Handle CloudPrivateIPConfigs that transitioned into a failed state
shouldDelete = shouldDelete || assignedCondition != nil && assignedCondition.Status == metav1.ConditionFalse

// On UPDATE we need to delete the old .status.node
if shouldDelete {
nodeToDelete = oldCloudPrivateIPConfig.Status.Node
Expand Down Expand Up @@ -1570,7 +1593,7 @@ func (eIPC *egressIPClusterController) reconcileCloudPrivateIPConfig(old, new *o
}
for _, resyncEgressIP := range resyncEgressIPs {
if err := eIPC.reconcileEgressIP(nil, resyncEgressIP); err != nil {
return fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err)
return fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", resyncEgressIP.Name, err)
}
}
}
Expand Down