@@ -1138,6 +1138,8 @@ func (e *EgressIPController) isLocalZoneNode(node *corev1.Node) bool {
11381138type egressIPCache struct {
11391139 // egressIP name -> network name -> cache
11401140 egressIPNameToPods map [string ]map [string ]selectedPods
1141+ // egressIP name -> to assigned Node names
1142+ egressIPNameToAssignedNodes map [string ][]string
11411143 // egressLocalNodes will contain all nodes that are local
11421144 // to this zone which are serving this egressIP object..
11431145 // This will help sync SNATs
@@ -1154,7 +1156,7 @@ type egressIPCache struct {
11541156}
11551157
11561158type nodeNetworkRedirects struct {
1157- // node name -> network name -> redirect IPs
1159+ // network name -> node name -> redirect IPs
11581160 cache map [string ]map [string ]redirectIPs
11591161}
11601162
@@ -1600,21 +1602,36 @@ func (e *EgressIPController) syncPodAssignmentCache(egressIPCache egressIPCache)
16001602// It also removes stale nexthops from router policies used by EgressIPs.
16011603// Upon failure, it may be invoked multiple times in order to avoid a pod restart.
16021604func (e * EgressIPController ) syncStaleEgressReroutePolicy (cache egressIPCache ) error {
1603- for _ , networkCache := range cache .egressIPNameToPods {
1605+ for eipName , networkCache := range cache .egressIPNameToPods {
16041606 for networkName , data := range networkCache {
16051607 logicalRouterPolicyStaleNexthops := []* nbdb.LogicalRouterPolicy {}
1608+ // select LRPs scoped to the correct LRP priority, network and EIP name
16061609 p := func (item * nbdb.LogicalRouterPolicy ) bool {
16071610 if item .Priority != types .EgressIPReroutePriority || item .ExternalIDs [libovsdbops .NetworkKey .String ()] != networkName {
16081611 return false
16091612 }
1610- egressIPName , _ := getEIPLRPObjK8MetaData (item .ExternalIDs )
1611- if egressIPName == "" {
1613+ networkNodeRedirectCache , ok := cache .egressNodeRedirectsCache .cache [networkName ]
1614+ if ! ok || len (networkNodeRedirectCache ) == 0 {
1615+ klog .Infof ("syncStaleEgressReroutePolicy found invalid logical router policy (UUID: %s) because no assigned Nodes for EgressIP %s" , item .UUID , eipName )
1616+ return true
1617+ }
1618+ extractedEgressIPName , _ := getEIPLRPObjK8MetaData (item .ExternalIDs )
1619+ if extractedEgressIPName == "" {
16121620 klog .Errorf ("syncStaleEgressReroutePolicy found logical router policy (UUID: %s) with invalid meta data associated with network %s" , item .UUID , networkName )
1613- return false
1621+ return true
1622+ }
1623+ if extractedEgressIPName != eipName {
1624+ // remove if there's no reference to this EIP name
1625+ _ , ok := cache .egressIPNameToPods [extractedEgressIPName ]
1626+ return ! ok
16141627 }
16151628 splitMatch := strings .Split (item .Match , " " )
1616- logicalIP := splitMatch [len (splitMatch )- 1 ]
1617- parsedLogicalIP := net .ParseIP (logicalIP )
1629+ podIPStr := splitMatch [len (splitMatch )- 1 ]
1630+ podIP := net .ParseIP (podIPStr )
1631+ if podIP == nil {
1632+ klog .Infof ("syncStaleEgressReroutePolicy found invalid LRP with broken match with UID %q" , item .UUID )
1633+ return true
1634+ }
16181635 egressPodIPs := sets .NewString ()
16191636 // Since LRPs are created only for pods local to this zone
16201637 // we need to care about only those pods. Nexthop for them will
@@ -1624,31 +1641,24 @@ func (e *EgressIPController) syncStaleEgressReroutePolicy(cache egressIPCache) e
16241641 for _ , podIPs := range data .egressLocalPods {
16251642 egressPodIPs .Insert (podIPs .UnsortedList ()... )
16261643 }
1627- if ! egressPodIPs .Has (parsedLogicalIP .String ()) {
1628- klog .Infof ("syncStaleEgressReroutePolicy will delete %s due to no nexthop or stale logical ip: %v" , egressIPName , item )
1644+ if ! egressPodIPs .Has (podIP .String ()) {
1645+ klog .Infof ("syncStaleEgressReroutePolicy will delete %s due to no nexthop or stale logical ip: %v" , extractedEgressIPName , item )
16291646 return true
16301647 }
16311648 // Check for stale nexthops that may exist in the logical router policy and store that in logicalRouterPolicyStaleNexthops.
16321649 // Note: adding missing nexthop(s) to the logical router policy is done outside the scope of this function.
16331650 staleNextHops := []string {}
16341651 for _ , nexthop := range item .Nexthops {
1635- nodeName , ok := cache .egressIPIPToNodeCache [parsedLogicalIP .String ()]
1636- if ok {
1637- klog .Infof ("syncStaleEgressReroutePolicy will delete %s due to no node assigned to logical ip: %v" , egressIPName , item )
1638- return true
1639- }
1640- networksRedirects , ok := cache .egressNodeRedirectsCache .cache [nodeName ]
1641- if ok {
1642- klog .Infof ("syncStaleEgressReroutePolicy will delete %s due to no network in cache: %v" , egressIPName , item )
1643- return true
1644- }
1645- redirects , ok := networksRedirects [networkName ]
1646- if ! ok {
1647- klog .Infof ("syncStaleEgressReroutePolicy will delete %s due to no redirects for network in cache: %v" , egressIPName , item )
1648- return true
1652+ // ensure valid next hop by iterating through the node config
1653+ var isFound bool // isFound is true, if the next hop IP is found within the set of assigned nodes
1654+ for _ , nodeRedirect := range networkNodeRedirectCache {
1655+ if nodeRedirect .containsIP (nexthop ) {
1656+ isFound = true
1657+ break
1658+ }
16491659 }
1650- //FIXME: be more specific about which is the valid next hop instead of relying on verifying if the IP is within a valid set of IPs.
1651- if ! redirects . containsIP ( nexthop ) {
1660+ if ! isFound {
1661+ //FIXME: be more specific about which is the valid next hop instead of relying on verifying if the IP is within a valid set of IPs.
16521662 staleNextHops = append (staleNextHops , nexthop )
16531663 }
16541664 }
@@ -1907,9 +1917,12 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) {
19071917 // This will help sync SNATs
19081918 egressLocalNodesCache := sets .New [string ]()
19091919 cache .egressLocalNodesCache = egressLocalNodesCache
1910- // egressIP name -> node name
1911- egressNodesCache := make (map [string ]string , 0 )
1912- cache .egressIPIPToNodeCache = egressNodesCache
1920+ // egressIP name -> nodes where the IPs are assigned
1921+ egressIPNameNodesCache := make (map [string ][]string , 0 )
1922+ cache .egressIPNameToAssignedNodes = egressIPNameNodesCache
1923+ // egressIP IP -> node name. Assigned node for EIP.
1924+ egressIPIPNodeCache := make (map [string ]string , 0 )
1925+ cache .egressIPIPToNodeCache = egressIPIPNodeCache
19131926 cache .markCache = make (map [string ]string )
19141927 egressIPs , err := e .watchFactory .GetEgressIPs ()
19151928 if err != nil {
@@ -1922,11 +1935,18 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) {
19221935 }
19231936 cache .markCache [egressIP .Name ] = mark .String ()
19241937 egressIPsCache [egressIP .Name ] = make (map [string ]selectedPods , 0 )
1938+ egressIPNameNodesCache [egressIP .Name ] = make ([]string , 0 , len (egressIP .Status .Items ))
19251939 for _ , status := range egressIP .Status .Items {
1940+ eipIP := net .ParseIP (status .EgressIP )
1941+ if eipIP == nil {
1942+ klog .Errorf ("Failed to parse EgressIP %s IP %q from status" , egressIP .Name , status .EgressIP )
1943+ continue
1944+ }
1945+ egressIPIPNodeCache [eipIP .String ()] = status .Node
19261946 if localZoneNodes .Has (status .Node ) {
19271947 egressLocalNodesCache .Insert (status .Node )
19281948 }
1929- egressNodesCache [ status . EgressIP ] = status .Node
1949+ egressIPNameNodesCache [ egressIP . Name ] = append ( egressIPNameNodesCache [ egressIP . Name ], status .Node )
19301950 }
19311951 namespaces , err = e .watchFactory .GetNamespacesBySelector (egressIP .Spec .NamespaceSelector )
19321952 if err != nil {
0 commit comments