Skip to content

Commit 8223f18

Browse files
authored
Merge pull request #5286 from trozet/optimize_egress_ip
Optimize egress ip performance with UDNs
2 parents 171e40b + 5308cbf commit 8223f18

15 files changed

+285
-61
lines changed

go-controller/pkg/kubevirt/router.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,16 @@ func EnsureLocalZonePodAddressesToNodeRoute(watchFactory *factory.WatchFactory,
9595
if config.OVNKubernetesFeature.EnableInterconnect {
9696
// NOTE: EIP & ESVC use same route and if this is already present thanks to those features,
9797
// this will be a no-op
98-
if err := libovsdbutil.CreateDefaultRouteToExternal(nbClient, types.OVNClusterRouter, types.GWRouterPrefix+pod.Spec.NodeName, clusterSubnets); err != nil {
98+
node, err := watchFactory.GetNode(pod.Spec.NodeName)
99+
if err != nil {
100+
return fmt.Errorf("failed getting to list node %q for pod %s/%s: %w", pod.Spec.NodeName, pod.Namespace, pod.Name, err)
101+
}
102+
gatewayIPs, err := util.ParseNodeGatewayRouterJoinAddrs(node, types.DefaultNetworkName)
103+
if err != nil {
104+
return fmt.Errorf("failed to get default network gateway router join IPs for node %q: %w", node.Name, err)
105+
}
106+
if err := libovsdbutil.CreateDefaultRouteToExternal(nbClient, types.OVNClusterRouter,
107+
types.GWRouterPrefix+pod.Spec.NodeName, clusterSubnets, gatewayIPs); err != nil {
99108
return err
100109
}
101110
}

go-controller/pkg/libovsdb/util/router.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
1414
libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
1515
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
16-
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
1716
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
1817
)
1918

@@ -34,11 +33,7 @@ import (
3433
// (TODO: FIXME): With this route, we are officially breaking support for IC with zones that have multiple-nodes
3534
// NOTE: This route is exactly the same as what is added by pod-live-migration feature and we keep the route exactly
3635
// same across the 3 features so that if the route already exists on the node, this is just a no-op
37-
func CreateDefaultRouteToExternal(nbClient libovsdbclient.Client, clusterRouter, gwRouterName string, clusterSubnets []config.CIDRNetworkEntry) error {
38-
gatewayIPs, err := GetLRPAddrs(nbClient, types.GWRouterToJoinSwitchPrefix+gwRouterName)
39-
if err != nil {
40-
return fmt.Errorf("attempt at finding node gateway router %s network information failed, err: %w", gwRouterName, err)
41-
}
36+
func CreateDefaultRouteToExternal(nbClient libovsdbclient.Client, clusterRouter, gwRouterName string, clusterSubnets []config.CIDRNetworkEntry, gatewayIPs []*net.IPNet) error {
4237
for _, clusterSubnet := range clusterSubnets {
4338
isClusterSubnetIPV6 := utilnet.IsIPv6String(clusterSubnet.CIDR.IP.String())
4439
gatewayIP, err := util.MatchFirstIPNetFamily(isClusterSubnetIPV6, gatewayIPs)

go-controller/pkg/libovsdb/util/router_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ func TestCreateDefaultRouteToExternal(t *testing.T) {
3131
gwRouterPortName := types.GWRouterToJoinSwitchPrefix + gwRouterName
3232
gwRouterIPAddressV4 := "100.64.0.3"
3333
gwRouterIPAddressV6 := "fd98::3"
34+
gwRouterIPAddressV4CIDR := fmt.Sprintf("%s/32", gwRouterIPAddressV4)
35+
gwRouterIPAddressV6CIDR := fmt.Sprintf("%s/128", gwRouterIPAddressV6)
36+
gatewayIPs := []*net.IPNet{ovntest.MustParseIPNet(gwRouterIPAddressV4CIDR), ovntest.MustParseIPNet(gwRouterIPAddressV6CIDR)}
3437
gwRouterPort := &nbdb.LogicalRouterPort{
3538
UUID: gwRouterPortName + "-uuid",
3639
Name: gwRouterPortName,
@@ -228,7 +231,7 @@ func TestCreateDefaultRouteToExternal(t *testing.T) {
228231
tc.preTestAction()
229232
}
230233

231-
if err = CreateDefaultRouteToExternal(nbClient, ovnClusterRouterName, gwRouterName, config.Default.ClusterSubnets); err != nil {
234+
if err = CreateDefaultRouteToExternal(nbClient, ovnClusterRouterName, gwRouterName, config.Default.ClusterSubnets, gatewayIPs); err != nil {
232235
t.Fatal(fmt.Errorf("failed to run CreateDefaultRouteToExternal: %v", err))
233236
}
234237

go-controller/pkg/ovn/controller/egressservice/egressservice_zone.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ type InitClusterEgressPoliciesFunc func(client libovsdbclient.Client, addressSet
5050
type EnsureNoRerouteNodePoliciesFunc func(client libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory,
5151
networkName, controllerName, clusterRouter string, nodeLister corelisters.NodeLister, v4, v6 bool) error
5252
type DeleteLegacyDefaultNoRerouteNodePoliciesFunc func(nbClient libovsdbclient.Client, clusterRouter, nodeName string) error
53-
type CreateDefaultRouteToExternalFunc func(nbClient libovsdbclient.Client, clusterRouter, gwRouterName string, clusterSubnets []config.CIDRNetworkEntry) error
53+
type CreateDefaultRouteToExternalFunc func(nbClient libovsdbclient.Client, clusterRouter, gwRouterName string, clusterSubnets []config.CIDRNetworkEntry, gatewayIPs []*net.IPNet) error
5454

5555
type Controller struct {
5656
// network information

go-controller/pkg/ovn/controller/egressservice/egressservice_zone_node.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,15 @@ func (c *Controller) syncNode(key string) error {
151151
return nil
152152
}
153153

154+
gatewayIPs, err := util.ParseNodeGatewayRouterJoinAddrs(n, types.DefaultNetworkName)
155+
if err != nil {
156+
return fmt.Errorf("failed to get default network gateway router join IPs for node %q: %w", n.Name, err)
157+
}
158+
154159
// At this point the node exists and is ready
155160
if config.OVNKubernetesFeature.EnableInterconnect && c.zone != types.OvnDefaultZone && c.isNodeInLocalZone(n) {
156-
if err := c.createDefaultRouteToExternalForIC(c.nbClient, c.GetNetworkScopedClusterRouterName(), c.GetNetworkScopedGWRouterName(nodeName), c.Subnets()); err != nil {
161+
if err := c.createDefaultRouteToExternalForIC(c.nbClient, c.GetNetworkScopedClusterRouterName(),
162+
c.GetNetworkScopedGWRouterName(nodeName), c.Subnets(), gatewayIPs); err != nil {
157163
return err
158164
}
159165
}

go-controller/pkg/ovn/default_network_controller.go

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ type DefaultNetworkController struct {
131131
syncZoneICFailed sync.Map
132132
syncHostNetAddrSetFailed sync.Map
133133
syncEIPNodeRerouteFailed sync.Map
134+
syncEIPNodeFailed sync.Map
134135

135136
// variable to determine if all pods present on the node during startup have been processed
136137
// updated atomically
@@ -843,8 +844,10 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from
843844
h.oc.eIPC.nodeZoneState.UnlockKey(node.Name)
844845

845846
shouldSyncReroute := true
847+
shouldSyncEIPNode := true
846848
if fromRetryLoop {
847849
_, shouldSyncReroute = h.oc.syncEIPNodeRerouteFailed.Load(node.Name)
850+
_, shouldSyncEIPNode = h.oc.syncEIPNodeFailed.Load(node.Name)
848851
}
849852

850853
if shouldSyncReroute {
@@ -862,10 +865,19 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from
862865
h.oc.syncEIPNodeRerouteFailed.Store(node.Name, true)
863866
return err
864867
}
868+
h.oc.syncEIPNodeRerouteFailed.Delete(node.Name)
865869
}
866-
// Add routing specific to Egress IP NOTE: GARP configuration that
867-
// Egress IP depends on is added from the gateway reconciliation logic
868-
return h.oc.eIPC.addEgressNode(node)
870+
if shouldSyncEIPNode {
871+
// Add routing specific to Egress IP NOTE: GARP configuration that
872+
// Egress IP depends on is added from the gateway reconciliation logic
873+
err := h.oc.eIPC.addEgressNode(node)
874+
if err != nil {
875+
h.oc.syncEIPNodeFailed.Store(node.Name, true)
876+
return err
877+
}
878+
h.oc.syncEIPNodeFailed.Delete(node.Name)
879+
}
880+
return nil
869881

870882
case factory.NamespaceType:
871883
ns, ok := obj.(*corev1.Namespace)
@@ -1038,24 +1050,36 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
10381050
h.oc.eIPC.nodeZoneState.Store(newNode.Name, h.oc.isLocalZoneNode(newNode))
10391051
h.oc.eIPC.nodeZoneState.UnlockKey(newNode.Name)
10401052

1041-
_, failed := h.oc.syncEIPNodeRerouteFailed.Load(newNode.Name)
1053+
_, syncEIPNodeRerouteFailed := h.oc.syncEIPNodeRerouteFailed.Load(newNode.Name)
10421054

10431055
// node moved from remote -> local or previously failed reroute config
1044-
if (!h.oc.isLocalZoneNode(oldNode) || failed) && h.oc.isLocalZoneNode(newNode) {
1056+
if (!h.oc.isLocalZoneNode(oldNode) || syncEIPNodeRerouteFailed) && h.oc.isLocalZoneNode(newNode) {
10451057
if err := h.oc.eIPC.ensureDefaultNoRerouteQoSRules(newNode.Name); err != nil {
10461058
return err
10471059
}
10481060
}
10491061
// update the nodeIP in the default-reRoute (102 priority) destination address-set
1050-
if failed || util.NodeHostCIDRsAnnotationChanged(oldNode, newNode) {
1062+
if syncEIPNodeRerouteFailed || util.NodeHostCIDRsAnnotationChanged(oldNode, newNode) {
10511063
klog.Infof("Egress IP detected IP address change for node %s. Updating no re-route policies", newNode.Name)
10521064
err := h.oc.eIPC.ensureDefaultNoRerouteNodePolicies()
10531065
if err != nil {
1066+
h.oc.syncEIPNodeRerouteFailed.Store(newNode.Name, true)
10541067
return err
10551068
}
1069+
h.oc.syncEIPNodeRerouteFailed.Delete(newNode.Name)
10561070
}
1057-
h.oc.syncEIPNodeRerouteFailed.Delete(newNode.Name)
1058-
return h.oc.eIPC.addEgressNode(newNode)
1071+
1072+
_, syncEIPNodeFailed := h.oc.syncEIPNodeFailed.Load(newNode.Name)
1073+
// update only if the GR join IP changed for default network
1074+
if syncEIPNodeFailed || joinCIDRChanged(oldNode, newNode, h.oc.GetNetworkName()) {
1075+
err := h.oc.eIPC.addEgressNode(newNode)
1076+
if err != nil {
1077+
h.oc.syncEIPNodeFailed.Store(newNode.Name, true)
1078+
return err
1079+
}
1080+
h.oc.syncEIPNodeFailed.Delete(newNode.Name)
1081+
}
1082+
return nil
10591083

10601084
case factory.NamespaceType:
10611085
oldNs, newNs := oldObj.(*corev1.Namespace), newObj.(*corev1.Namespace)
@@ -1118,6 +1142,7 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int
11181142
h.oc.eIPC.nodeZoneState.Delete(node.Name)
11191143
h.oc.eIPC.nodeZoneState.UnlockKey(node.Name)
11201144
h.oc.syncEIPNodeRerouteFailed.Delete(node.Name)
1145+
h.oc.syncEIPNodeFailed.Delete(node.Name)
11211146
return nil
11221147

11231148
case factory.NamespaceType:

go-controller/pkg/ovn/egressip.go

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2050,28 +2050,14 @@ func (e *EgressIPController) addEgressNode(node *corev1.Node) error {
20502050
// NOTE3: When the node gets deleted we do not remove this route intentionally because
20512051
// on IC if the node is gone, then the ovn_cluster_router is also gone along with all
20522052
// the routes on it.
2053-
processNetworkFn := func(ni util.NetInfo) error {
2054-
if ni.TopologyType() == types.Layer2Topology || len(ni.Subnets()) == 0 {
2055-
return nil
2056-
}
2057-
if err := libovsdbutil.CreateDefaultRouteToExternal(e.nbClient, ni.GetNetworkScopedClusterRouterName(),
2058-
ni.GetNetworkScopedGWRouterName(node.Name), ni.Subnets()); err != nil {
2059-
return fmt.Errorf("failed to create route to external for network %s: %v", ni.GetNetworkName(), err)
2060-
}
2061-
return nil
2062-
}
20632053
ni := e.networkManager.GetNetwork(types.DefaultNetworkName)
2064-
if ni == nil {
2065-
return fmt.Errorf("failed to get default network from NAD controller")
2066-
}
2067-
if err := processNetworkFn(ni); err != nil {
2068-
return fmt.Errorf("failed to process default network: %v", err)
2069-
}
2070-
if !isEgressIPForUDNSupported() {
2071-
return nil
2054+
gatewayIPs, err := util.ParseNodeGatewayRouterJoinAddrs(node, types.DefaultNetworkName)
2055+
if err != nil {
2056+
return fmt.Errorf("failed to get default network gateway router join IPs for node %q: %w", node.Name, err)
20722057
}
2073-
if err := e.networkManager.DoWithLock(processNetworkFn); err != nil {
2074-
return fmt.Errorf("failed to process all user defined networks route to external: %v", err)
2058+
if err := libovsdbutil.CreateDefaultRouteToExternal(e.nbClient, ni.GetNetworkScopedClusterRouterName(),
2059+
ni.GetNetworkScopedGWRouterName(node.Name), ni.Subnets(), gatewayIPs); err != nil {
2060+
return fmt.Errorf("failed to create route to external for network %s: %v", ni.GetNetworkName(), err)
20752061
}
20762062
}
20772063
}
@@ -3139,7 +3125,7 @@ func createDefaultNoRerouteServicePolicies(nbClient libovsdbclient.Client, netwo
31393125
return nil
31403126
}
31413127

3142-
func (e *EgressIPController) ensureRouterPoliciesForNetwork(ni util.NetInfo) error {
3128+
func (e *EgressIPController) ensureRouterPoliciesForNetwork(ni util.NetInfo, node *corev1.Node) error {
31433129
e.nodeUpdateMutex.Lock()
31443130
defer e.nodeUpdateMutex.Unlock()
31453131
subnetEntries := ni.Subnets()
@@ -3164,8 +3150,12 @@ func (e *EgressIPController) ensureRouterPoliciesForNetwork(ni util.NetInfo) err
31643150
return fmt.Errorf("failed to ensure no reroute node policies for network %s: %v", ni.GetNetworkName(), err)
31653151
}
31663152
if config.OVNKubernetesFeature.EnableInterconnect && ni.TopologyType() == types.Layer3Topology {
3153+
gatewayIPs, err := util.ParseNodeGatewayRouterJoinAddrs(node, ni.GetNetworkName())
3154+
if err != nil {
3155+
return fmt.Errorf("failed to get %q network gateway router join IPs for node %q, err: %w", ni.GetNetworkName(), node.Name, err)
3156+
}
31673157
if err := libovsdbutil.CreateDefaultRouteToExternal(e.nbClient, routerName,
3168-
ni.GetNetworkScopedGWRouterName(localNode), subnetEntries); err != nil {
3158+
ni.GetNetworkScopedGWRouterName(localNode), subnetEntries, gatewayIPs); err != nil {
31693159
return fmt.Errorf("failed to create route to external for network %s: %v", ni.GetNetworkName(), err)
31703160
}
31713161
}

go-controller/pkg/ovn/egressip_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5418,6 +5418,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network"
54185418
"k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"100.88.0.2/16\"}", // used only for ic=true test
54195419
"k8s.ovn.org/zone-name": node1Zone, // used only for ic=true test
54205420
util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR),
5421+
util.OVNNodeGRLRPAddrs: fmt.Sprintf(`{"default":{"ipv4":"%s/16"}}`, nodeLogicalRouterIPv4[0]),
54215422
}
54225423
if node1Zone != "global" {
54235424
annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test
@@ -5432,6 +5433,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network"
54325433
"k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"100.88.0.3/16\"}", // used only for ic=true test
54335434
"k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test
54345435
util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR),
5436+
util.OVNNodeGRLRPAddrs: fmt.Sprintf(`{"default":{"ipv4":"%s/16"}}`, node2LogicalRouterIPv4[0]),
54355437
}
54365438
if node2Zone != "global" {
54375439
annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test

go-controller/pkg/ovn/egressip_udn_l2_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol
304304
fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, false)
305305
fakeOvn.controller.eIPC.zone = node1.Name
306306
fakeOvn.controller.zone = node1.Name
307-
err = fakeOvn.eIPController.ensureRouterPoliciesForNetwork(netInfo)
307+
err = fakeOvn.eIPController.ensureRouterPoliciesForNetwork(netInfo, &node1)
308308
gomega.Expect(err).NotTo(gomega.HaveOccurred())
309309
err = fakeOvn.eIPController.ensureSwitchPoliciesForNode(netInfo, node1Name)
310310
gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -670,7 +670,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol
670670
gomega.Expect(err).NotTo(gomega.HaveOccurred())
671671
defer fakeOvn.networkManager.Stop()
672672
// simulate Start() of secondary network controller
673-
err = fakeOvn.eIPController.ensureRouterPoliciesForNetwork(secConInfo.bnc.GetNetInfo())
673+
err = fakeOvn.eIPController.ensureRouterPoliciesForNetwork(secConInfo.bnc.GetNetInfo(), &node1)
674674
gomega.Expect(err).NotTo(gomega.HaveOccurred())
675675
err = fakeOvn.eIPController.ensureSwitchPoliciesForNode(secConInfo.bnc.GetNetInfo(), node1Name)
676676
gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -1662,7 +1662,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol
16621662
defer fakeOvn.networkManager.Stop()
16631663
err = fakeOvn.controller.WatchEgressNodes()
16641664
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1665-
err = fakeOvn.eIPController.ensureRouterPoliciesForNetwork(netInfo)
1665+
err = fakeOvn.eIPController.ensureRouterPoliciesForNetwork(netInfo, &node1)
16661666
gomega.Expect(err).NotTo(gomega.HaveOccurred())
16671667
err = fakeOvn.eIPController.ensureSwitchPoliciesForNode(netInfo, node1Name)
16681668
gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -2026,7 +2026,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol
20262026
err = fakeOvn.networkManager.Start()
20272027
gomega.Expect(err).NotTo(gomega.HaveOccurred())
20282028
defer fakeOvn.networkManager.Stop()
2029-
err = fakeOvn.eIPController.ensureRouterPoliciesForNetwork(netInfo)
2029+
err = fakeOvn.eIPController.ensureRouterPoliciesForNetwork(netInfo, &node1)
20302030
gomega.Expect(err).NotTo(gomega.HaveOccurred())
20312031
err = fakeOvn.eIPController.ensureSwitchPoliciesForNode(netInfo, node1Name)
20322032
gomega.Expect(err).NotTo(gomega.HaveOccurred())

0 commit comments

Comments
 (0)