Commit b56df72
Some quality-of-life improvements for layer 3 controller node handling
During node update events, the local and remote addOrUpdate functions are called. A series of sync checks determines what needs to be configured. However, in some cases log messages were printed no matter what, and hybrid overlay was processed on every node event. This cleans things up so that hybrid overlay is only synced when necessary, and logs are only printed when work is actually being done to add the local or remote node.

Also removes an old hybrid overlay test case in which the node-subnets annotation of a node was removed. First introduced here: ovn-kubernetes/ovn-kubernetes@aef135c#diff-9ab180ea9a39f81dc8334a00ca8ea5e4cd04f9491c27dcfd910b07929c9ddbb5R193 It's not totally clear what the purpose of this test was, but we do not support clearing OVN configuration when OVNK-assigned annotations are removed by the user. The node-subnets annotation should not be removed, and if it is removed, it should be configured back onto the node by cluster-manager.

Signed-off-by: Tim Rozet <[email protected]>
1 parent b31c67a commit b56df72
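For context, the gist of the handler change: when hybrid overlay is disabled, the node event handlers now look for leftover hybrid overlay annotations and request a hybrid overlay sync only if there is actually something to clean up. Below is a minimal sketch of that check, assuming the inline logic from default_network_controller.go were factored into a helper; the helper name hybridOverlayNeedsCleanup is illustrative and not part of the commit.

package sketch

import (
	corev1 "k8s.io/api/core/v1"

	hotypes "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/types"
	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
)

// hybridOverlayNeedsCleanup reports whether a node still carries hybrid overlay
// distributed-router annotations even though the feature is disabled, meaning a
// previous hybrid overlay setup left OVN objects behind that should be removed.
// Hypothetical helper: the commit performs these checks inline in AddResource
// and UpdateResource.
func hybridOverlayNeedsCleanup(node *corev1.Node) bool {
	if config.HybridOverlay.Enabled {
		return false
	}
	_, hasDRIP := node.Annotations[hotypes.HybridOverlayDRIP]
	_, hasDRMAC := node.Annotations[hotypes.HybridOverlayDRMAC]
	return hasDRIP || hasDRMAC
}

In the diff this result is OR'd into the syncHo flag, so deleteHybridOverlayPort only runs (and only logs) when cleanup work exists.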

File tree

6 files changed: +89, -261 lines changed


go-controller/pkg/ovn/default_network_controller.go

Lines changed: 28 additions & 9 deletions
@@ -15,6 +15,7 @@ import (
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/klog/v2"
 
+	hotypes "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/types"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller"
 	egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1"
@@ -762,6 +763,16 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from
 		var aggregatedErrors []error
 		if h.oc.isLocalZoneNode(node) {
 			var nodeParams *nodeSyncs
+			hoNeedsCleanup := false
+			if !config.HybridOverlay.Enabled {
+				// check if the node has the stale annotations on it to signal that we need to clean up
+				if _, exists := node.Annotations[hotypes.HybridOverlayDRIP]; exists {
+					hoNeedsCleanup = true
+				}
+				if _, exist := node.Annotations[hotypes.HybridOverlayDRMAC]; exist {
+					hoNeedsCleanup = true
+				}
+			}
 			if fromRetryLoop {
 				_, nodeSync := h.oc.addNodeFailed.Load(node.Name)
 				_, clusterRtrSync := h.oc.nodeClusterRouterPortFailed.Load(node.Name)
@@ -774,18 +785,17 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from
 					syncClusterRouterPort: clusterRtrSync,
 					syncMgmtPort:          mgmtSync,
 					syncGw:                gwSync,
-					syncHo:                hoSync,
+					syncHo:                hoSync || hoNeedsCleanup,
 					syncZoneIC:            zoneICSync}
 			} else {
 				nodeParams = &nodeSyncs{
 					syncNode:              true,
 					syncClusterRouterPort: true,
 					syncMgmtPort:          true,
 					syncGw:                true,
-					syncHo:                config.HybridOverlay.Enabled,
+					syncHo:                config.HybridOverlay.Enabled || hoNeedsCleanup,
 					syncZoneIC:            config.OVNKubernetesFeature.EnableInterconnect}
 			}
-
 			if err = h.oc.addUpdateLocalNodeEvent(node, nodeParams); err != nil {
 				klog.Infof("Node add failed for %s, will try again later: %v",
 					node.Name, err)
@@ -941,6 +951,16 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 			_, failed = h.oc.gatewaysFailed.Load(newNode.Name)
 			gwSync := failed || gatewayChanged(oldNode, newNode) || nodeSubnetChange ||
 				hostCIDRsChanged(oldNode, newNode) || nodeGatewayMTUSupportChanged(oldNode, newNode)
+			hoNeedsCleanup := false
+			if !config.HybridOverlay.Enabled {
+				// check if the node has the stale annotations on it to signal that we need to clean up
+				if _, exists := newNode.Annotations[hotypes.HybridOverlayDRIP]; exists {
+					hoNeedsCleanup = true
+				}
+				if _, exist := newNode.Annotations[hotypes.HybridOverlayDRMAC]; exist {
+					hoNeedsCleanup = true
+				}
+			}
 			_, hoSync := h.oc.hybridOverlayFailed.Load(newNode.Name)
 			_, syncZoneIC := h.oc.syncZoneICFailed.Load(newNode.Name)
 			syncZoneIC = syncZoneIC || zoneClusterChanged || primaryAddrChanged(oldNode, newNode)
@@ -949,12 +969,12 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 				syncClusterRouterPort: clusterRtrSync,
 				syncMgmtPort:          mgmtSync,
 				syncGw:                gwSync,
-				syncHo:                hoSync,
+				syncHo:                hoSync || hoNeedsCleanup,
 				syncZoneIC:            syncZoneIC,
 			}
 		} else {
-			klog.Infof("Node %s moved from the remote zone %s to local zone %s.",
-				newNode.Name, util.GetNodeZone(oldNode), util.GetNodeZone(newNode))
+			klog.Infof("Node %s moved from the remote zone %s to local zone %s, in network: %q",
+				newNode.Name, util.GetNodeZone(oldNode), util.GetNodeZone(newNode), h.oc.GetNetworkName())
 			// The node is now a local zone node. Trigger a full node sync.
 			nodeSyncsParam = &nodeSyncs{
 				syncNode:              true,
@@ -964,7 +984,6 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 				syncHo:                true,
 				syncZoneIC:            config.OVNKubernetesFeature.EnableInterconnect}
 		}
-
 		if err := h.oc.addUpdateLocalNodeEvent(newNode, nodeSyncsParam); err != nil {
 			aggregatedErrors = append(aggregatedErrors, err)
 		}
@@ -977,8 +996,8 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 		syncZoneIC = syncZoneIC || h.oc.isLocalZoneNode(oldNode) || nodeSubnetChange || zoneClusterChanged ||
 			switchToOvnNode || nodeEncapIPsChanged
 		if syncZoneIC {
-			klog.Infof("Node %s in remote zone %s needs interconnect zone sync up. Zone cluster changed: %v",
-				newNode.Name, util.GetNodeZone(newNode), zoneClusterChanged)
+			klog.Infof("Node %q in remote zone %q, network %q, needs interconnect zone sync up. Zone cluster changed: %v",
+				newNode.Name, util.GetNodeZone(newNode), h.oc.GetNetworkName(), zoneClusterChanged)
 		}
 		if err := h.oc.addUpdateRemoteNodeEvent(newNode, syncZoneIC); err != nil {
 			aggregatedErrors = append(aggregatedErrors, err)

go-controller/pkg/ovn/hybrid.go

Lines changed: 10 additions & 4 deletions
@@ -137,12 +137,18 @@ func (oc *DefaultNetworkController) handleHybridOverlayPort(node *corev1.Node, a
 }
 
 func (oc *DefaultNetworkController) deleteHybridOverlayPort(node *corev1.Node) error {
-	klog.Infof("Removing node %s hybrid overlay port", node.Name)
 	portName := util.GetHybridOverlayPortName(node.Name)
 	lsp := nbdb.LogicalSwitchPort{Name: portName}
-	sw := nbdb.LogicalSwitch{Name: oc.GetNetworkScopedSwitchName(node.Name)}
-	if err := libovsdbops.DeleteLogicalSwitchPorts(oc.nbClient, &sw, &lsp); err != nil {
-		return err
+	if _, err := libovsdbops.GetLogicalSwitchPort(oc.nbClient, &lsp); err != nil {
+		if !errors.Is(err, libovsdbclient.ErrNotFound) {
+			return fmt.Errorf("failed to get logical switch port for hybrid overlay port %s, err: %v", portName, err)
+		}
+	} else {
+		sw := nbdb.LogicalSwitch{Name: oc.GetNetworkScopedSwitchName(node.Name)}
+		klog.Infof("Removing node %s hybrid overlay port", node.Name)
+		if err := libovsdbops.DeleteLogicalSwitchPorts(oc.nbClient, &sw, &lsp); err != nil {
+			return err
+		}
 	}
 	if err := oc.removeHybridLRPolicySharedGW(node); err != nil {
 		return err

go-controller/pkg/ovn/hybrid_test.go

Lines changed: 0 additions & 223 deletions
@@ -1475,229 +1475,6 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() {
 		gomega.Expect(err).NotTo(gomega.HaveOccurred())
 	})
 
-	ginkgo.It("cleans up a Linux node when the OVN hostsubnet annotation is removed", func() {
-		app.Action = func(ctx *cli.Context) error {
-			const (
-				nodeHOMAC string = "0a:58:0a:01:01:03"
-				hoSubnet  string = "11.1.0.0/16"
-				nodeHOIP  string = "10.1.1.3"
-			)
-			node1 := tNode{
-				Name:                 "node1",
-				NodeIP:               "1.2.3.4",
-				NodeLRPMAC:           "0a:58:0a:01:01:01",
-				LrpIP:                "100.64.0.2",
-				DrLrpIP:              "100.64.0.1",
-				PhysicalBridgeMAC:    "11:22:33:44:55:66",
-				SystemID:             "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6",
-				NodeSubnet:           "10.1.1.0/24",
-				GWRouter:             types.GWRouterPrefix + "node1",
-				GatewayRouterIPMask:  "172.16.16.2/24",
-				GatewayRouterIP:      "172.16.16.2",
-				GatewayRouterNextHop: "172.16.16.1",
-				PhysicalBridgeName:   "br-eth0",
-				NodeGWIP:             "10.1.1.1/24",
-				NodeMgmtPortIP:       "10.1.1.2",
-				//NodeMgmtPortMAC:      "0a:58:0a:01:01:02",
-				NodeMgmtPortMAC: "0a:58:64:40:00:03",
-				DnatSnatIP:      "169.254.0.1",
-			}
-			testNode := node1.k8sNode("2")
-
-			kubeFakeClient := fake.NewSimpleClientset(&corev1.NodeList{
-				Items: []corev1.Node{testNode},
-			})
-			egressFirewallFakeClient := &egressfirewallfake.Clientset{}
-			egressIPFakeClient := &egressipfake.Clientset{}
-			egressQoSFakeClient := &egressqosfake.Clientset{}
-			egressServiceFakeClient := &egressservicefake.Clientset{}
-			fakeClient := &util.OVNMasterClientset{
-				KubeClient:           kubeFakeClient,
-				EgressIPClient:       egressIPFakeClient,
-				EgressFirewallClient: egressFirewallFakeClient,
-				EgressQoSClient:      egressQoSFakeClient,
-				EgressServiceClient:  egressServiceFakeClient,
-			}
-
-			vlanID := 1024
-			_, err := config.InitConfig(ctx, nil, nil)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			config.Kubernetes.HostNetworkNamespace = ""
-			nodeAnnotator := kube.NewNodeAnnotator(&kube.Kube{KClient: kubeFakeClient}, testNode.Name)
-			l3Config := node1.gatewayConfig(config.GatewayModeShared, uint(vlanID))
-			err = util.SetL3GatewayConfig(nodeAnnotator, l3Config)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = util.UpdateNodeManagementPortMACAddresses(&testNode, nodeAnnotator,
-				ovntest.MustParseMAC(node1.NodeMgmtPortMAC), types.DefaultNetworkName)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = util.SetNodeHostSubnetAnnotation(nodeAnnotator, ovntest.MustParseIPNets(node1.NodeSubnet))
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = util.SetNodeHostCIDRs(nodeAnnotator, sets.New(fmt.Sprintf("%s/24", node1.NodeIP)))
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = nodeAnnotator.Run()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), testNode.Name, metav1.GetOptions{})
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			l3GatewayConfig, err := util.ParseNodeL3GatewayAnnotation(updatedNode)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			hostAddrs, err := util.ParseNodeHostCIDRsDropNetMask(updatedNode)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			f, err = factory.NewMasterWatchFactory(fakeClient)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = f.Start()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			expectedClusterLBGroup := newLoadBalancerGroup(types.ClusterLBGroupName)
-			expectedSwitchLBGroup := newLoadBalancerGroup(types.ClusterSwitchLBGroupName)
-			expectedRouterLBGroup := newLoadBalancerGroup(types.ClusterRouterLBGroupName)
-			expectedOVNClusterRouter := newOVNClusterRouter()
-			ovnClusterRouterLRP := &nbdb.LogicalRouterPort{
-				Name:     types.GWRouterToJoinSwitchPrefix + types.OVNClusterRouter,
-				Networks: []string{"100.64.0.1/16"},
-				UUID:     types.GWRouterToJoinSwitchPrefix + types.OVNClusterRouter + "-UUID",
-			}
-			expectedOVNClusterRouter.Ports = []string{ovnClusterRouterLRP.UUID}
-			expectedNodeSwitch := node1.logicalSwitch([]string{expectedClusterLBGroup.UUID, expectedSwitchLBGroup.UUID})
-			expectedClusterRouterPortGroup := newRouterPortGroup()
-			expectedClusterPortGroup := newClusterPortGroup()
-
-			dbSetup := libovsdbtest.TestSetup{
-				NBData: []libovsdbtest.TestData{
-					newClusterJoinSwitch(),
-					expectedNodeSwitch,
-					ovnClusterRouterLRP,
-					expectedOVNClusterRouter,
-					expectedClusterRouterPortGroup,
-					expectedClusterPortGroup,
-					expectedClusterLBGroup,
-					expectedSwitchLBGroup,
-					expectedRouterLBGroup,
-				},
-			}
-			var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client
-			libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			expectedDatabaseState := []libovsdbtest.TestData{ovnClusterRouterLRP}
-			expectedDatabaseState = addNodeLogicalFlows(expectedDatabaseState, expectedOVNClusterRouter, expectedNodeSwitch, expectedClusterRouterPortGroup, expectedClusterPortGroup, &node1)
-
-			clusterController, err := NewOvnController(
-				fakeClient,
-				f,
-				stopChan,
-				nil,
-				networkmanager.Default().Interface(),
-				libovsdbOvnNBClient,
-				libovsdbOvnSBClient,
-				record.NewFakeRecorder(10),
-				wg,
-				nil,
-				NewPortCache(stopChan),
-			)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			setupCOPP := true
-			setupClusterController(clusterController, setupCOPP)
-
-			//assuming all the pods have finished processing
-			atomic.StoreUint32(&clusterController.allInitialPodsProcessed, 1)
-			// Let the real code run and ensure OVN database sync
-			gomega.Expect(clusterController.WatchNodes()).To(gomega.Succeed())
-
-			gomega.Eventually(func() (map[string]string, error) {
-				updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), testNode.Name, metav1.GetOptions{})
-				if err != nil {
-					return nil, err
-				}
-				return updatedNode.Annotations, nil
-			}, 2).Should(gomega.HaveKeyWithValue(hotypes.HybridOverlayDRMAC, nodeHOMAC))
-
-			subnet := ovntest.MustParseIPNet(node1.NodeSubnet)
-			err = clusterController.syncDefaultGatewayLogicalNetwork(updatedNode, l3GatewayConfig, []*net.IPNet{subnet}, hostAddrs.UnsortedList())
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			var clusterSubnets []*net.IPNet
-			for _, clusterSubnet := range config.Default.ClusterSubnets {
-				clusterSubnets = append(clusterSubnets, clusterSubnet.CIDR)
-			}
-
-			skipSnat := false
-			expectedDatabaseState = generateGatewayInitExpectedNB(expectedDatabaseState, expectedOVNClusterRouter,
-				expectedNodeSwitch, node1.Name, clusterSubnets, []*net.IPNet{subnet}, l3Config,
-				[]*net.IPNet{classBIPAddress(node1.LrpIP)}, []*net.IPNet{classBIPAddress(node1.DrLrpIP)}, skipSnat,
-				node1.NodeMgmtPortIP, "1400")
-
-			hybridSubnetStaticRoute1, hybridLogicalRouterStaticRoute, hybridSubnetLRP1, hybridSubnetLRP2, hybridLogicalSwitchPort := setupHybridOverlayOVNObjects(node1, "", hoSubnet, nodeHOIP, nodeHOMAC)
-
-			var node1LogicalRouter *nbdb.LogicalRouter
-			var basicNode1StaticRoutes []string
-
-			for _, obj := range expectedDatabaseState {
-				if logicalRouter, ok := obj.(*nbdb.LogicalRouter); ok {
-					if logicalRouter.Name == "GR_node1" {
-						// keep a referance so that we can edit this object
-						node1LogicalRouter = logicalRouter
-						basicNode1StaticRoutes = logicalRouter.StaticRoutes
-						logicalRouter.StaticRoutes = append(logicalRouter.StaticRoutes, hybridLogicalRouterStaticRoute.UUID)
-					}
-				}
-			}
-
-			// keep copies of these before appending hybrid overlay elements
-			basicExpectedNodeSwitchPorts := expectedNodeSwitch.Ports
-			basicExpectedOVNClusterRouterPolicies := expectedOVNClusterRouter.Policies
-			basicExpectedOVNClusterStaticRoutes := expectedOVNClusterRouter.StaticRoutes
-
-			expectedNodeSwitch.Ports = append(expectedNodeSwitch.Ports, hybridLogicalSwitchPort.UUID)
-			expectedOVNClusterRouter.Policies = append(expectedOVNClusterRouter.Policies, hybridSubnetLRP1.UUID, hybridSubnetLRP2.UUID)
-			expectedOVNClusterRouter.StaticRoutes = append(expectedOVNClusterRouter.StaticRoutes, hybridSubnetStaticRoute1.UUID)
-
-			expectedDatabaseStateWithHybridNode := append([]libovsdbtest.TestData{hybridSubnetStaticRoute1, hybridSubnetLRP2, hybridSubnetLRP1, hybridLogicalSwitchPort, hybridLogicalRouterStaticRoute}, expectedDatabaseState...)
-			expectedStaticMACBinding := &nbdb.StaticMACBinding{
-				UUID:               "MAC-binding-HO-UUID",
-				IP:                 nodeHOIP,
-				LogicalPort:        "rtos-node1",
-				MAC:                nodeHOMAC,
-				OverrideDynamicMAC: true,
-			}
-			expectedDatabaseStateWithHybridNode = append(expectedDatabaseStateWithHybridNode, expectedStaticMACBinding)
-			gomega.Eventually(libovsdbOvnNBClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithHybridNode))
-
-			nodeAnnotator = kube.NewNodeAnnotator(&kube.Kube{KClient: kubeFakeClient}, testNode.Name)
-			util.DeleteNodeHostSubnetAnnotation(nodeAnnotator)
-			err = nodeAnnotator.Run()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			gomega.Eventually(func() (map[string]string, error) {
-				updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), testNode.Name, metav1.GetOptions{})
-				if err != nil {
-					return nil, err
-				}
-				return updatedNode.Annotations, nil
-			}, 5).ShouldNot(gomega.HaveKey(hotypes.HybridOverlayDRMAC))
-
-			// restore values from the non-hybrid versions
-			expectedNodeSwitch.Ports = basicExpectedNodeSwitchPorts
-			expectedOVNClusterRouter.Policies = basicExpectedOVNClusterRouterPolicies
-			expectedOVNClusterRouter.StaticRoutes = basicExpectedOVNClusterStaticRoutes
-			node1LogicalRouter.StaticRoutes = basicNode1StaticRoutes
-
-			gomega.Eventually(libovsdbOvnNBClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
-
-			return nil
-		}
-		err := app.Run([]string{
-			app.Name,
-			"-cluster-subnets=" + clusterCIDR,
-			"-gateway-mode=shared",
-			"-enable-hybrid-overlay",
-			"-hybrid-overlay-cluster-subnets=" + hybridOverlayClusterCIDR,
-		})
-		gomega.Expect(err).NotTo(gomega.HaveOccurred())
-	})
-
 	ginkgo.It("cleans up a Linux node that has hybridOverlay annotations and database objects when hybrid overlay is disabled", func() {
 		app.Action = func(ctx *cli.Context) error {
 			const (
