Commit d9ba339

Merge pull request #5278 from trozet/node_update_improvements
Some quality of life improvements for layer 3 controllers node handling
2 parents (15a2c63 + 98518ea); commit d9ba339

File tree

6 files changed: +149 −318 lines changed

go-controller/pkg/ovn/default_network_controller.go

Lines changed: 28 additions & 9 deletions
@@ -15,6 +15,7 @@ import (
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/klog/v2"
 
+	hotypes "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/types"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller"
 	egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1"
@@ -762,6 +763,16 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from
 		var aggregatedErrors []error
 		if h.oc.isLocalZoneNode(node) {
 			var nodeParams *nodeSyncs
+			hoNeedsCleanup := false
+			if !config.HybridOverlay.Enabled {
+				// check if the node has the stale annotations on it to signal that we need to clean up
+				if _, exists := node.Annotations[hotypes.HybridOverlayDRIP]; exists {
+					hoNeedsCleanup = true
+				}
+				if _, exist := node.Annotations[hotypes.HybridOverlayDRMAC]; exist {
+					hoNeedsCleanup = true
+				}
+			}
 			if fromRetryLoop {
 				_, nodeSync := h.oc.addNodeFailed.Load(node.Name)
 				_, clusterRtrSync := h.oc.nodeClusterRouterPortFailed.Load(node.Name)
@@ -774,18 +785,17 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from
 					syncClusterRouterPort: clusterRtrSync,
 					syncMgmtPort:          mgmtSync,
 					syncGw:                gwSync,
-					syncHo:                hoSync,
+					syncHo:                hoSync || hoNeedsCleanup,
 					syncZoneIC:            zoneICSync}
 			} else {
 				nodeParams = &nodeSyncs{
 					syncNode:              true,
 					syncClusterRouterPort: true,
 					syncMgmtPort:          true,
 					syncGw:                true,
-					syncHo:                config.HybridOverlay.Enabled,
+					syncHo:                config.HybridOverlay.Enabled || hoNeedsCleanup,
 					syncZoneIC:            config.OVNKubernetesFeature.EnableInterconnect}
 			}
-
 			if err = h.oc.addUpdateLocalNodeEvent(node, nodeParams); err != nil {
 				klog.Infof("Node add failed for %s, will try again later: %v",
 					node.Name, err)
@@ -941,6 +951,16 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 			_, failed = h.oc.gatewaysFailed.Load(newNode.Name)
 			gwSync := failed || gatewayChanged(oldNode, newNode) || nodeSubnetChange ||
 				hostCIDRsChanged(oldNode, newNode) || nodeGatewayMTUSupportChanged(oldNode, newNode)
+			hoNeedsCleanup := false
+			if !config.HybridOverlay.Enabled {
+				// check if the node has the stale annotations on it to signal that we need to clean up
+				if _, exists := newNode.Annotations[hotypes.HybridOverlayDRIP]; exists {
+					hoNeedsCleanup = true
+				}
+				if _, exist := newNode.Annotations[hotypes.HybridOverlayDRMAC]; exist {
+					hoNeedsCleanup = true
+				}
+			}
 			_, hoSync := h.oc.hybridOverlayFailed.Load(newNode.Name)
 			_, syncZoneIC := h.oc.syncZoneICFailed.Load(newNode.Name)
 			syncZoneIC = syncZoneIC || zoneClusterChanged || primaryAddrChanged(oldNode, newNode)
@@ -949,12 +969,12 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 				syncClusterRouterPort: clusterRtrSync,
 				syncMgmtPort:          mgmtSync,
 				syncGw:                gwSync,
-				syncHo:                hoSync,
+				syncHo:                hoSync || hoNeedsCleanup,
 				syncZoneIC:            syncZoneIC,
 			}
 		} else {
-			klog.Infof("Node %s moved from the remote zone %s to local zone %s.",
-				newNode.Name, util.GetNodeZone(oldNode), util.GetNodeZone(newNode))
+			klog.Infof("Node %s moved from the remote zone %s to local zone %s, in network: %q",
+				newNode.Name, util.GetNodeZone(oldNode), util.GetNodeZone(newNode), h.oc.GetNetworkName())
 			// The node is now a local zone node. Trigger a full node sync.
 			nodeSyncsParam = &nodeSyncs{
 				syncNode:              true,
@@ -964,7 +984,6 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 				syncHo:                true,
 				syncZoneIC:            config.OVNKubernetesFeature.EnableInterconnect}
 		}
-
 		if err := h.oc.addUpdateLocalNodeEvent(newNode, nodeSyncsParam); err != nil {
 			aggregatedErrors = append(aggregatedErrors, err)
 		}
@@ -977,8 +996,8 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 		syncZoneIC = syncZoneIC || h.oc.isLocalZoneNode(oldNode) || nodeSubnetChange || zoneClusterChanged ||
 			switchToOvnNode || nodeEncapIPsChanged
 		if syncZoneIC {
-			klog.Infof("Node %s in remote zone %s needs interconnect zone sync up. Zone cluster changed: %v",
-				newNode.Name, util.GetNodeZone(newNode), zoneClusterChanged)
+			klog.Infof("Node %q in remote zone %q, network %q, needs interconnect zone sync up. Zone cluster changed: %v",
+				newNode.Name, util.GetNodeZone(newNode), h.oc.GetNetworkName(), zoneClusterChanged)
 		}
 		if err := h.oc.addUpdateRemoteNodeEvent(newNode, syncZoneIC); err != nil {
 			aggregatedErrors = append(aggregatedErrors, err)
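
Note: the stale-annotation check above is duplicated verbatim in AddResource and UpdateResource. As a reading aid, here is a minimal sketch of the same logic factored into one predicate; the helper name hybridOverlayNeedsCleanup is hypothetical and not part of this commit:

	// Sketch only (hypothetical helper, not in this commit): when hybrid
	// overlay is disabled, leftover distributed-router annotations mean the
	// node still carries hybrid overlay state, so a hybrid overlay sync
	// pass is forced to clean it up.
	func hybridOverlayNeedsCleanup(node *corev1.Node) bool {
		if config.HybridOverlay.Enabled {
			return false
		}
		_, hasDRIP := node.Annotations[hotypes.HybridOverlayDRIP]
		_, hasDRMAC := node.Annotations[hotypes.HybridOverlayDRMAC]
		return hasDRIP || hasDRMAC
	}

With such a helper, both call sites would reduce to syncHo: hoSync || hybridOverlayNeedsCleanup(node).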

go-controller/pkg/ovn/hybrid.go

Lines changed: 10 additions & 4 deletions
@@ -137,12 +137,18 @@ func (oc *DefaultNetworkController) handleHybridOverlayPort(node *corev1.Node, a
 }
 
 func (oc *DefaultNetworkController) deleteHybridOverlayPort(node *corev1.Node) error {
-	klog.Infof("Removing node %s hybrid overlay port", node.Name)
 	portName := util.GetHybridOverlayPortName(node.Name)
 	lsp := nbdb.LogicalSwitchPort{Name: portName}
-	sw := nbdb.LogicalSwitch{Name: oc.GetNetworkScopedSwitchName(node.Name)}
-	if err := libovsdbops.DeleteLogicalSwitchPorts(oc.nbClient, &sw, &lsp); err != nil {
-		return err
+	if _, err := libovsdbops.GetLogicalSwitchPort(oc.nbClient, &lsp); err != nil {
+		if !errors.Is(err, libovsdbclient.ErrNotFound) {
+			return fmt.Errorf("failed to get logical switch port for hybrid overlay port %s, err: %v", portName, err)
+		}
+	} else {
+		sw := nbdb.LogicalSwitch{Name: oc.GetNetworkScopedSwitchName(node.Name)}
+		klog.Infof("Removing node %s hybrid overlay port", node.Name)
+		if err := libovsdbops.DeleteLogicalSwitchPorts(oc.nbClient, &sw, &lsp); err != nil {
+			return err
+		}
 	}
 	if err := oc.removeHybridLRPolicySharedGW(node); err != nil {
 		return err
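
Note: this rewrite makes deleteHybridOverlayPort idempotent: the port is looked up first, libovsdbclient.ErrNotFound is treated as nothing to clean up, and the delete (plus its log line) only runs when the port actually exists. A minimal sketch of the lookup-before-delete pattern, using only calls that appear in the diff (the wrapper name deleteLSPIfExists is hypothetical):

	// Sketch only: repeated cleanup passes become no-ops once the port is gone.
	func deleteLSPIfExists(nbClient libovsdbclient.Client, sw *nbdb.LogicalSwitch, lsp *nbdb.LogicalSwitchPort) error {
		if _, err := libovsdbops.GetLogicalSwitchPort(nbClient, lsp); err != nil {
			if errors.Is(err, libovsdbclient.ErrNotFound) {
				return nil // already deleted; nothing to do
			}
			return fmt.Errorf("failed to get logical switch port %s: %v", lsp.Name, err)
		}
		return libovsdbops.DeleteLogicalSwitchPorts(nbClient, sw, lsp)
	}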

go-controller/pkg/ovn/hybrid_test.go

Lines changed: 0 additions & 223 deletions
@@ -1475,229 +1475,6 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() {
 		gomega.Expect(err).NotTo(gomega.HaveOccurred())
 	})
 
-	ginkgo.It("cleans up a Linux node when the OVN hostsubnet annotation is removed", func() {
-		app.Action = func(ctx *cli.Context) error {
-			const (
-				nodeHOMAC string = "0a:58:0a:01:01:03"
-				hoSubnet  string = "11.1.0.0/16"
-				nodeHOIP  string = "10.1.1.3"
-			)
-			node1 := tNode{
-				Name:                 "node1",
-				NodeIP:               "1.2.3.4",
-				NodeLRPMAC:           "0a:58:0a:01:01:01",
-				LrpIP:                "100.64.0.2",
-				DrLrpIP:              "100.64.0.1",
-				PhysicalBridgeMAC:    "11:22:33:44:55:66",
-				SystemID:             "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6",
-				NodeSubnet:           "10.1.1.0/24",
-				GWRouter:             types.GWRouterPrefix + "node1",
-				GatewayRouterIPMask:  "172.16.16.2/24",
-				GatewayRouterIP:      "172.16.16.2",
-				GatewayRouterNextHop: "172.16.16.1",
-				PhysicalBridgeName:   "br-eth0",
-				NodeGWIP:             "10.1.1.1/24",
-				NodeMgmtPortIP:       "10.1.1.2",
-				//NodeMgmtPortMAC:    "0a:58:0a:01:01:02",
-				NodeMgmtPortMAC:      "0a:58:64:40:00:03",
-				DnatSnatIP:           "169.254.0.1",
-			}
-			testNode := node1.k8sNode("2")
-
-			kubeFakeClient := fake.NewSimpleClientset(&corev1.NodeList{
-				Items: []corev1.Node{testNode},
-			})
-			egressFirewallFakeClient := &egressfirewallfake.Clientset{}
-			egressIPFakeClient := &egressipfake.Clientset{}
-			egressQoSFakeClient := &egressqosfake.Clientset{}
-			egressServiceFakeClient := &egressservicefake.Clientset{}
-			fakeClient := &util.OVNMasterClientset{
-				KubeClient:           kubeFakeClient,
-				EgressIPClient:       egressIPFakeClient,
-				EgressFirewallClient: egressFirewallFakeClient,
-				EgressQoSClient:      egressQoSFakeClient,
-				EgressServiceClient:  egressServiceFakeClient,
-			}
-
-			vlanID := 1024
-			_, err := config.InitConfig(ctx, nil, nil)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			config.Kubernetes.HostNetworkNamespace = ""
-			nodeAnnotator := kube.NewNodeAnnotator(&kube.Kube{KClient: kubeFakeClient}, testNode.Name)
-			l3Config := node1.gatewayConfig(config.GatewayModeShared, uint(vlanID))
-			err = util.SetL3GatewayConfig(nodeAnnotator, l3Config)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = util.UpdateNodeManagementPortMACAddresses(&testNode, nodeAnnotator,
-				ovntest.MustParseMAC(node1.NodeMgmtPortMAC), types.DefaultNetworkName)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = util.SetNodeHostSubnetAnnotation(nodeAnnotator, ovntest.MustParseIPNets(node1.NodeSubnet))
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = util.SetNodeHostCIDRs(nodeAnnotator, sets.New(fmt.Sprintf("%s/24", node1.NodeIP)))
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = nodeAnnotator.Run()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), testNode.Name, metav1.GetOptions{})
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			l3GatewayConfig, err := util.ParseNodeL3GatewayAnnotation(updatedNode)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			hostAddrs, err := util.ParseNodeHostCIDRsDropNetMask(updatedNode)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			f, err = factory.NewMasterWatchFactory(fakeClient)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = f.Start()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			expectedClusterLBGroup := newLoadBalancerGroup(types.ClusterLBGroupName)
-			expectedSwitchLBGroup := newLoadBalancerGroup(types.ClusterSwitchLBGroupName)
-			expectedRouterLBGroup := newLoadBalancerGroup(types.ClusterRouterLBGroupName)
-			expectedOVNClusterRouter := newOVNClusterRouter()
-			ovnClusterRouterLRP := &nbdb.LogicalRouterPort{
-				Name:     types.GWRouterToJoinSwitchPrefix + types.OVNClusterRouter,
-				Networks: []string{"100.64.0.1/16"},
-				UUID:     types.GWRouterToJoinSwitchPrefix + types.OVNClusterRouter + "-UUID",
-			}
-			expectedOVNClusterRouter.Ports = []string{ovnClusterRouterLRP.UUID}
-			expectedNodeSwitch := node1.logicalSwitch([]string{expectedClusterLBGroup.UUID, expectedSwitchLBGroup.UUID})
-			expectedClusterRouterPortGroup := newRouterPortGroup()
-			expectedClusterPortGroup := newClusterPortGroup()
-
-			dbSetup := libovsdbtest.TestSetup{
-				NBData: []libovsdbtest.TestData{
-					newClusterJoinSwitch(),
-					expectedNodeSwitch,
-					ovnClusterRouterLRP,
-					expectedOVNClusterRouter,
-					expectedClusterRouterPortGroup,
-					expectedClusterPortGroup,
-					expectedClusterLBGroup,
-					expectedSwitchLBGroup,
-					expectedRouterLBGroup,
-				},
-			}
-			var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client
-			libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			expectedDatabaseState := []libovsdbtest.TestData{ovnClusterRouterLRP}
-			expectedDatabaseState = addNodeLogicalFlows(expectedDatabaseState, expectedOVNClusterRouter, expectedNodeSwitch, expectedClusterRouterPortGroup, expectedClusterPortGroup, &node1)
-
-			clusterController, err := NewOvnController(
-				fakeClient,
-				f,
-				stopChan,
-				nil,
-				networkmanager.Default().Interface(),
-				libovsdbOvnNBClient,
-				libovsdbOvnSBClient,
-				record.NewFakeRecorder(10),
-				wg,
-				nil,
-				NewPortCache(stopChan),
-			)
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			setupCOPP := true
-			setupClusterController(clusterController, setupCOPP)
-
-			//assuming all the pods have finished processing
-			atomic.StoreUint32(&clusterController.allInitialPodsProcessed, 1)
-			// Let the real code run and ensure OVN database sync
-			gomega.Expect(clusterController.WatchNodes()).To(gomega.Succeed())
-
-			gomega.Eventually(func() (map[string]string, error) {
-				updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), testNode.Name, metav1.GetOptions{})
-				if err != nil {
-					return nil, err
-				}
-				return updatedNode.Annotations, nil
-			}, 2).Should(gomega.HaveKeyWithValue(hotypes.HybridOverlayDRMAC, nodeHOMAC))
-
-			subnet := ovntest.MustParseIPNet(node1.NodeSubnet)
-			err = clusterController.syncDefaultGatewayLogicalNetwork(updatedNode, l3GatewayConfig, []*net.IPNet{subnet}, hostAddrs.UnsortedList())
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			var clusterSubnets []*net.IPNet
-			for _, clusterSubnet := range config.Default.ClusterSubnets {
-				clusterSubnets = append(clusterSubnets, clusterSubnet.CIDR)
-			}
-
-			skipSnat := false
-			expectedDatabaseState = generateGatewayInitExpectedNB(expectedDatabaseState, expectedOVNClusterRouter,
-				expectedNodeSwitch, node1.Name, clusterSubnets, []*net.IPNet{subnet}, l3Config,
-				[]*net.IPNet{classBIPAddress(node1.LrpIP)}, []*net.IPNet{classBIPAddress(node1.DrLrpIP)}, skipSnat,
-				node1.NodeMgmtPortIP, "1400")
-
-			hybridSubnetStaticRoute1, hybridLogicalRouterStaticRoute, hybridSubnetLRP1, hybridSubnetLRP2, hybridLogicalSwitchPort := setupHybridOverlayOVNObjects(node1, "", hoSubnet, nodeHOIP, nodeHOMAC)
-
-			var node1LogicalRouter *nbdb.LogicalRouter
-			var basicNode1StaticRoutes []string
-
-			for _, obj := range expectedDatabaseState {
-				if logicalRouter, ok := obj.(*nbdb.LogicalRouter); ok {
-					if logicalRouter.Name == "GR_node1" {
-						// keep a referance so that we can edit this object
-						node1LogicalRouter = logicalRouter
-						basicNode1StaticRoutes = logicalRouter.StaticRoutes
-						logicalRouter.StaticRoutes = append(logicalRouter.StaticRoutes, hybridLogicalRouterStaticRoute.UUID)
-					}
-				}
-			}
-
-			// keep copies of these before appending hybrid overlay elements
-			basicExpectedNodeSwitchPorts := expectedNodeSwitch.Ports
-			basicExpectedOVNClusterRouterPolicies := expectedOVNClusterRouter.Policies
-			basicExpectedOVNClusterStaticRoutes := expectedOVNClusterRouter.StaticRoutes
-
-			expectedNodeSwitch.Ports = append(expectedNodeSwitch.Ports, hybridLogicalSwitchPort.UUID)
-			expectedOVNClusterRouter.Policies = append(expectedOVNClusterRouter.Policies, hybridSubnetLRP1.UUID, hybridSubnetLRP2.UUID)
-			expectedOVNClusterRouter.StaticRoutes = append(expectedOVNClusterRouter.StaticRoutes, hybridSubnetStaticRoute1.UUID)
-
-			expectedDatabaseStateWithHybridNode := append([]libovsdbtest.TestData{hybridSubnetStaticRoute1, hybridSubnetLRP2, hybridSubnetLRP1, hybridLogicalSwitchPort, hybridLogicalRouterStaticRoute}, expectedDatabaseState...)
-			expectedStaticMACBinding := &nbdb.StaticMACBinding{
-				UUID:               "MAC-binding-HO-UUID",
-				IP:                 nodeHOIP,
-				LogicalPort:        "rtos-node1",
-				MAC:                nodeHOMAC,
-				OverrideDynamicMAC: true,
-			}
-			expectedDatabaseStateWithHybridNode = append(expectedDatabaseStateWithHybridNode, expectedStaticMACBinding)
-			gomega.Eventually(libovsdbOvnNBClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithHybridNode))
-
-			nodeAnnotator = kube.NewNodeAnnotator(&kube.Kube{KClient: kubeFakeClient}, testNode.Name)
-			util.DeleteNodeHostSubnetAnnotation(nodeAnnotator)
-			err = nodeAnnotator.Run()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			gomega.Eventually(func() (map[string]string, error) {
-				updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), testNode.Name, metav1.GetOptions{})
-				if err != nil {
-					return nil, err
-				}
-				return updatedNode.Annotations, nil
-			}, 5).ShouldNot(gomega.HaveKey(hotypes.HybridOverlayDRMAC))
-
-			// restore values from the non-hybrid versions
-			expectedNodeSwitch.Ports = basicExpectedNodeSwitchPorts
-			expectedOVNClusterRouter.Policies = basicExpectedOVNClusterRouterPolicies
-			expectedOVNClusterRouter.StaticRoutes = basicExpectedOVNClusterStaticRoutes
-			node1LogicalRouter.StaticRoutes = basicNode1StaticRoutes
-
-			gomega.Eventually(libovsdbOvnNBClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
-
-			return nil
-		}
-		err := app.Run([]string{
-			app.Name,
-			"-cluster-subnets=" + clusterCIDR,
-			"-gateway-mode=shared",
-			"-enable-hybrid-overlay",
-			"-hybrid-overlay-cluster-subnets=" + hybridOverlayClusterCIDR,
-		})
-		gomega.Expect(err).NotTo(gomega.HaveOccurred())
-	})
-
 	ginkgo.It("cleans up a Linux node that has hybridOverlay annotations and database objects when hybrid overlay is disabled", func() {
 		app.Action = func(ctx *cli.Context) error {
 			const (
