Skip to content

Commit 9b56178

Browse files
authored
Merge pull request #5307 from aserdean/switch_dpu_annotation
gateway: Refactor gateway initialization and DPU host handling
2 parents 5eceb7d + 5b5bc06 commit 9b56178

File tree

8 files changed

+161
-52
lines changed

8 files changed

+161
-52
lines changed

go-controller/pkg/node/bridgeconfig/bridgeconfig.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -244,12 +244,15 @@ func (b *BridgeConfiguration) UpdateInterfaceIPAddresses(node *corev1.Node) ([]*
244244
// For DPU, here we need to use the DPU host's IP address which is the tenant cluster's
245245
// host internal IP address instead of the DPU's external bridge IP address.
246246
if config.OvnKubeNode.Mode == types.NodeModeDPU {
247-
nodeAddrStr, err := util.GetNodePrimaryIP(node)
247+
nodeIfAddr, err := util.GetNodePrimaryDPUHostAddrAnnotation(node)
248248
if err != nil {
249249
return nil, err
250250
}
251-
nodeAddr := net.ParseIP(nodeAddrStr)
252-
if nodeAddr == nil {
251+
// For DPU mode, we only support IPv4 for now.
252+
nodeAddrStr := nodeIfAddr.IPv4
253+
254+
nodeAddr, _, err := net.ParseCIDR(nodeAddrStr)
255+
if err != nil {
253256
return nil, fmt.Errorf("failed to parse node IP address. %v", nodeAddrStr)
254257
}
255258
ifAddrs, err = nodeutil.GetDPUHostPrimaryIPAddresses(nodeAddr, ifAddrs)

go-controller/pkg/node/default_node_network_controller.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -965,8 +965,12 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error {
965965

966966
// First part of gateway initialization. It will be completed by (nc *DefaultNodeNetworkController) Start()
967967
if config.OvnKubeNode.Mode != types.NodeModeDPUHost {
968+
// IPv6 is not supported in DPU enabled nodes, error out if ovnkube is not set in IPv4 mode
969+
if config.IPv6Mode && config.OvnKubeNode.Mode == types.NodeModeDPU {
970+
return fmt.Errorf("IPv6 mode is not supported on a DPU enabled node")
971+
}
968972
// Initialize gateway for OVS internal port or representor management port
969-
gw, err := nc.initGatewayPreStart(subnets, nodeAnnotator, nc.mgmtPortController, nodeAddr)
973+
gw, err := nc.initGatewayPreStart(subnets, nodeAnnotator, nc.mgmtPortController)
970974
if err != nil {
971975
return err
972976
}
@@ -1059,7 +1063,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
10591063
netdevName = netdevs[0]
10601064
config.Gateway.Interface = netdevName
10611065
}
1062-
err = nc.initGatewayDPUHost(nc.nodeAddress)
1066+
err = nc.initGatewayDPUHost(nc.nodeAddress, nodeAnnotator)
10631067
if err != nil {
10641068
return err
10651069
}

go-controller/pkg/node/gateway_init.go

Lines changed: 80 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99

1010
"github.com/vishvananda/netlink"
1111

12+
corev1 "k8s.io/api/core/v1"
13+
"k8s.io/apimachinery/pkg/util/sets"
1214
"k8s.io/klog/v2"
1315
utilnet "k8s.io/utils/net"
1416

@@ -195,7 +197,6 @@ func (nc *DefaultNodeNetworkController) initGatewayPreStart(
195197
subnets []*net.IPNet,
196198
nodeAnnotator kube.Annotator,
197199
mgmtPort managementport.Interface,
198-
kubeNodeIP net.IP,
199200
) (*gateway, error) {
200201

201202
klog.Info("Initializing Gateway Functionality for Gateway PreStart")
@@ -219,13 +220,39 @@ func (nc *DefaultNodeNetworkController) initGatewayPreStart(
219220
return nil, err
220221
}
221222

222-
// For DPU need to use the host IP addr which currently is assumed to be K8s Node cluster
223-
// internal IP address.
223+
// For DPU mode, we need to use the host IP address which is stored as a Kubernetes
224+
// node annotation rather than using the gateway interface IP addresses.
224225
if config.OvnKubeNode.Mode == types.NodeModeDPU {
225-
ifAddrs, err = nodeutil.GetDPUHostPrimaryIPAddresses(kubeNodeIP, ifAddrs)
226+
// Retrieve the current node object from the Kubernetes API
227+
var node *corev1.Node
228+
if node, err = nc.watchFactory.GetNode(nc.name); err != nil {
229+
return nil, fmt.Errorf("error retrieving node %s: %v", nc.name, err)
230+
}
231+
232+
// Extract the primary DPU address annotation from the node
233+
nodeIfAddr, err := util.GetNodePrimaryDPUHostAddrAnnotation(node)
226234
if err != nil {
227235
return nil, err
228236
}
237+
// For DPU mode, we only support IPv4 for now.
238+
nodeAddrStr := nodeIfAddr.IPv4
239+
if nodeAddrStr == "" {
240+
return nil, fmt.Errorf("node primary DPU address annotation is empty for node %s", nc.name)
241+
}
242+
243+
// Parse the IPv4 address string into IP and network components
244+
nodeIP, nodeAddrs, err := net.ParseCIDR(nodeAddrStr)
245+
if err != nil {
246+
return nil, fmt.Errorf("failed to parse node IP address %s: %v", nodeAddrStr, err)
247+
}
248+
249+
// Set the parsed IP as the network address
250+
nodeAddrs.IP = nodeIP
251+
252+
// Create a new slice and replace ifAddrs with the DPU host address
253+
// This overrides the gateway interface addresses for DPU mode
254+
var gwIps []*net.IPNet
255+
ifAddrs = append(gwIps, nodeAddrs)
229256
}
230257

231258
if err := util.SetNodePrimaryIfAddrs(nodeAnnotator, ifAddrs); err != nil {
@@ -359,43 +386,79 @@ func interfaceForEXGW(intfName string) string {
359386
return intfName
360387
}
361388

362-
func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP) error {
389+
func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP, nodeAnnotator kube.Annotator) error {
363390
// A DPU host gateway is complementary to the shared gateway running
364391
// on the DPU embedded CPU. it performs some initializations and
365392
// watch on services for iptable rule updates and run a loadBalancerHealth checker
366393
// Note: all K8s Node related annotations are handled from DPU.
367394
klog.Info("Initializing Shared Gateway Functionality on DPU host")
368395
var err error
369396

370-
// Force gateway interface to be the interface associated with kubeNodeIP
371-
gwIntf, err := getInterfaceByIP(kubeNodeIP)
397+
// Find the network interface that has the Kubernetes node IP assigned to it
398+
// This interface will be used for DPU host gateway operations
399+
kubeIntf, err := getInterfaceByIP(kubeNodeIP)
372400
if err != nil {
373401
return err
374402
}
375-
config.Gateway.Interface = gwIntf
376403

377-
_, gatewayIntf, err := getGatewayNextHops()
404+
// Get all IP addresses (IPv4 and IPv6) configured on the detected interface
405+
ifAddrs, err := nodeutil.GetNetworkInterfaceIPAddresses(kubeIntf)
378406
if err != nil {
379407
return err
380408
}
381409

382-
ifAddrs, err := nodeutil.GetNetworkInterfaceIPAddresses(gatewayIntf)
383-
if err != nil {
410+
// Extract the IPv4 address from the interface addresses for node annotation
411+
nodeIPNet, _ := util.MatchFirstIPNetFamily(false, ifAddrs)
412+
nodeAddrSet := sets.New[string](nodeIPNet.String())
413+
414+
// If no gateway interface is explicitly configured, use the detected interface
415+
if config.Gateway.Interface == "" {
416+
config.Gateway.Interface = kubeIntf
417+
}
418+
419+
// If a different gateway interface is configured than the one with used for the kubernetes node IP,
420+
// get its addresses and add them to the node address set for routing purposes
421+
if config.Gateway.Interface != kubeIntf {
422+
ifAddrs, err = nodeutil.GetNetworkInterfaceIPAddresses(config.Gateway.Interface)
423+
if err != nil {
424+
return err
425+
}
426+
detectedIPNetv4, _ := util.MatchFirstIPNetFamily(false, ifAddrs)
427+
nodeAddrSet.Insert(detectedIPNetv4.String())
428+
// Use the configured interface for the masquerade route instead of the auto-detected one
429+
kubeIntf = config.Gateway.Interface
430+
}
431+
432+
// Set the primary DPU address annotation on the node with the interface addresses
433+
if err := util.SetNodePrimaryDPUHostAddr(nodeAnnotator, ifAddrs); err != nil {
434+
klog.Errorf("Unable to set primary IP net label on node, err: %v", err)
435+
return err
436+
}
437+
438+
// Set the host CIDRs annotation to include all detected network addresses
439+
// This helps with routing decisions for traffic coming from the host
440+
if err := util.SetNodeHostCIDRs(nodeAnnotator, nodeAddrSet); err != nil {
441+
klog.Errorf("Unable to set host-cidrs on node, err: %v", err)
384442
return err
385443
}
386444

445+
// Apply all node annotations to the Kubernetes node object
446+
if err := nodeAnnotator.Run(); err != nil {
447+
return fmt.Errorf("failed to set node %s annotations: %w", nc.name, err)
448+
}
449+
387450
// Delete stale masquerade resources if there are any. This is to make sure that there
388451
// are no Linux resources with IP from old masquerade subnet when masquerade subnet
389452
// gets changed as part of day2 operation.
390-
if err := deleteStaleMasqueradeResources(gwIntf, nc.name, nc.watchFactory); err != nil {
453+
if err := deleteStaleMasqueradeResources(kubeIntf, nc.name, nc.watchFactory); err != nil {
391454
return fmt.Errorf("failed to remove stale masquerade resources: %w", err)
392455
}
393456

394-
if err := setNodeMasqueradeIPOnExtBridge(gwIntf); err != nil {
395-
return fmt.Errorf("failed to set the node masquerade IP on the ext bridge %s: %v", gwIntf, err)
457+
if err := setNodeMasqueradeIPOnExtBridge(kubeIntf); err != nil {
458+
return fmt.Errorf("failed to set the node masquerade IP on the ext bridge %s: %v", kubeIntf, err)
396459
}
397460

398-
if err := addMasqueradeRoute(nc.routeManager, gwIntf, nc.name, ifAddrs, nc.watchFactory); err != nil {
461+
if err := addMasqueradeRoute(nc.routeManager, kubeIntf, nc.name, ifAddrs, nc.watchFactory); err != nil {
399462
return fmt.Errorf("failed to set the node masquerade route to OVN: %v", err)
400463
}
401464

@@ -404,7 +467,7 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP) er
404467
return fmt.Errorf("failed to update masquerade subnet annotation on node: %s, error: %v", nc.name, err)
405468
}
406469

407-
err = configureSvcRouteViaInterface(nc.routeManager, gatewayIntf, DummyNextHopIPs())
470+
err = configureSvcRouteViaInterface(nc.routeManager, config.Gateway.Interface, DummyNextHopIPs())
408471
if err != nil {
409472
return err
410473
}
@@ -430,7 +493,7 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP) er
430493
gw.portClaimWatcher = portClaimWatcher
431494
}
432495

433-
if err := addHostMACBindings(gwIntf); err != nil {
496+
if err := addHostMACBindings(kubeIntf); err != nil {
434497
return fmt.Errorf("failed to add MAC bindings for service routing")
435498
}
436499

go-controller/pkg/node/gateway_init_linux_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,9 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS,
725725
k := &kube.Kube{KClient: kubeFakeClient}
726726

727727
nodeAnnotator := kube.NewNodeAnnotator(k, existingNode.Name)
728+
err = util.SetNodePrimaryDPUHostAddr(nodeAnnotator, ovntest.MustParseIPNets(nodeSubnet))
729+
config.Gateway.RouterSubnet = nodeSubnet
730+
Expect(err).NotTo(HaveOccurred())
728731

729732
err = util.SetNodeHostSubnetAnnotation(nodeAnnotator, ovntest.MustParseIPNets(nodeSubnet))
730733
Expect(err).NotTo(HaveOccurred())
@@ -893,8 +896,11 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName,
893896

894897
err = testNS.Do(func(ns.NetNS) error {
895898
defer GinkgoRecover()
899+
k := &kube.Kube{KClient: kubeFakeClient}
900+
901+
nodeAnnotator := kube.NewNodeAnnotator(k, existingNode.Name)
896902

897-
err := nc.initGatewayDPUHost(net.ParseIP(hostIP))
903+
err := nc.initGatewayDPUHost(net.ParseIP(hostIP), nodeAnnotator)
898904
Expect(err).NotTo(HaveOccurred())
899905

900906
link, err := netlink.LinkByName(uplinkName)

go-controller/pkg/node/node_ip_handler_linux.go

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -65,27 +65,11 @@ func newAddressManagerInternal(nodeName string, k kube.Interface, mgmtPort manag
6565
}
6666
mgr.nodeAnnotator = kube.NewNodeAnnotator(k, nodeName)
6767
if config.OvnKubeNode.Mode == types.NodeModeDPU {
68-
var ifAddrs []*net.IPNet
69-
70-
// update k8s.ovn.org/host-cidrs
71-
node, err := watchFactory.GetNode(nodeName)
72-
if err != nil {
73-
klog.Errorf("Failed to get node %s: %v", nodeName, err)
74-
return nil
75-
}
76-
if useNetlink {
77-
// get updated interface IP addresses for the gateway bridge
78-
ifAddrs, err = gwBridge.UpdateInterfaceIPAddresses(node)
79-
if err != nil {
80-
klog.Errorf("Failed to obtain interface IP addresses for node %s: %v", nodeName, err)
81-
return nil
82-
}
83-
}
84-
if err = mgr.updateHostCIDRs(ifAddrs); err != nil {
68+
if err := mgr.updateHostCIDRs(); err != nil {
8569
klog.Errorf("Failed to update host-cidrs annotations on node %s: %v", nodeName, err)
8670
return nil
8771
}
88-
if err = mgr.nodeAnnotator.Run(); err != nil {
72+
if err := mgr.nodeAnnotator.Run(); err != nil {
8973
klog.Errorf("Failed to set host-cidrs annotations on node %s: %v", nodeName, err)
9074
return nil
9175
}
@@ -286,7 +270,7 @@ func (c *addressManager) updateNodeAddressAnnotations() error {
286270
}
287271

288272
// update k8s.ovn.org/host-cidrs
289-
if err = c.updateHostCIDRs(ifAddrs); err != nil {
273+
if err = c.updateHostCIDRs(); err != nil {
290274
return err
291275
}
292276

@@ -316,14 +300,10 @@ func (c *addressManager) updateNodeAddressAnnotations() error {
316300
return nil
317301
}
318302

319-
func (c *addressManager) updateHostCIDRs(ifAddrs []*net.IPNet) error {
303+
func (c *addressManager) updateHostCIDRs() error {
320304
if config.OvnKubeNode.Mode == types.NodeModeDPU {
321-
// For DPU mode, here we need to use the DPU host's IP address which is the tenant cluster's
322-
// host internal IP address instead.
323-
// Currently we are only intentionally supporting IPv4 for DPU here.
324-
nodeIPNetv4, _ := util.MatchFirstIPNetFamily(false, ifAddrs)
325-
nodeAddrSet := sets.New[string](nodeIPNetv4.String())
326-
return util.SetNodeHostCIDRs(c.nodeAnnotator, nodeAddrSet)
305+
// For DPU mode, we don't need to update the host-cidrs annotation.
306+
return nil
327307
}
328308

329309
return util.SetNodeHostCIDRs(c.nodeAnnotator, c.cidrs)

go-controller/pkg/ovn/controller/services/lb_config.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,16 @@ func makeNodeRouterTargetIPs(node *nodeInfo, c *lbConfig, hostMasqueradeIPV4, ho
9191
targetIPsV6 = localIPsV6
9292
}
9393

94+
// TODO: For all scenarios the lbAddress should be set to hostAddressesStr but this is breaking CI needs more investigation
95+
lbAddresses := node.hostAddressesStr()
96+
if config.OvnKubeNode.Mode == types.NodeModeFull {
97+
lbAddresses = node.l3gatewayAddressesStr()
98+
}
99+
94100
// Any targets local to the node need to have a special
95101
// harpin IP added, but only for the router LB
96-
targetIPsV4, v4Updated := util.UpdateIPsSlice(targetIPsV4, node.l3gatewayAddressesStr(), []string{hostMasqueradeIPV4})
97-
targetIPsV6, v6Updated := util.UpdateIPsSlice(targetIPsV6, node.l3gatewayAddressesStr(), []string{hostMasqueradeIPV6})
102+
targetIPsV4, v4Updated := util.UpdateIPsSlice(targetIPsV4, lbAddresses, []string{hostMasqueradeIPV4})
103+
targetIPsV6, v6Updated := util.UpdateIPsSlice(targetIPsV6, lbAddresses, []string{hostMasqueradeIPV6})
98104

99105
// Local endpoints are a subset of cluster endpoints, so it is enough to compare their length
100106
v4Changed = len(targetIPsV4) != len(c.clusterEndpoints.V4IPs) || v4Updated

go-controller/pkg/ovn/default_network_controller.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,7 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
949949
zoneClusterChanged := h.oc.nodeZoneClusterChanged(oldNode, newNode, newNodeIsLocalZoneNode, types.DefaultNetworkName)
950950
nodeSubnetChange := nodeSubnetChanged(oldNode, newNode, types.DefaultNetworkName)
951951
nodeEncapIPsChanged := util.NodeEncapIPsChanged(oldNode, newNode)
952+
nodePrimaryDPUHostAddrChanged := util.NodePrimaryDPUHostAddrAnnotationChanged(oldNode, newNode)
952953

953954
var aggregatedErrors []error
954955
if newNodeIsLocalZoneNode {
@@ -1006,11 +1007,18 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
10061007
// Also check if node subnet changed, so static routes are properly set
10071008
// Also check if the node is used to be a hybrid overlay node
10081009
syncZoneIC = syncZoneIC || h.oc.isLocalZoneNode(oldNode) || nodeSubnetChange || zoneClusterChanged ||
1009-
switchToOvnNode || nodeEncapIPsChanged
1010+
switchToOvnNode || nodeEncapIPsChanged || nodePrimaryDPUHostAddrChanged
10101011
if syncZoneIC {
10111012
klog.Infof("Node %q in remote zone %q, network %q, needs interconnect zone sync up. Zone cluster changed: %v",
10121013
newNode.Name, util.GetNodeZone(newNode), h.oc.GetNetworkName(), zoneClusterChanged)
10131014
}
1015+
// Reprovisioning the DPU (including OVS), which is pinned to a host, will change the system ID but not the node.
1016+
if config.OvnKubeNode.Mode == types.NodeModeDPU && nodeChassisChanged(oldNode, newNode) {
1017+
if err := h.oc.zoneChassisHandler.DeleteRemoteZoneNode(oldNode); err != nil {
1018+
aggregatedErrors = append(aggregatedErrors, err)
1019+
}
1020+
syncZoneIC = true
1021+
}
10141022
if err := h.oc.addUpdateRemoteNodeEvent(newNode, syncZoneIC); err != nil {
10151023
aggregatedErrors = append(aggregatedErrors, err)
10161024
}

go-controller/pkg/util/node_annotations.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ const (
9797
// OVNNodeHostCIDRs is used to track the different host IP addresses and subnet masks on the node
9898
OVNNodeHostCIDRs = "k8s.ovn.org/host-cidrs"
9999

100+
// OVNNodePrimaryDPUHostAddr is used to track the primary DPU host address on the node
101+
OVNNodePrimaryDPUHostAddr = "k8s.ovn.org/primary-dpu-host-addr"
102+
100103
// OVNNodeSecondaryHostEgressIPs contains EgressIP addresses that aren't managed by OVN. The EIP addresses are assigned to
101104
// standard linux interfaces and not interfaces of type OVS.
102105
OVNNodeSecondaryHostEgressIPs = "k8s.ovn.org/secondary-host-egress-ips"
@@ -1534,3 +1537,39 @@ func ParseNodeEncapIPsAnnotation(node *corev1.Node) ([]string, error) {
15341537
func NodeEncapIPsChanged(oldNode, newNode *corev1.Node) bool {
15351538
return oldNode.Annotations[OVNNodeEncapIPs] != newNode.Annotations[OVNNodeEncapIPs]
15361539
}
1540+
1541+
// SetNodePrimaryDPUHostAddr sets the primary DPU host address annotation on a node
1542+
func SetNodePrimaryDPUHostAddr(nodeAnnotator kube.Annotator, ifAddrs []*net.IPNet) error {
1543+
nodeIPNetv4, _ := MatchFirstIPNetFamily(false, ifAddrs)
1544+
nodeIPNetv6, _ := MatchFirstIPNetFamily(true, ifAddrs)
1545+
1546+
ifAddrAnnotation := ifAddr{}
1547+
if nodeIPNetv4 != nil {
1548+
ifAddrAnnotation.IPv4 = nodeIPNetv4.String()
1549+
}
1550+
if nodeIPNetv6 != nil {
1551+
ifAddrAnnotation.IPv6 = nodeIPNetv6.String()
1552+
}
1553+
return nodeAnnotator.Set(OVNNodePrimaryDPUHostAddr, ifAddrAnnotation)
1554+
}
1555+
1556+
// NodePrimaryDPUHostAddrAnnotationChanged returns true if the primary DPU host address annotation changed
1557+
func NodePrimaryDPUHostAddrAnnotationChanged(oldNode, newNode *corev1.Node) bool {
1558+
return oldNode.Annotations[OVNNodePrimaryDPUHostAddr] != newNode.Annotations[OVNNodePrimaryDPUHostAddr]
1559+
}
1560+
1561+
// GetNodePrimaryDPUHostAddrAnnotation returns the raw primary DPU host address annotation from a node
1562+
func GetNodePrimaryDPUHostAddrAnnotation(node *corev1.Node) (*ifAddr, error) {
1563+
addrAnnotation, ok := node.Annotations[OVNNodePrimaryDPUHostAddr]
1564+
if !ok {
1565+
return nil, newAnnotationNotSetError("%s annotation not found for node %q", OVNNodePrimaryDPUHostAddr, node.Name)
1566+
}
1567+
nodeIfAddr := &ifAddr{}
1568+
if err := json.Unmarshal([]byte(addrAnnotation), nodeIfAddr); err != nil {
1569+
return nil, fmt.Errorf("failed to unmarshal annotation: %s for node %q, err: %v", OVNNodePrimaryDPUHostAddr, node.Name, err)
1570+
}
1571+
if nodeIfAddr.IPv4 == "" && nodeIfAddr.IPv6 == "" {
1572+
return nil, fmt.Errorf("node: %q does not have any IP information set", node.Name)
1573+
}
1574+
return nodeIfAddr, nil
1575+
}

0 commit comments

Comments
 (0)