From 13fb9bf54f97136e6eb5bba19240ce58dca2bf94 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 30 Apr 2025 08:27:08 +0000 Subject: [PATCH] Fix hybrid overlay node subnets collision with cluster subnets When hybrid overlay clustersubnets is undefined, the hybrid overlay node subnet allocation is not managed by ovn-k. But we need to ensures hybrid overlay node subnets are properly marked as allocated in the cluster subnet allocator when they overlap with the cluster subnet ranges, preventing potential subnet allocation conflicts. The fix helps maintain proper subnet allocation tracking when hybrid overlay nodes use subnets that fall within the cluster's subnet ranges. Signed-off-by: Peng Liu (cherry picked from commit 192f575d351acc9bc049db213bf65f25a315e2c6) --- .../pkg/clustermanager/node/node_allocator.go | 58 +++++++- .../node/node_allocator_test.go | 86 +++++++++++- .../clustermanager/node/subnet_allocator.go | 124 +++++++++++++----- .../node/subnet_allocator_test.go | 44 +++++++ go-controller/pkg/util/net.go | 11 ++ 5 files changed, 282 insertions(+), 41 deletions(-) diff --git a/go-controller/pkg/clustermanager/node/node_allocator.go b/go-controller/pkg/clustermanager/node/node_allocator.go index e5647b1a46..63593618b2 100644 --- a/go-controller/pkg/clustermanager/node/node_allocator.go +++ b/go-controller/pkg/clustermanager/node/node_allocator.go @@ -47,6 +47,9 @@ type NodeAllocator struct { networkID int netInfo util.NetInfo + + // nodeSubnets is a list of node subnets that are managed by the cluster subnet allocator + nodeSubnets []*net.IPNet } func NewNodeAllocator(networkID int, netInfo util.NetInfo, nodeLister listers.NodeLister, kube kube.Interface, tunnelIDAllocator id.Allocator) *NodeAllocator { @@ -123,6 +126,10 @@ func (na *NodeAllocator) hasHybridOverlayAllocation() bool { return config.HybridOverlay.Enabled && !na.netInfo.IsSecondary() && len(config.HybridOverlay.ClusterSubnets) > 0 } +func (na *NodeAllocator) hasHybridOverlayAllocationUnmanaged() bool { + return config.HybridOverlay.Enabled && !na.netInfo.IsSecondary() && len(config.HybridOverlay.ClusterSubnets) == 0 +} + func (na *NodeAllocator) recordSubnetCount() { // only for L3 networks if na.hasNodeSubnetAllocation() { @@ -230,6 +237,12 @@ func (na *NodeAllocator) HandleAddUpdateNodeEvent(node *corev1.Node) error { } return fmt.Errorf("failed to set hybrid overlay annotations for node %s: %v", node.Name, err) } + } else if na.hasHybridOverlayAllocationUnmanaged() { + // this is a hybrid overlay node but not managed by the hybrid overlay subnet allocator + // Hybrid overlay is only available for IPv4 for now, so we only need to check for IPv4 subnets + if err := na.markAllocatedNetworksForUnmanagedHONode(node); err != nil { + return fmt.Errorf("failed to mark the subnet %v as allocated in the cluster subnet allocator for node %s: %v", na.nodeSubnets, node.Name, err) + } } return nil } @@ -380,7 +393,7 @@ func (na *NodeAllocator) HandleDeleteNode(node *corev1.Node) error { return nil } - if na.hasNodeSubnetAllocation() { + if na.hasNodeSubnetAllocation() || na.hasHybridOverlayAllocationUnmanaged() { na.clusterSubnetAllocator.ReleaseAllNetworks(node.Name) na.recordSubnetUsage() } @@ -411,10 +424,17 @@ func (na *NodeAllocator) Sync(nodes []interface{}) error { klog.Errorf("Failed to parse hybrid overlay for node %s: %v", node.Name, err) } else if hostSubnet != nil { klog.V(5).Infof("Node %s contains subnets: %v", node.Name, hostSubnet) - if err := na.hybridOverlaySubnetAllocator.ReleaseNetworks(node.Name, hostSubnet); err != nil { + if err := na.hybridOverlaySubnetAllocator.MarkAllocatedNetworks(node.Name, hostSubnet); err != nil { klog.Errorf("Failed to mark the subnet %v as allocated in the hybrid subnet allocator for node %s: %v", hostSubnet, node.Name, err) } } + } else if na.hasHybridOverlayAllocationUnmanaged() { + // this is a hybrid overlay node but not managed by the hybrid overlay subnet allocator + // Hybrid overlay is only available for IPv4 for now, so we only need to check for IPv4 subnets + if err := na.markAllocatedNetworksForUnmanagedHONode(node); err != nil { + klog.Errorf("Failed to mark the subnet as allocated in the cluster subnet allocator for hybrid overlay node %s: %v", node.Name, err) + } + } } else { hostSubnets, _ := util.ParseNodeHostSubnetAnnotation(node, networkName) @@ -634,3 +654,37 @@ func (na *NodeAllocator) hasJoinSubnetAllocation() bool { // we allocate join subnets for L3/L2 primary user defined networks or default network return na.netInfo.IsDefault() || (util.IsNetworkSegmentationSupportEnabled() && na.netInfo.IsPrimaryNetwork()) } + +func (na *NodeAllocator) markAllocatedNetworksForUnmanagedHONode(node *corev1.Node) error { + hostSubnet, err := houtil.ParseHybridOverlayHostSubnet(node) + if err != nil { + return fmt.Errorf("failed to parse hybrid overlay for node %s: %v", node.Name, err) + } + + var overlaps []*net.IPNet + for _, clusterSubnet := range na.netInfo.Subnets() { + if overlaps = util.IPNetOverlaps(hostSubnet, clusterSubnet.CIDR); len(overlaps) != 0 { + break + } + } + if len(overlaps) == 0 { + // if the host subnet does not overlap with any cluster subnet, return + return nil + } + + if hostSubnet != nil { + if na.nodeSubnets == nil { + // initialize the nodeSubnets variable at the first called + na.nodeSubnets = na.clusterSubnetAllocator.ListAllIPv4Networks() + } + // check if the host subnet overlaps with any node subnet that is managed by the cluster subnet allocator + // if it does, mark it as allocated in the cluster subnet allocator + if overlaps := util.IPNetOverlaps(hostSubnet, na.nodeSubnets...); overlaps != nil { + klog.Infof("Hybrid overlay node %s overlaps with subnets: %v", node.Name, overlaps) + if err := na.clusterSubnetAllocator.MarkAllocatedNetworks(node.Name, overlaps...); err != nil { + return fmt.Errorf("failed to mark the subnet %v as allocated: %v", hostSubnet, err) + } + } + } + return nil +} diff --git a/go-controller/pkg/clustermanager/node/node_allocator_test.go b/go-controller/pkg/clustermanager/node/node_allocator_test.go index 8b1d621cf1..01064ff50b 100644 --- a/go-controller/pkg/clustermanager/node/node_allocator_test.go +++ b/go-controller/pkg/clustermanager/node/node_allocator_test.go @@ -8,6 +8,11 @@ import ( cnitypes "github.com/containernetworking/cni/pkg/types" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -45,9 +50,10 @@ func TestController_allocateNodeSubnets(t *testing.T) { existingNets []*net.IPNet alreadyOwned *existingAllocation // to be converted during the test to []*net.IPNet - wantStr []string - allocated int - wantErr bool + wantStr []string + allocated int + wantErr bool + existingNodes []*corev1.Node }{ { name: "new node, IPv4 only cluster", @@ -60,6 +66,56 @@ func TestController_allocateNodeSubnets(t *testing.T) { allocated: 1, wantErr: false, }, + { + name: "new node, IPv4 only cluster, the test node is added when a hybrid overlay node with overlapped node subnet exists", + networkRanges: []string{"172.16.0.0/16"}, + networkLens: []int{24}, + configIPv4: true, + configIPv6: false, + existingNets: nil, + wantStr: []string{"172.16.1.0/24"}, + allocated: 1, + wantErr: false, + existingNodes: []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "ho_node1", + Annotations: map[string]string{ + "k8s.ovn.org/hybrid-overlay-node-subnet": "172.16.0.0/24", + }, + Labels: map[string]string{ + "hybrid-overlay-node": "true", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + }, + { + name: "new node, IPv4 only cluster, the test node is added when a hybrid overlay node with overlapped and differernt mask length node subnet exists", + networkRanges: []string{"172.16.0.0/16"}, + networkLens: []int{24}, + configIPv4: true, + configIPv6: false, + existingNets: nil, + wantStr: []string{"172.16.2.0/24"}, + allocated: 1, + wantErr: false, + existingNodes: []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "ho_node1", + Annotations: map[string]string{ + "k8s.ovn.org/hybrid-overlay-node-subnet": "172.16.0.0/23", + }, + Labels: map[string]string{ + "hybrid-overlay-node": "true", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + }, { name: "new node, IPv6 only cluster", networkRanges: []string{"2001:db2::/56"}, @@ -208,6 +264,12 @@ func TestController_allocateNodeSubnets(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + config.HybridOverlay.Enabled = true + config.Kubernetes.NoHostSubnetNodes, _ = metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ + MatchLabels: map[string]string{"hybrid-overlay-node": "true"}, + }) + config.HybridOverlay.ClusterSubnets = nil + ranges, err := rangesFromStrings(tt.networkRanges, tt.networkLens) if err != nil { t.Fatal(err) @@ -231,6 +293,15 @@ func TestController_allocateNodeSubnets(t *testing.T) { if err := na.Init(); err != nil { t.Fatalf("Failed to initialize node allocator: %v", err) } + nodeInterfaces := make([]interface{}, len(tt.existingNodes)) + for i, node := range tt.existingNodes { + nodeInterfaces[i] = node + } + // Sync existing nodes before allocating subnets + err = na.Sync(nodeInterfaces) + if err != nil { + t.Fatal(err) + } if tt.alreadyOwned != nil { err := na.clusterSubnetAllocator.MarkAllocatedNetworks(tt.alreadyOwned.owner, ovntest.MustParseIPNets(tt.alreadyOwned.subnet)...) @@ -291,6 +362,7 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { na := &NodeAllocator{ netInfo: netInfo, clusterSubnetAllocator: NewSubnetAllocator(), + nodeLister: newFakeNodeLister([]*corev1.Node{}), } if err := na.Init(); err != nil { @@ -323,3 +395,11 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { t.Fatalf("Expected %d v6 allocated subnets, but got %d", v6usedBefore, v6usedAfter) } } + +func newFakeNodeLister(nodes []*corev1.Node) v1.NodeLister { + indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) + for _, node := range nodes { + _ = indexer.Add(node) + } + return v1.NewNodeLister(indexer) +} diff --git a/go-controller/pkg/clustermanager/node/subnet_allocator.go b/go-controller/pkg/clustermanager/node/subnet_allocator.go index 29019e6691..02cdca711a 100644 --- a/go-controller/pkg/clustermanager/node/subnet_allocator.go +++ b/go-controller/pkg/clustermanager/node/subnet_allocator.go @@ -23,6 +23,8 @@ type SubnetAllocator interface { AllocateNetworks(string) ([]*net.IPNet, error) AllocateIPv4Network(string) (*net.IPNet, error) AllocateIPv6Network(string) (*net.IPNet, error) + ListAllIPv4Networks() []*net.IPNet + ListAllIPv6Networks() []*net.IPNet // ReleaseNetworks releases the given networks if they are owned by the // given owner ReleaseNetworks(string, ...*net.IPNet) error @@ -192,6 +194,22 @@ func (sna *BaseSubnetAllocator) AllocateIPv6Network(owner string) (*net.IPNet, e return nil, ErrSubnetAllocatorFull } +func (sna *BaseSubnetAllocator) ListAllIPv4Networks() []*net.IPNet { + var subnets []*net.IPNet + for _, snr := range sna.v4ranges { + subnets = append(subnets, snr.listAllNetworks()...) + } + return subnets +} + +func (sna *BaseSubnetAllocator) ListAllIPv6Networks() []*net.IPNet { + var subnets []*net.IPNet + for _, snr := range sna.v6ranges { + subnets = append(subnets, snr.listAllNetworks()...) + } + return subnets +} + func (sna *BaseSubnetAllocator) ReleaseNetworks(owner string, subnets ...*net.IPNet) error { sna.Lock() defer sna.Unlock() @@ -379,48 +397,20 @@ func (snr *subnetAllocatorRange) allocateNetwork(owner string) *net.IPNet { return subnet } } - netMaskSize, addrLen := snr.network.Mask.Size() - numSubnets := uint32(1) << snr.subnetBits - if snr.subnetBits > 24 { - // We need to make sure that the uint32 math below won't overflow. If - // snr.subnetBits > 32 then numSubnets has already overflowed, but also if - // numSubnets is between 1<<24 and 1<<32 then "base << (snr.hostBits % 8)" - // below could overflow if snr.hostBits%8 is non-0. So we cap numSubnets - // at 1<<24. "16M subnets ought to be enough for anybody." - numSubnets = 1 << 24 - } - - var i uint32 - for i = 0; i < numSubnets; i++ { - n := (i + snr.next) % numSubnets - base := n - if snr.leftShift != 0 { - base = ((base << snr.leftShift) & snr.leftMask) | ((base >> snr.rightShift) & snr.rightMask) - } else if addrLen == 128 && snr.subnetBits >= 16 { - // Skip the 0 subnet (and other subnets with all 0s in the low word) - // since the extra 0 word will get compressed out and make the address - // look different from addresses on other subnets. - if (base & 0xFFFF) == 0 { - continue - } - } - genIP := append([]byte{}, []byte(snr.network.IP)...) - subnetBits := base << (snr.hostBits % 8) - b := (uint32(addrLen) - snr.hostBits - 1) / 8 - for subnetBits != 0 { - genIP[b] |= byte(subnetBits) - subnetBits >>= 8 - b-- - } - - genSubnet := &net.IPNet{IP: genIP, Mask: net.CIDRMask(int(snr.subnetBits)+netMaskSize, addrLen)} + var subnet *net.IPNet + snr.foreach(snr.next, func(n uint32, genSubnet *net.IPNet) bool { if _, ok := snr.allocMap[genSubnet.String()]; !ok { snr.allocMap[genSubnet.String()] = owner snr.next = n + 1 snr.used++ - return genSubnet + subnet = genSubnet + return false } + return true + }) + if subnet != nil { + return subnet } snr.next = 0 @@ -456,3 +446,65 @@ func (snr *subnetAllocatorRange) releaseAllNetworks(owner string) { } } } + +// generateSubnet generates a subnet for the given base number using the allocator's parameters +func (snr *subnetAllocatorRange) generateSubnet(base uint32) *net.IPNet { + netMaskSize, addrLen := snr.network.Mask.Size() + + if snr.leftShift != 0 { + base = ((base << snr.leftShift) & snr.leftMask) | ((base >> snr.rightShift) & snr.rightMask) + } else if addrLen == 128 && snr.subnetBits >= 16 { + // Skip the 0 subnet (and other subnets with all 0s in the low word) + // since the extra 0 word will get compressed out and make the address + // look different from addresses on other subnets. + if (base & 0xFFFF) == 0 { + return nil + } + } + + genIP := append([]byte{}, []byte(snr.network.IP)...) + subnetBits := base << (snr.hostBits % 8) + b := (uint32(addrLen) - snr.hostBits - 1) / 8 + for subnetBits != 0 { + genIP[b] |= byte(subnetBits) + subnetBits >>= 8 + b-- + } + + return &net.IPNet{ + IP: genIP, + Mask: net.CIDRMask(int(snr.subnetBits)+netMaskSize, addrLen), + } +} + +func (snr *subnetAllocatorRange) foreach(next uint32, do func(uint32, *net.IPNet) bool) { + numSubnets := uint32(1) << snr.subnetBits + if snr.subnetBits > 24 { + // We need to make sure that the uint32 math below won't overflow. If + // snr.subnetBits > 32 then numSubnets has already overflowed, but also if + // numSubnets is between 1<<24 and 1<<32 then "base << (snr.hostBits % 8)" + // below could overflow if snr.hostBits%8 is non-0. So we cap numSubnets + // at 1<<24. "16M subnets ought to be enough for anybody." + numSubnets = 1 << 24 + } + + var i uint32 + for i = 0; i < numSubnets; i++ { + n := (i + next) % numSubnets + base := n + genSubnet := snr.generateSubnet(base) + if genSubnet != nil && !do(n, genSubnet) { + return + } + } +} + +// listAllNetworks returns all networks in the range +func (snr *subnetAllocatorRange) listAllNetworks() []*net.IPNet { + var subnets []*net.IPNet + snr.foreach(0, func(_ uint32, genSubnet *net.IPNet) bool { + subnets = append(subnets, genSubnet) + return true + }) + return subnets +} diff --git a/go-controller/pkg/clustermanager/node/subnet_allocator_test.go b/go-controller/pkg/clustermanager/node/subnet_allocator_test.go index 390c927260..af7605e4f0 100644 --- a/go-controller/pkg/clustermanager/node/subnet_allocator_test.go +++ b/go-controller/pkg/clustermanager/node/subnet_allocator_test.go @@ -629,3 +629,47 @@ func TestAllocateSubnetSameOwner(t *testing.T) { } } } + +func TestListAllNetworks(t *testing.T) { + expectV4Subnets := []string{ + "10.1.0.0/18", + "10.1.64.0/18", + "10.1.128.0/18", + "10.1.192.0/18", + } + expectV6Subnets := []string{ + "fd01::/64", + "fd01:0:0:1::/64", + "fd01:0:0:2::/64", + "fd01:0:0:3::/64", + } + + sna, err := newSubnetAllocator("10.1.0.0/16", 18) + if err != nil { + t.Fatal("Failed to initialize subnet allocator: ", err) + } + err = sna.AddNetworkRange(ovntest.MustParseIPNet("fd01::/62"), 64) + if err != nil { + t.Fatal("Failed to add network range: ", err) + } + + v4Subnets := sna.ListAllIPv4Networks() + if len(v4Subnets) != len(expectV4Subnets) { + t.Fatalf("Expected %d subnets, got %d", len(expectV4Subnets), len(v4Subnets)) + } + for i, sn := range v4Subnets { + if sn.String() != expectV4Subnets[i] { + t.Fatalf("Expected %s, got %s", expectV4Subnets[i], sn.String()) + } + } + + v6Subnets := sna.ListAllIPv6Networks() + if len(v6Subnets) != len(expectV6Subnets) { + t.Fatalf("Expected %d subnets, got %d", len(expectV6Subnets), len(v6Subnets)) + } + for i, sn := range v6Subnets { + if sn.String() != expectV6Subnets[i] { + t.Fatalf("Expected %s, got %s", expectV6Subnets[i], sn.String()) + } + } +} diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 49b3a11a40..e1819a2af3 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -284,6 +284,17 @@ func ContainsCIDR(ipnet1, ipnet2 *net.IPNet) bool { return mask1 <= mask2 && ipnet1.Contains(ipnet2.IP) } +// IPNetOverlaps returns ipnets that overlap with the ref +func IPNetOverlaps(ref *net.IPNet, ipnets ...*net.IPNet) []*net.IPNet { + var overlaps []*net.IPNet + for _, ipnet := range ipnets { + if ref.Contains(ipnet.IP) || ipnet.Contains(ref.IP) { + overlaps = append(overlaps, ipnet) + } + } + return overlaps +} + // ParseIPNets parses the provided string formatted CIDRs func ParseIPNets(strs []string) ([]*net.IPNet, error) { ipnets := make([]*net.IPNet, len(strs))