diff --git a/go-controller/pkg/clustermanager/node/node_allocator.go b/go-controller/pkg/clustermanager/node/node_allocator.go index e5647b1a46..63593618b2 100644 --- a/go-controller/pkg/clustermanager/node/node_allocator.go +++ b/go-controller/pkg/clustermanager/node/node_allocator.go @@ -47,6 +47,9 @@ type NodeAllocator struct { networkID int netInfo util.NetInfo + + // nodeSubnets is a list of node subnets that are managed by the cluster subnet allocator + nodeSubnets []*net.IPNet } func NewNodeAllocator(networkID int, netInfo util.NetInfo, nodeLister listers.NodeLister, kube kube.Interface, tunnelIDAllocator id.Allocator) *NodeAllocator { @@ -123,6 +126,10 @@ func (na *NodeAllocator) hasHybridOverlayAllocation() bool { return config.HybridOverlay.Enabled && !na.netInfo.IsSecondary() && len(config.HybridOverlay.ClusterSubnets) > 0 } +func (na *NodeAllocator) hasHybridOverlayAllocationUnmanaged() bool { + return config.HybridOverlay.Enabled && !na.netInfo.IsSecondary() && len(config.HybridOverlay.ClusterSubnets) == 0 +} + func (na *NodeAllocator) recordSubnetCount() { // only for L3 networks if na.hasNodeSubnetAllocation() { @@ -230,6 +237,12 @@ func (na *NodeAllocator) HandleAddUpdateNodeEvent(node *corev1.Node) error { } return fmt.Errorf("failed to set hybrid overlay annotations for node %s: %v", node.Name, err) } + } else if na.hasHybridOverlayAllocationUnmanaged() { + // this is a hybrid overlay node but not managed by the hybrid overlay subnet allocator + // Hybrid overlay is only available for IPv4 for now, so we only need to check for IPv4 subnets + if err := na.markAllocatedNetworksForUnmanagedHONode(node); err != nil { + return fmt.Errorf("failed to mark the subnet %v as allocated in the cluster subnet allocator for node %s: %v", na.nodeSubnets, node.Name, err) + } } return nil } @@ -380,7 +393,7 @@ func (na *NodeAllocator) HandleDeleteNode(node *corev1.Node) error { return nil } - if na.hasNodeSubnetAllocation() { + if na.hasNodeSubnetAllocation() || na.hasHybridOverlayAllocationUnmanaged() { na.clusterSubnetAllocator.ReleaseAllNetworks(node.Name) na.recordSubnetUsage() } @@ -411,10 +424,17 @@ func (na *NodeAllocator) Sync(nodes []interface{}) error { klog.Errorf("Failed to parse hybrid overlay for node %s: %v", node.Name, err) } else if hostSubnet != nil { klog.V(5).Infof("Node %s contains subnets: %v", node.Name, hostSubnet) - if err := na.hybridOverlaySubnetAllocator.ReleaseNetworks(node.Name, hostSubnet); err != nil { + if err := na.hybridOverlaySubnetAllocator.MarkAllocatedNetworks(node.Name, hostSubnet); err != nil { klog.Errorf("Failed to mark the subnet %v as allocated in the hybrid subnet allocator for node %s: %v", hostSubnet, node.Name, err) } } + } else if na.hasHybridOverlayAllocationUnmanaged() { + // this is a hybrid overlay node but not managed by the hybrid overlay subnet allocator + // Hybrid overlay is only available for IPv4 for now, so we only need to check for IPv4 subnets + if err := na.markAllocatedNetworksForUnmanagedHONode(node); err != nil { + klog.Errorf("Failed to mark the subnet as allocated in the cluster subnet allocator for hybrid overlay node %s: %v", node.Name, err) + } + } } else { hostSubnets, _ := util.ParseNodeHostSubnetAnnotation(node, networkName) @@ -634,3 +654,37 @@ func (na *NodeAllocator) hasJoinSubnetAllocation() bool { // we allocate join subnets for L3/L2 primary user defined networks or default network return na.netInfo.IsDefault() || (util.IsNetworkSegmentationSupportEnabled() && na.netInfo.IsPrimaryNetwork()) } + +func (na *NodeAllocator) markAllocatedNetworksForUnmanagedHONode(node *corev1.Node) error { + hostSubnet, err := houtil.ParseHybridOverlayHostSubnet(node) + if err != nil { + return fmt.Errorf("failed to parse hybrid overlay for node %s: %v", node.Name, err) + } + + var overlaps []*net.IPNet + for _, clusterSubnet := range na.netInfo.Subnets() { + if overlaps = util.IPNetOverlaps(hostSubnet, clusterSubnet.CIDR); len(overlaps) != 0 { + break + } + } + if len(overlaps) == 0 { + // if the host subnet does not overlap with any cluster subnet, return + return nil + } + + if hostSubnet != nil { + if na.nodeSubnets == nil { + // initialize the nodeSubnets variable at the first called + na.nodeSubnets = na.clusterSubnetAllocator.ListAllIPv4Networks() + } + // check if the host subnet overlaps with any node subnet that is managed by the cluster subnet allocator + // if it does, mark it as allocated in the cluster subnet allocator + if overlaps := util.IPNetOverlaps(hostSubnet, na.nodeSubnets...); overlaps != nil { + klog.Infof("Hybrid overlay node %s overlaps with subnets: %v", node.Name, overlaps) + if err := na.clusterSubnetAllocator.MarkAllocatedNetworks(node.Name, overlaps...); err != nil { + return fmt.Errorf("failed to mark the subnet %v as allocated: %v", hostSubnet, err) + } + } + } + return nil +} diff --git a/go-controller/pkg/clustermanager/node/node_allocator_test.go b/go-controller/pkg/clustermanager/node/node_allocator_test.go index 8b1d621cf1..01064ff50b 100644 --- a/go-controller/pkg/clustermanager/node/node_allocator_test.go +++ b/go-controller/pkg/clustermanager/node/node_allocator_test.go @@ -8,6 +8,11 @@ import ( cnitypes "github.com/containernetworking/cni/pkg/types" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -45,9 +50,10 @@ func TestController_allocateNodeSubnets(t *testing.T) { existingNets []*net.IPNet alreadyOwned *existingAllocation // to be converted during the test to []*net.IPNet - wantStr []string - allocated int - wantErr bool + wantStr []string + allocated int + wantErr bool + existingNodes []*corev1.Node }{ { name: "new node, IPv4 only cluster", @@ -60,6 +66,56 @@ func TestController_allocateNodeSubnets(t *testing.T) { allocated: 1, wantErr: false, }, + { + name: "new node, IPv4 only cluster, the test node is added when a hybrid overlay node with overlapped node subnet exists", + networkRanges: []string{"172.16.0.0/16"}, + networkLens: []int{24}, + configIPv4: true, + configIPv6: false, + existingNets: nil, + wantStr: []string{"172.16.1.0/24"}, + allocated: 1, + wantErr: false, + existingNodes: []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "ho_node1", + Annotations: map[string]string{ + "k8s.ovn.org/hybrid-overlay-node-subnet": "172.16.0.0/24", + }, + Labels: map[string]string{ + "hybrid-overlay-node": "true", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + }, + { + name: "new node, IPv4 only cluster, the test node is added when a hybrid overlay node with overlapped and differernt mask length node subnet exists", + networkRanges: []string{"172.16.0.0/16"}, + networkLens: []int{24}, + configIPv4: true, + configIPv6: false, + existingNets: nil, + wantStr: []string{"172.16.2.0/24"}, + allocated: 1, + wantErr: false, + existingNodes: []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "ho_node1", + Annotations: map[string]string{ + "k8s.ovn.org/hybrid-overlay-node-subnet": "172.16.0.0/23", + }, + Labels: map[string]string{ + "hybrid-overlay-node": "true", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + }, { name: "new node, IPv6 only cluster", networkRanges: []string{"2001:db2::/56"}, @@ -208,6 +264,12 @@ func TestController_allocateNodeSubnets(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + config.HybridOverlay.Enabled = true + config.Kubernetes.NoHostSubnetNodes, _ = metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ + MatchLabels: map[string]string{"hybrid-overlay-node": "true"}, + }) + config.HybridOverlay.ClusterSubnets = nil + ranges, err := rangesFromStrings(tt.networkRanges, tt.networkLens) if err != nil { t.Fatal(err) @@ -231,6 +293,15 @@ func TestController_allocateNodeSubnets(t *testing.T) { if err := na.Init(); err != nil { t.Fatalf("Failed to initialize node allocator: %v", err) } + nodeInterfaces := make([]interface{}, len(tt.existingNodes)) + for i, node := range tt.existingNodes { + nodeInterfaces[i] = node + } + // Sync existing nodes before allocating subnets + err = na.Sync(nodeInterfaces) + if err != nil { + t.Fatal(err) + } if tt.alreadyOwned != nil { err := na.clusterSubnetAllocator.MarkAllocatedNetworks(tt.alreadyOwned.owner, ovntest.MustParseIPNets(tt.alreadyOwned.subnet)...) @@ -291,6 +362,7 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { na := &NodeAllocator{ netInfo: netInfo, clusterSubnetAllocator: NewSubnetAllocator(), + nodeLister: newFakeNodeLister([]*corev1.Node{}), } if err := na.Init(); err != nil { @@ -323,3 +395,11 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { t.Fatalf("Expected %d v6 allocated subnets, but got %d", v6usedBefore, v6usedAfter) } } + +func newFakeNodeLister(nodes []*corev1.Node) v1.NodeLister { + indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) + for _, node := range nodes { + _ = indexer.Add(node) + } + return v1.NewNodeLister(indexer) +} diff --git a/go-controller/pkg/clustermanager/node/subnet_allocator.go b/go-controller/pkg/clustermanager/node/subnet_allocator.go index 29019e6691..02cdca711a 100644 --- a/go-controller/pkg/clustermanager/node/subnet_allocator.go +++ b/go-controller/pkg/clustermanager/node/subnet_allocator.go @@ -23,6 +23,8 @@ type SubnetAllocator interface { AllocateNetworks(string) ([]*net.IPNet, error) AllocateIPv4Network(string) (*net.IPNet, error) AllocateIPv6Network(string) (*net.IPNet, error) + ListAllIPv4Networks() []*net.IPNet + ListAllIPv6Networks() []*net.IPNet // ReleaseNetworks releases the given networks if they are owned by the // given owner ReleaseNetworks(string, ...*net.IPNet) error @@ -192,6 +194,22 @@ func (sna *BaseSubnetAllocator) AllocateIPv6Network(owner string) (*net.IPNet, e return nil, ErrSubnetAllocatorFull } +func (sna *BaseSubnetAllocator) ListAllIPv4Networks() []*net.IPNet { + var subnets []*net.IPNet + for _, snr := range sna.v4ranges { + subnets = append(subnets, snr.listAllNetworks()...) + } + return subnets +} + +func (sna *BaseSubnetAllocator) ListAllIPv6Networks() []*net.IPNet { + var subnets []*net.IPNet + for _, snr := range sna.v6ranges { + subnets = append(subnets, snr.listAllNetworks()...) + } + return subnets +} + func (sna *BaseSubnetAllocator) ReleaseNetworks(owner string, subnets ...*net.IPNet) error { sna.Lock() defer sna.Unlock() @@ -379,48 +397,20 @@ func (snr *subnetAllocatorRange) allocateNetwork(owner string) *net.IPNet { return subnet } } - netMaskSize, addrLen := snr.network.Mask.Size() - numSubnets := uint32(1) << snr.subnetBits - if snr.subnetBits > 24 { - // We need to make sure that the uint32 math below won't overflow. If - // snr.subnetBits > 32 then numSubnets has already overflowed, but also if - // numSubnets is between 1<<24 and 1<<32 then "base << (snr.hostBits % 8)" - // below could overflow if snr.hostBits%8 is non-0. So we cap numSubnets - // at 1<<24. "16M subnets ought to be enough for anybody." - numSubnets = 1 << 24 - } - - var i uint32 - for i = 0; i < numSubnets; i++ { - n := (i + snr.next) % numSubnets - base := n - if snr.leftShift != 0 { - base = ((base << snr.leftShift) & snr.leftMask) | ((base >> snr.rightShift) & snr.rightMask) - } else if addrLen == 128 && snr.subnetBits >= 16 { - // Skip the 0 subnet (and other subnets with all 0s in the low word) - // since the extra 0 word will get compressed out and make the address - // look different from addresses on other subnets. - if (base & 0xFFFF) == 0 { - continue - } - } - genIP := append([]byte{}, []byte(snr.network.IP)...) - subnetBits := base << (snr.hostBits % 8) - b := (uint32(addrLen) - snr.hostBits - 1) / 8 - for subnetBits != 0 { - genIP[b] |= byte(subnetBits) - subnetBits >>= 8 - b-- - } - - genSubnet := &net.IPNet{IP: genIP, Mask: net.CIDRMask(int(snr.subnetBits)+netMaskSize, addrLen)} + var subnet *net.IPNet + snr.foreach(snr.next, func(n uint32, genSubnet *net.IPNet) bool { if _, ok := snr.allocMap[genSubnet.String()]; !ok { snr.allocMap[genSubnet.String()] = owner snr.next = n + 1 snr.used++ - return genSubnet + subnet = genSubnet + return false } + return true + }) + if subnet != nil { + return subnet } snr.next = 0 @@ -456,3 +446,65 @@ func (snr *subnetAllocatorRange) releaseAllNetworks(owner string) { } } } + +// generateSubnet generates a subnet for the given base number using the allocator's parameters +func (snr *subnetAllocatorRange) generateSubnet(base uint32) *net.IPNet { + netMaskSize, addrLen := snr.network.Mask.Size() + + if snr.leftShift != 0 { + base = ((base << snr.leftShift) & snr.leftMask) | ((base >> snr.rightShift) & snr.rightMask) + } else if addrLen == 128 && snr.subnetBits >= 16 { + // Skip the 0 subnet (and other subnets with all 0s in the low word) + // since the extra 0 word will get compressed out and make the address + // look different from addresses on other subnets. + if (base & 0xFFFF) == 0 { + return nil + } + } + + genIP := append([]byte{}, []byte(snr.network.IP)...) + subnetBits := base << (snr.hostBits % 8) + b := (uint32(addrLen) - snr.hostBits - 1) / 8 + for subnetBits != 0 { + genIP[b] |= byte(subnetBits) + subnetBits >>= 8 + b-- + } + + return &net.IPNet{ + IP: genIP, + Mask: net.CIDRMask(int(snr.subnetBits)+netMaskSize, addrLen), + } +} + +func (snr *subnetAllocatorRange) foreach(next uint32, do func(uint32, *net.IPNet) bool) { + numSubnets := uint32(1) << snr.subnetBits + if snr.subnetBits > 24 { + // We need to make sure that the uint32 math below won't overflow. If + // snr.subnetBits > 32 then numSubnets has already overflowed, but also if + // numSubnets is between 1<<24 and 1<<32 then "base << (snr.hostBits % 8)" + // below could overflow if snr.hostBits%8 is non-0. So we cap numSubnets + // at 1<<24. "16M subnets ought to be enough for anybody." + numSubnets = 1 << 24 + } + + var i uint32 + for i = 0; i < numSubnets; i++ { + n := (i + next) % numSubnets + base := n + genSubnet := snr.generateSubnet(base) + if genSubnet != nil && !do(n, genSubnet) { + return + } + } +} + +// listAllNetworks returns all networks in the range +func (snr *subnetAllocatorRange) listAllNetworks() []*net.IPNet { + var subnets []*net.IPNet + snr.foreach(0, func(_ uint32, genSubnet *net.IPNet) bool { + subnets = append(subnets, genSubnet) + return true + }) + return subnets +} diff --git a/go-controller/pkg/clustermanager/node/subnet_allocator_test.go b/go-controller/pkg/clustermanager/node/subnet_allocator_test.go index 390c927260..af7605e4f0 100644 --- a/go-controller/pkg/clustermanager/node/subnet_allocator_test.go +++ b/go-controller/pkg/clustermanager/node/subnet_allocator_test.go @@ -629,3 +629,47 @@ func TestAllocateSubnetSameOwner(t *testing.T) { } } } + +func TestListAllNetworks(t *testing.T) { + expectV4Subnets := []string{ + "10.1.0.0/18", + "10.1.64.0/18", + "10.1.128.0/18", + "10.1.192.0/18", + } + expectV6Subnets := []string{ + "fd01::/64", + "fd01:0:0:1::/64", + "fd01:0:0:2::/64", + "fd01:0:0:3::/64", + } + + sna, err := newSubnetAllocator("10.1.0.0/16", 18) + if err != nil { + t.Fatal("Failed to initialize subnet allocator: ", err) + } + err = sna.AddNetworkRange(ovntest.MustParseIPNet("fd01::/62"), 64) + if err != nil { + t.Fatal("Failed to add network range: ", err) + } + + v4Subnets := sna.ListAllIPv4Networks() + if len(v4Subnets) != len(expectV4Subnets) { + t.Fatalf("Expected %d subnets, got %d", len(expectV4Subnets), len(v4Subnets)) + } + for i, sn := range v4Subnets { + if sn.String() != expectV4Subnets[i] { + t.Fatalf("Expected %s, got %s", expectV4Subnets[i], sn.String()) + } + } + + v6Subnets := sna.ListAllIPv6Networks() + if len(v6Subnets) != len(expectV6Subnets) { + t.Fatalf("Expected %d subnets, got %d", len(expectV6Subnets), len(v6Subnets)) + } + for i, sn := range v6Subnets { + if sn.String() != expectV6Subnets[i] { + t.Fatalf("Expected %s, got %s", expectV6Subnets[i], sn.String()) + } + } +} diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 49b3a11a40..e1819a2af3 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -284,6 +284,17 @@ func ContainsCIDR(ipnet1, ipnet2 *net.IPNet) bool { return mask1 <= mask2 && ipnet1.Contains(ipnet2.IP) } +// IPNetOverlaps returns ipnets that overlap with the ref +func IPNetOverlaps(ref *net.IPNet, ipnets ...*net.IPNet) []*net.IPNet { + var overlaps []*net.IPNet + for _, ipnet := range ipnets { + if ref.Contains(ipnet.IP) || ipnet.Contains(ref.IP) { + overlaps = append(overlaps, ipnet) + } + } + return overlaps +} + // ParseIPNets parses the provided string formatted CIDRs func ParseIPNets(strs []string) ([]*net.IPNet, error) { ipnets := make([]*net.IPNet, len(strs))