diff --git a/go-controller/pkg/allocator/mac/mac_suite_test.go b/go-controller/pkg/allocator/mac/mac_suite_test.go
new file mode 100644
index 0000000000..da52f4ae17
--- /dev/null
+++ b/go-controller/pkg/allocator/mac/mac_suite_test.go
@@ -0,0 +1,13 @@
+package mac_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestMAC(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "MAC Suite")
+}
diff --git a/go-controller/pkg/allocator/mac/reservation.go b/go-controller/pkg/allocator/mac/reservation.go
new file mode 100644
index 0000000000..b3cbc3777a
--- /dev/null
+++ b/go-controller/pkg/allocator/mac/reservation.go
@@ -0,0 +1,74 @@
+package mac
+
+import (
+	"errors"
+	"net"
+	"sync"
+)
+
+type Register interface {
+	Reserve(owner string, mac net.HardwareAddr) error
+	Release(owner string, mac net.HardwareAddr) error
+}
+
+// ReservationManager tracks reserved MAC address requests of pods and detects MAC conflicts,
+// where one pod requests a static MAC address that is already used by another pod.
+type ReservationManager struct {
+	// lock for storing a MAC reservation.
+	lock sync.Mutex
+	// store for reserved MAC address requests by owner. Key is the MAC address, value is the owner identifier.
+	store map[string]string
+}
+
+// NewManager creates a new ReservationManager.
+func NewManager() *ReservationManager {
+	return &ReservationManager{
+		store: make(map[string]string),
+	}
+}
+
+var ErrReserveMACConflict = errors.New("MAC address already in use")
+var ErrMACReserved = errors.New("MAC address already reserved for the given owner")
+
+// Reserve stores the address reservation and its owner.
+// Returns ErrReserveMACConflict when "mac" is already reserved by a different owner.
+// Returns ErrMACReserved when "mac" is already reserved by the given owner.
+func (n *ReservationManager) Reserve(owner string, mac net.HardwareAddr) error {
+	n.lock.Lock()
+	defer n.lock.Unlock()
+
+	macKey := mac.String()
+	currentOwner, macReserved := n.store[macKey]
+	if macReserved && currentOwner != owner {
+		return ErrReserveMACConflict
+	}
+	if macReserved {
+		return ErrMACReserved
+	}
+
+	n.store[macKey] = owner
+
+	return nil
+}
+
+var ErrReleaseMismatchOwner = errors.New("MAC reserved for different owner")
+
+// Release removes the given owner's reservation of "mac" from the store.
+// Returns ErrReleaseMismatchOwner when "mac" is reserved for a different owner than the given one.
+func (n *ReservationManager) Release(owner string, mac net.HardwareAddr) error {
+	n.lock.Lock()
+	defer n.lock.Unlock()
+
+	macKey := mac.String()
+	currentOwner, macReserved := n.store[macKey]
+	if !macReserved {
+		return nil
+	}
+	if currentOwner != owner {
+		return ErrReleaseMismatchOwner
+	}
+
+	delete(n.store, macKey)
+
+	return nil
+}
diff --git a/go-controller/pkg/allocator/mac/reservation_test.go b/go-controller/pkg/allocator/mac/reservation_test.go
new file mode 100644
index 0000000000..a463948c42
--- /dev/null
+++ b/go-controller/pkg/allocator/mac/reservation_test.go
@@ -0,0 +1,73 @@
+package mac_test
+
+import (
+	"fmt"
+	"net"
+
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/mac"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("ReservationManager", func() {
+	const owner1 = "namespace1/pod1"
+	const owner2 = "namespace2/pod2"
+
+	var testMgr *mac.ReservationManager
+	var mac1 net.HardwareAddr
+
+	BeforeEach(func() {
+		var err error
+		mac1, err = net.ParseMAC("aa:bb:cc:dd:ee:f1")
+		Expect(err).NotTo(HaveOccurred())
+
+		testMgr = mac.NewManager()
+	})
+
+	Context("reserve", func() {
+		It("should fail on repeated reservation for the same owner", func() {
+			Expect(testMgr.Reserve(owner1, mac1)).To(Succeed())
+			Expect(testMgr.Reserve(owner1, mac1)).To(MatchError(mac.ErrMACReserved))
+		})
+
+		It("should fail reserving an existing MAC for a different owner", func() {
+			Expect(testMgr.Reserve(owner1, mac1)).To(Succeed())
+			Expect(testMgr.Reserve(owner2, mac1)).To(MatchError(mac.ErrReserveMACConflict),
+				"different owner should raise a conflict")
+		})
+
+		It("should succeed", func() {
+			for i := 0; i < 5; i++ {
+				owner := fmt.Sprintf("ns%d/test", i)
+				mac := net.HardwareAddr(fmt.Sprintf("02:02:02:02:02:0%d", i))
+				Expect(testMgr.Reserve(owner, mac)).To(Succeed())
+			}
+		})
+	})
+
+	Context("release a reserved mac", func() {
+		BeforeEach(func() {
+			By("reserve mac1 for owner1")
+			Expect(testMgr.Reserve(owner1, mac1)).To(Succeed())
+		})
+
+		It("should not release MAC given wrong owner", func() {
+			Expect(testMgr.Release(owner2, mac1)).To(MatchError(mac.ErrReleaseMismatchOwner))
+
+			Expect(testMgr.Reserve(owner2, mac1)).To(MatchError(mac.ErrReserveMACConflict),
+				"mac1 reserved for owner1, it should raise a conflict")
+		})
+
+		It("should succeed", func() {
+			Expect(testMgr.Release(owner1, mac1)).To(Succeed())
+
+			Expect(testMgr.Reserve(owner2, mac1)).To(Succeed(),
+				"reserving mac1 for different owner should not raise a conflict")
+		})
+	})
+
+	It("release non reserved mac should succeed (no-op)", func() {
+		Expect(testMgr.Release(owner1, mac1)).To(Succeed())
+	})
+})
diff --git a/go-controller/pkg/allocator/pod/macs.go b/go-controller/pkg/allocator/pod/macs.go
new file mode 100644
index 0000000000..8a6ebb4653
--- /dev/null
+++ b/go-controller/pkg/allocator/pod/macs.go
@@ -0,0 +1,178 @@
+package pod
+
+import (
+	"errors"
+	"fmt"
+	"net"
+
+	kubevirtv1 "kubevirt.io/api/core/v1"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/klog/v2"
+	k8snet "k8s.io/utils/net"
+
+	allocmac "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/mac"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kubevirt"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
+)
+
+// macOwner composes the owner identifier used for MAC address management.
+// Returns "<namespace>/<pod name>" for regular pods and "<namespace>/<vm name>" for VM pods.
+func macOwner(pod *corev1.Pod) string {
+	// VM pods are keyed by the VM name label, so all pods of the same VM share one owner ID.
+	if vmName, ok := pod.Labels[kubevirtv1.VirtualMachineNameLabel]; ok {
+		return fmt.Sprintf("%s/%s", pod.Namespace, vmName)
+	}
+
+	// Default to pod-based identifier
+	return fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
+}
+
+// ReleasePodReservedMacAddress releases the pod's reserved MAC address, if one exists,
+// by removing its reservation from the MAC manager store.
+func (allocator *PodAnnotationAllocator) ReleasePodReservedMacAddress(pod *corev1.Pod, mac net.HardwareAddr) error {
+	networkName := allocator.netInfo.GetNetworkName()
+	if allocator.macRegistry == nil {
+		klog.V(5).Infof("No MAC registry defined for network %q, skipping MAC address release", networkName)
+		return nil
+	}
+
+	macOwnerID := macOwner(pod)
+	if vmKey := kubevirt.ExtractVMNameFromPod(pod); vmKey != nil {
+		allVMPodsCompleted, err := kubevirt.AllVMPodsAreCompleted(allocator.podLister, pod)
+		if err != nil {
+			return fmt.Errorf("failed checking all VM %q pods are completed: %v", vmKey, err)
+		}
+		if !allVMPodsCompleted {
+			klog.V(5).Infof(`Retaining MAC address %q for owner %q on network %q because it is in use by another VM pod`,
+				mac, macOwnerID, networkName)
+			return nil
+		}
+	}
+
+	if err := allocator.macRegistry.Release(macOwnerID, mac); err != nil {
+		if errors.Is(err, allocmac.ErrReleaseMismatchOwner) {
+			// The given pod is not the original MAC owner, thus there is no point in retrying; avoid retries by not returning an error.
+			klog.Errorf(`Failed to release MAC %q for owner %q on network %q because it is reserved for a different owner`, mac, macOwnerID, networkName)
+		} else {
+			return fmt.Errorf("failed to release MAC address %q for owner %q on network %q: %v", mac, macOwnerID, networkName, err)
+		}
+	} else {
+		klog.V(5).Infof("Released MAC %q owned by %q on network %q", mac, macOwnerID, networkName)
+	}
+
+	return nil
+}
+
+// InitializeMACRegistry initializes the MAC reservation tracker with the MAC addresses already in use on the network.
+func (allocator *PodAnnotationAllocator) InitializeMACRegistry() error {
+	networkName := allocator.netInfo.GetNetworkName()
+	if allocator.macRegistry == nil {
+		klog.V(5).Infof("No MAC registry defined for network %s, skipping initialization", networkName)
+		return nil
+	}
+
+	pods, err := allocator.fetchNetworkPods()
+	if err != nil {
+		return err
+	}
+	podMACs, err := indexMACAddrByPodPrimaryUDN(pods)
+	if err != nil {
+		return err
+	}
+
+	// Reserve MACs used by infra first, to prevent network disruptions to connected pods in case of conflict.
+	infraMACs := calculateSubnetsInfraMACAddresses(allocator.netInfo)
+	for owner, mac := range infraMACs {
+		if err := allocator.macRegistry.Reserve(owner, mac); err != nil {
+			return fmt.Errorf("failed to reserve infra MAC %q for owner %q on network %q: %w",
+				mac, owner, networkName, err)
+		}
+		klog.V(5).Infof("Reserved MAC %q on initialization, for infra %q on network %q", mac, owner, networkName)
+	}
+	for owner, mac := range podMACs {
+		if rerr := allocator.macRegistry.Reserve(owner, mac); rerr != nil {
+			return fmt.Errorf("failed to reserve pod MAC %q for owner %q on network %q: %w",
+				mac, owner, networkName, rerr)
+		}
+		klog.V(5).Infof("Reserved MAC %q on initialization, for pod %q on network %q", mac, owner, networkName)
+	}
+
+	return nil
+}
+
+// calculateSubnetsInfraMACAddresses returns a map of owner names to the network infrastructure MAC addresses.
+// It derives the gateway and management port MAC addresses from their IP addresses.
+func calculateSubnetsInfraMACAddresses(netInfo util.NetInfo) map[string]net.HardwareAddr { + reservedMACs := map[string]net.HardwareAddr{} + for _, subnet := range netInfo.Subnets() { + if subnet.CIDR == nil { + continue + } + + gwIP := netInfo.GetNodeGatewayIP(subnet.CIDR) + gwMAC := util.IPAddrToHWAddr(gwIP.IP) + gwKey := fmt.Sprintf("gw-v%s", k8snet.IPFamilyOf(gwIP.IP)) + reservedMACs[gwKey] = gwMAC + + mgmtIP := netInfo.GetNodeManagementIP(subnet.CIDR) + mgmtMAC := util.IPAddrToHWAddr(mgmtIP.IP) + mgmtKey := fmt.Sprintf("mgmt-v%s", k8snet.IPFamilyOf(mgmtIP.IP)) + reservedMACs[mgmtKey] = mgmtMAC + } + + return reservedMACs +} + +// fetchNetworkPods fetch running pods in to the network NAD namespaces. +func (allocator *PodAnnotationAllocator) fetchNetworkPods() ([]*corev1.Pod, error) { + var netPods []*corev1.Pod + for _, ns := range allocator.netInfo.GetNADNamespaces() { + pods, err := allocator.podLister.Pods(ns).List(labels.Everything()) + if err != nil { + return nil, fmt.Errorf("failed to list pods for namespace %q: %v", ns, err) + } + for _, pod := range pods { + if pod == nil { + continue + } + if !util.PodRunning(pod) { + continue + } + // Check if pod is being deleted and has no finalizers (about to be disposed) + if util.PodTerminating(pod) && len(pod.Finalizers) == 0 { + continue + } + netPods = append(netPods, pod) + } + } + return netPods, nil +} + +// indexMACAddrByPodPrimaryUDN indexes the MAC address of the primary UDN for each pod. +// It returns a map where keys are the owner ID (composed by macOwner, e.g., "namespace/pod-name") +// and the values are the corresponding MAC addresses. +func indexMACAddrByPodPrimaryUDN(pods []*corev1.Pod) (map[string]net.HardwareAddr, error) { + indexedMACs := map[string]net.HardwareAddr{} + for _, pod := range pods { + podNetworks, err := util.UnmarshalPodAnnotationAllNetworks(pod.Annotations) + if err != nil { + return nil, fmt.Errorf(`failed to unmarshal pod-network annotation "%s/%s": %v`, pod.Namespace, pod.Name, err) + } + for _, network := range podNetworks { + if network.Role != types.NetworkRolePrimary { + // filter out default network and secondary user-defined networks + continue + } + mac, perr := net.ParseMAC(network.MAC) + if perr != nil { + return nil, fmt.Errorf(`failed to parse mac address "%s/%s": %v`, pod.Namespace, pod.Name, perr) + } + indexedMACs[macOwner(pod)] = mac + } + } + + return indexedMACs, nil +} diff --git a/go-controller/pkg/allocator/pod/pod_annotation.go b/go-controller/pkg/allocator/pod/pod_annotation.go index d174a5e3d9..6f84005c67 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation.go +++ b/go-controller/pkg/allocator/pod/pod_annotation.go @@ -1,6 +1,7 @@ package pod import ( + "errors" "fmt" "net" @@ -15,6 +16,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/mac" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/generator/udn" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -30,20 +32,34 @@ type PodAnnotationAllocator struct { netInfo util.NetInfo ipamClaimsReconciler persistentips.PersistentAllocations + macRegistry mac.Register } +type AllocatorOption func(*PodAnnotationAllocator) + func NewPodAnnotationAllocator( netInfo util.NetInfo, podLister listers.PodLister, kube 
kube.InterfaceOVN, claimsReconciler persistentips.PersistentAllocations, + opts ...AllocatorOption, ) *PodAnnotationAllocator { - return &PodAnnotationAllocator{ + p := &PodAnnotationAllocator{ podLister: podLister, kube: kube, netInfo: netInfo, ipamClaimsReconciler: claimsReconciler, } + for _, opt := range opts { + opt(p) + } + return p +} + +func WithMACRegistry(m mac.Register) AllocatorOption { + return func(p *PodAnnotationAllocator) { + p.macRegistry = m + } } // AllocatePodAnnotation allocates the PodAnnotation which includes IPs, a mac @@ -75,6 +91,7 @@ func (allocator *PodAnnotationAllocator) AllocatePodAnnotation( pod, network, allocator.ipamClaimsReconciler, + allocator.macRegistry, reallocateIP, networkRole, ) @@ -89,6 +106,7 @@ func allocatePodAnnotation( pod *corev1.Pod, network *nadapi.NetworkSelectionElement, claimsReconciler persistentips.PersistentAllocations, + macRegistry mac.Register, reallocateIP bool, networkRole string) ( updatedPod *corev1.Pod, @@ -108,6 +126,7 @@ func allocatePodAnnotation( pod, network, claimsReconciler, + macRegistry, reallocateIP, networkRole, ) @@ -159,6 +178,7 @@ func (allocator *PodAnnotationAllocator) AllocatePodAnnotationWithTunnelID( pod, network, allocator.ipamClaimsReconciler, + allocator.macRegistry, reallocateIP, networkRole, ) @@ -174,6 +194,7 @@ func allocatePodAnnotationWithTunnelID( pod *corev1.Pod, network *nadapi.NetworkSelectionElement, claimsReconciler persistentips.PersistentAllocations, + macRegistry mac.Register, reallocateIP bool, networkRole string) ( updatedPod *corev1.Pod, @@ -190,6 +211,7 @@ func allocatePodAnnotationWithTunnelID( pod, network, claimsReconciler, + macRegistry, reallocateIP, networkRole, ) @@ -264,6 +286,7 @@ func allocatePodAnnotationWithRollback( pod *corev1.Pod, network *nadapi.NetworkSelectionElement, claimsReconciler persistentips.PersistentAllocations, + macRegistry mac.Register, reallocateIP bool, networkRole string) ( updatedPod *corev1.Pod, @@ -276,6 +299,8 @@ func allocatePodAnnotationWithRollback( nadName = util.GetNADName(network.Namespace, network.Name) } podDesc := fmt.Sprintf("%s/%s/%s", nadName, pod.Namespace, pod.Name) + macOwnerID := macOwner(pod) + networkName := netInfo.GetNetworkName() // the IPs we allocate in this function need to be released back to the IPAM // pool if there is some error in any step past the point the IPs were @@ -283,12 +308,23 @@ func allocatePodAnnotationWithRollback( // for defer to work correctly. 
var releaseIPs []*net.IPNet var releaseID int + var releaseMAC net.HardwareAddr rollback = func() { if releaseID != 0 { idAllocator.ReleaseID() klog.V(5).Infof("Released ID %d", releaseID) releaseID = 0 } + + if len(releaseMAC) > 0 && macRegistry != nil { + if rerr := macRegistry.Release(macOwnerID, releaseMAC); rerr != nil { + klog.Errorf("Failed to release MAC %q on rollback, owner: %q, network: %q: %v", releaseMAC.String(), macOwnerID, networkName, rerr) + } else { + klog.V(5).Infof("Released MAC %q on rollback, owner: %q, network: %q", releaseMAC.String(), macOwnerID, networkName) + } + releaseMAC = nil + } + if len(releaseIPs) == 0 { return } @@ -308,6 +344,7 @@ func allocatePodAnnotationWithRollback( }() podAnnotation, _ = util.UnmarshalPodAnnotation(pod.Annotations, nadName) + isNetworkAllocated := podAnnotation != nil if podAnnotation == nil { podAnnotation = &util.PodAnnotation{} } @@ -391,7 +428,7 @@ func allocatePodAnnotationWithRollback( if hasIPAM { if len(tentative.IPs) > 0 { - if err = ipAllocator.AllocateIPs(tentative.IPs); err != nil && !ip.IsErrAllocated(err) { + if err = ipAllocator.AllocateIPs(tentative.IPs); err != nil && !shouldSkipAllocateIPsError(err, isNetworkAllocated, ipamClaim) { err = fmt.Errorf("failed to ensure requested or annotated IPs %v for %s: %w", util.StringSlice(tentative.IPs), podDesc, err) if !reallocateOnNonStaticIPRequest { @@ -402,7 +439,7 @@ func allocatePodAnnotationWithRollback( tentative.IPs = nil } - if err == nil && !hasIPAMClaim { // if we have persistentIPs, we should *not* release them on rollback + if err == nil && (!hasIPAMClaim || !isNetworkAllocated) { // copy the IPs that would need to be released releaseIPs = util.CopyIPNets(tentative.IPs) } @@ -435,6 +472,21 @@ func allocatePodAnnotationWithRollback( if err != nil { return } + if macRegistry != nil { + if rerr := macRegistry.Reserve(macOwnerID, tentative.MAC); rerr != nil { + // repeated requests are no-op because mac already reserved + if !errors.Is(rerr, mac.ErrMACReserved) { + // avoid leaking the network name because this error may reflect of a pod event, which is visible to non-admins. + err = fmt.Errorf("failed to reserve MAC address %q for owner %q on network attachment %q: %w", + tentative.MAC, macOwnerID, nadName, rerr) + klog.Errorf("%v, network-name: %q", err, networkName) + return + } + } else { + klog.V(5).Infof("Reserved MAC %q for owner %q on network %q nad %q", tentative.MAC, macOwnerID, networkName, nadName) + releaseMAC = tentative.MAC + } + } // handle routes & gateways err = AddRoutesGatewayIP(netInfo, node, pod, tentative, network) @@ -643,3 +695,30 @@ func AddRoutesGatewayIP( return nil } + +// shouldSkipAllocateIPsError determines whether to skip/ignore IP allocation errors +// in scenarios where IPs may already be legitimately allocated. +// Returns false if the error is not ErrAllocated or if none of the skip conditions are met. True otherwise. 
+func shouldSkipAllocateIPsError(err error, networkAllocated bool, ipamClaim *ipamclaimsapi.IPAMClaim) bool { + // Only skip if it's an "already allocated" error + if !ip.IsErrAllocated(err) { + return false + } + + // If PreconfiguredUDNAddressesEnabled is disabled, always skip ErrAllocated + if !util.IsPreconfiguredUDNAddressesEnabled() { + return true + } + + // Always skip ErrAllocated if network annotation already persisted on pod + if networkAllocated { + return true + } + + // For persistent IP VM/Pods, if IPAMClaim already has IPs allocated, then ip already allocated, skip ErrAllocated + if ipamClaim != nil && len(ipamClaim.Status.IPs) > 0 { + return true + } + + return false +} diff --git a/go-controller/pkg/allocator/pod/pod_annotation_test.go b/go-controller/pkg/allocator/pod/pod_annotation_test.go index e4642ac027..d2719bd8c1 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation_test.go +++ b/go-controller/pkg/allocator/pod/pod_annotation_test.go @@ -18,6 +18,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/mac" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/persistentips" @@ -89,6 +90,22 @@ func ipamClaimKey(namespace string, claimName string) string { return fmt.Sprintf("%s/%s", namespace, claimName) } +type macRegistryStub struct { + reserveErr error + releaseMAC net.HardwareAddr + reservedMAC net.HardwareAddr +} + +func (m *macRegistryStub) Reserve(_ string, mac net.HardwareAddr) error { + m.reservedMAC = mac + return m.reserveErr +} + +func (m *macRegistryStub) Release(_ string, mac net.HardwareAddr) error { + m.releaseMAC = mac + return nil +} + func Test_allocatePodAnnotationWithRollback(t *testing.T) { randomMac, err := util.GenerateRandMAC() if err != nil { @@ -104,6 +121,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { type args struct { ipAllocator subnet.NamedAllocator idAllocator id.NamedAllocator + macRegistry *macRegistryStub network *nadapi.NetworkSelectionElement ipamClaim *ipamclaimsapi.IPAMClaim reallocate bool @@ -125,6 +143,8 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { wantPodAnnotation *util.PodAnnotation wantReleasedIPs []*net.IPNet wantReleasedIPsOnRollback []*net.IPNet + wantReservedMAC net.HardwareAddr + wantReleaseMACOnRollback net.HardwareAddr wantReleaseID bool wantRelasedIDOnRollback bool wantErr bool @@ -718,6 +738,10 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { name: "IPAM persistent IPs, IP address re-use", ipam: true, persistentIPAllocation: true, + podAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.200/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.200/24")[0].IP), + }, args: args{ network: &nadapi.NetworkSelectionElement{ IPAMClaimReference: "my-ipam-claim", @@ -734,7 +758,6 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { nextIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, - wantUpdatedPod: true, wantPodAnnotation: &util.PodAnnotation{ IPs: ovntest.MustParseIPNets("192.168.0.200/24"), MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.200/24")[0].IP), @@ -880,6 +903,153 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) 
{ wantErr: true, wantReleaseID: true, }, + { + // Test ErrAllocated is always skipped with EnablePreconfiguredUDNAddresses disabled (legacy behavior) + name: "ErrAllocated should be skipped when EnablePreconfiguredUDNAddresses disabled", + ipam: true, + persistentIPAllocation: true, + enablePreconfiguredUDNAddresses: false, + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPAMClaimReference: "my-ipam-claim", + }, + ipamClaim: &ipamclaimsapi.IPAMClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-ipam-claim", + }, + Status: ipamclaimsapi.IPAMClaimStatus{ + IPs: []string{"192.168.0.200/24"}, + }, + }, + ipAllocator: &ipAllocatorStub{ + nextIPs: ovntest.MustParseIPNets("192.168.0.200/24"), + allocateIPsError: ipam.ErrAllocated, + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.200/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.200/24")[0].IP), + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: &net.IPNet{ + IP: ovntest.MustParseIP("100.65.0.0").To4(), + Mask: net.CIDRMask(16, 32), + }, + NextHop: ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + Role: types.NetworkRolePrimary, + }, + // With legacy behavior (feature flag disabled), IPs should NOT be tracked for rollback when hasIPAMClaim is true + role: types.NetworkRolePrimary, + }, + { + // Test ErrAllocated with EnablePreconfiguredUDNAddresses enabled and network annotation persisted - should not fail with ErrAllocated + name: "Pod with persisted annotation should skip ErrAllocated", + ipam: true, + persistentIPAllocation: true, + enablePreconfiguredUDNAddresses: true, + podAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.150/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.150/24")[0].IP), + }, + args: args{ + ipAllocator: &ipAllocatorStub{ + nextIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + allocateIPsError: ipam.ErrAllocated, // Should be skipped because network already allocated + }, + }, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.150/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.150/24")[0].IP), + }, + // No wantUpdatedPod because annotation already exists and no changes needed + }, + { + // Test VM restart/migration case: new pod spawned with no network annotation but IPAMClaim has IPs + name: "VM restart/migration new pod with IPAMClaim IPs should skip ErrAllocated", + ipam: true, + persistentIPAllocation: true, + enablePreconfiguredUDNAddresses: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPAMClaimReference: "vm-ipam-claim", + }, + ipamClaim: &ipamclaimsapi.IPAMClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vm-ipam-claim", + }, + Status: ipamclaimsapi.IPAMClaimStatus{ + IPs: []string{"192.168.0.250/24"}, // IPAMClaim has IPs from previous pod + }, + }, + ipAllocator: &ipAllocatorStub{ + nextIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + allocateIPsError: ipam.ErrAllocated, // Should be skipped because IPAMClaim has IPs + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.250/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.250/24")[0].IP), + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: &net.IPNet{ + IP: ovntest.MustParseIP("100.65.0.0").To4(), + Mask: net.CIDRMask(16, 32), + }, + NextHop: 
ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + Role: types.NetworkRolePrimary, + }, + role: types.NetworkRolePrimary, + }, + { + // Test ErrAllocated when pod with no annotation and IPAMClaim has no IPs allocated yet - should fail on ErrAllocated + name: "New pod with IPAMClaim but no IPs yet should fail on ErrAllocated", + ipam: true, + persistentIPAllocation: true, + enablePreconfiguredUDNAddresses: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPAMClaimReference: "empty-ipam-claim", + IPRequest: []string{"192.168.0.100/24"}, // Request specific IP to trigger AllocateIPs call + }, + reallocate: false, // Don't reallocate on error + ipamClaim: &ipamclaimsapi.IPAMClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "empty-ipam-claim", + }, + Status: ipamclaimsapi.IPAMClaimStatus{ + IPs: []string{}, // No IPs allocated yet + }, + }, + ipAllocator: &ipAllocatorStub{ + nextIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + allocateIPsError: ipam.ErrAllocated, // Should NOT be skipped, should cause failure + }, + }, + wantErr: true, // Should fail because ErrAllocated is not skipped + }, + { + // In a scenario of VM migration multiple pods using the same network configuration including the MAC address. + // When the migration destination pod is created, the pod-allocator should relax ErrMACReserved error + // to allow the migration destination pod use the same MAC as the migration source pod, for the migration to succeed. + name: "macRegistry should not release already reserved MAC on rollback", + args: args{ + network: &nadapi.NetworkSelectionElement{MacRequest: requestedMAC}, + macRegistry: &macRegistryStub{reserveErr: mac.ErrMACReserved}, + }, + wantPodAnnotation: &util.PodAnnotation{ + MAC: requestedMACParsed, + }, + wantReservedMAC: requestedMACParsed, + wantReleaseMACOnRollback: nil, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -898,6 +1068,13 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { config.OVNKubernetesFeature.EnableMultiNetwork = !tt.multiNetworkDisabled config.OVNKubernetesFeature.EnableNetworkSegmentation = true config.OVNKubernetesFeature.EnablePreconfiguredUDNAddresses = tt.enablePreconfiguredUDNAddresses + + var macRegistry mac.Register + macRegistry = mac.NewManager() + if tt.args.macRegistry != nil { + macRegistry = tt.args.macRegistry + } + config.IPv4Mode = true if tt.isSingleStackIPv6 { config.IPv4Mode = false @@ -982,6 +1159,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { pod, network, claimsReconciler, + macRegistry, tt.args.reallocate, tt.role, ) @@ -998,6 +1176,12 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { tt.args.idAllocator.(*idAllocatorStub).releasedID = false } + if tt.args.macRegistry != nil { + reservedMAC := tt.args.macRegistry.reservedMAC + g.Expect(reservedMAC).To(gomega.Equal(tt.wantReservedMAC), "Reserve MAC on error behaved unexpectedly") + tt.args.macRegistry.reservedMAC = nil + } + rollback() if tt.args.ipAllocator != nil { @@ -1010,6 +1194,12 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { g.Expect(releasedID).To(gomega.Equal(tt.wantRelasedIDOnRollback), "Release ID on rollback behaved unexpectedly") } + if tt.args.macRegistry != nil { + releaseMAC := tt.args.macRegistry.releaseMAC + g.Expect(releaseMAC).To(gomega.Equal(tt.wantReleaseMACOnRollback), "Release MAC on rollback behaved unexpectedly") + tt.args.macRegistry.releaseMAC = nil + } + if tt.wantErr { // check the expected error after we have checked above that the // rollback has 
behaved as expected diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index f8241aadca..88505bdc07 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -21,6 +21,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/mac" annotationalloc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/node" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/pod" @@ -229,7 +230,8 @@ func (ncc *networkClusterController) init() error { ipamClaimsReconciler persistentips.PersistentAllocations ) - if ncc.allowPersistentIPs() { + persistentIPsEnabled := ncc.allowPersistentIPs() + if persistentIPsEnabled { ncc.retryIPAMClaims = ncc.newRetryFramework(factory.IPAMClaimsType, true) ncc.ipamClaimReconciler = persistentips.NewIPAMClaimReconciler( ncc.kube, @@ -239,11 +241,20 @@ func (ncc *networkClusterController) init() error { ipamClaimsReconciler = ncc.ipamClaimReconciler } + var podAllocOpts []annotationalloc.AllocatorOption + if util.IsPreconfiguredUDNAddressesEnabled() && + ncc.IsPrimaryNetwork() && + persistentIPsEnabled && + ncc.TopologyType() == types.Layer2Topology { + podAllocOpts = append(podAllocOpts, annotationalloc.WithMACRegistry(mac.NewManager())) + } + podAllocationAnnotator = annotationalloc.NewPodAnnotationAllocator( ncc.GetNetInfo(), ncc.watchFactory.PodCoreInformer().Lister(), ncc.kube, ipamClaimsReconciler, + podAllocOpts..., ) ncc.podAllocator = pod.NewPodAllocator( diff --git a/go-controller/pkg/clustermanager/pod/allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go index 5bd9cafcc1..b9f71a045f 100644 --- a/go-controller/pkg/clustermanager/pod/allocator.go +++ b/go-controller/pkg/clustermanager/pod/allocator.go @@ -19,6 +19,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/mac" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" @@ -99,6 +100,11 @@ func (a *PodAllocator) Init() error { ) } + klog.Infof("Initializing network %s pod annotation allocator MAC registry", a.netInfo.GetNetworkName()) + if err := a.podAnnotationAllocator.InitializeMACRegistry(); err != nil { + return fmt.Errorf("failed to initialize MAC addresses registry: %w", err) + } + return nil } @@ -274,6 +280,7 @@ func (a *PodAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, network *net hasIPAMClaim = false } if hasIPAMClaim { + var err error ipamClaim, err := a.ipamClaimsReconciler.FindIPAMClaim(network.IPAMClaimReference, network.Namespace) hasIPAMClaim = ipamClaim != nil && len(ipamClaim.Status.IPs) > 0 if apierrors.IsNotFound(err) { @@ -315,6 +322,13 @@ func (a *PodAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, network *net klog.V(5).Infof("Released IPs %v", util.StringSlice(podAnnotation.IPs)) } + if doRelease { + if err := 
a.podAnnotationAllocator.ReleasePodReservedMacAddress(pod, podAnnotation.MAC); err != nil { + return fmt.Errorf(`failed to release pod "%s/%s" mac %q: %v`, + pod.Namespace, pod.Name, podAnnotation.MAC, err) + } + } + if podDeleted { a.deleteReleasedPod(nad, string(pod.UID)) } else { @@ -364,7 +378,9 @@ func (a *PodAllocator) allocatePodOnNAD(pod *corev1.Pod, nad string, network *ne ) if err != nil { - if errors.Is(err, ipallocator.ErrFull) { + if errors.Is(err, ipallocator.ErrFull) || + errors.Is(err, ipallocator.ErrAllocated) || + errors.Is(err, mac.ErrReserveMACConflict) { a.recordPodErrorEvent(pod, err) } return err diff --git a/go-controller/pkg/clustermanager/pod/allocator_test.go b/go-controller/pkg/clustermanager/pod/allocator_test.go index 51bab90fc5..72eeb16061 100644 --- a/go-controller/pkg/clustermanager/pod/allocator_test.go +++ b/go-controller/pkg/clustermanager/pod/allocator_test.go @@ -3,6 +3,7 @@ package pod import ( "context" "encoding/json" + "errors" "fmt" "net" "sync" @@ -26,6 +27,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/mac" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -43,6 +45,7 @@ type testPod struct { hostNetwork bool completed bool network *nadapi.NetworkSelectionElement + labels map[string]string } func (p testPod) getPod(t *testing.T) *corev1.Pod { @@ -53,6 +56,7 @@ func (p testPod) getPod(t *testing.T) *corev1.Pod { UID: apitypes.UID("pod"), Namespace: "namespace", Annotations: map[string]string{}, + Labels: p.labels, }, Spec: corev1.PodSpec{ HostNetwork: p.hostNetwork, @@ -169,6 +173,23 @@ func (nas *namedAllocatorStub) ReleaseIPs([]*net.IPNet) error { return nil } +type macRegistryStub struct { + reservedMAC, releasedMAC net.HardwareAddr + ownerID string + reserveErr, releaseErr error +} + +func (m *macRegistryStub) Reserve(owner string, mac net.HardwareAddr) error { + m.ownerID = owner + m.reservedMAC = mac + return m.reserveErr +} +func (m *macRegistryStub) Release(owner string, mac net.HardwareAddr) error { + m.ownerID = owner + m.releasedMAC = mac + return m.releaseErr +} + func TestPodAllocator_reconcileForNAD(t *testing.T) { type args struct { old *testPod @@ -178,20 +199,26 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { release bool } tests := []struct { - name string - args args - ipam bool - idAllocation bool - tracked bool - role string - expectAllocate bool - expectIPRelease bool - expectIDRelease bool - expectTracked bool - fullIPPool bool - expectEvents []string - expectError string - podAnnotation *util.PodAnnotation + name string + args args + ipam bool + idAllocation bool + macRegistry *macRegistryStub + tracked bool + role string + expectAllocate bool + expectIPRelease bool + expectIDRelease bool + expectMACReserve *net.HardwareAddr + expectMACRelease *net.HardwareAddr + expectMACOwnerID string + expectTracked bool + fullIPPool bool + expectEvents []string + expectError string + podAnnotation *util.PodAnnotation + newPodCopyRunning bool + podListerErr error }{ { name: "Pod not scheduled", @@ -541,6 +568,227 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { expectEvents: []string{"Warning ErrorAllocatingPod failed 
to update pod namespace/pod: failed to ensure requested or annotated IPs [10.1.130.0/24] for namespace/nad/namespace/pod: subnet address pool exhausted"}, expectError: "failed to update pod namespace/pod: failed to ensure requested or annotated IPs [10.1.130.0/24] for namespace/nad/namespace/pod: subnet address pool exhausted", }, + + // podAllocator's macRegistry record mac on pod creation + { + name: "macRegistry should record pod's MAC", + macRegistry: &macRegistryStub{}, + args: args{ + new: &testPod{ + scheduled: true, + // use predictable MAC address for testing. + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad", MacRequest: "0a:0a:0a:0a:0a:0a"}, + }, + }, + expectMACReserve: &net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}, + expectAllocate: true, + }, + { + name: "should fail when macRegistry fail to reserve pod's MAC", + macRegistry: &macRegistryStub{reserveErr: errors.New("test reserve failure")}, + args: args{ + new: &testPod{ + scheduled: true, + // use predictable MAC address for testing. + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad", MacRequest: "0a:0a:0a:0a:0a:0a"}, + }, + }, + expectError: `failed to update pod namespace/pod: failed to reserve MAC address "0a:0a:0a:0a:0a:0a" for owner "namespace/pod" on network attachment "namespace/nad": test reserve failure`, + }, + { + name: "should emit pod event when macRegistry fail to reserve pod's MAC due to MAC conflict", + macRegistry: &macRegistryStub{reserveErr: mac.ErrReserveMACConflict}, + args: args{ + new: &testPod{ + scheduled: true, + // use predictable MAC address for testing. + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad", MacRequest: "0a:0a:0a:0a:0a:0a"}, + }, + }, + expectError: `failed to update pod namespace/pod: failed to reserve MAC address "0a:0a:0a:0a:0a:0a" for owner "namespace/pod" on network attachment "namespace/nad": MAC address already in use`, + expectEvents: []string{`Warning ErrorAllocatingPod failed to update pod namespace/pod: failed to reserve MAC address "0a:0a:0a:0a:0a:0a" for owner "namespace/pod" on network attachment "namespace/nad": MAC address already in use`}, + }, + { + name: "should NOT fail when macRegistry gets repeated reserve requests (same mac and owner)", + macRegistry: &macRegistryStub{reserveErr: mac.ErrMACReserved}, + args: args{ + new: &testPod{ + scheduled: true, + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + }, + }, + expectAllocate: true, + }, + // podAllocator's macRegistry remove mac record on pod complete/deleted + { + name: "Pod completed, macRegistry should release pod's MAC", + ipam: true, + macRegistry: &macRegistryStub{}, + podAnnotation: &util.PodAnnotation{MAC: net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}}, + args: args{ + release: true, + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + }, + }, + expectMACRelease: &net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}, + expectIPRelease: true, + expectTracked: true, + }, + { + name: "Pod completed, has VM label, macRegistry should release pod's MAC", + ipam: true, + macRegistry: &macRegistryStub{}, + podAnnotation: &util.PodAnnotation{MAC: net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}}, + args: args{ + release: true, + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + labels: 
map[string]string{"vm.kubevirt.io/name": "myvm"}, + }, + }, + expectMACRelease: &net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}, + expectIPRelease: true, + expectTracked: true, + }, + { + name: "Pod completed, should fail when macRegistry fail to release pod MAC", + ipam: true, + macRegistry: &macRegistryStub{releaseErr: errors.New("test release failure")}, + podAnnotation: &util.PodAnnotation{MAC: net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}}, + args: args{ + release: true, + new: &testPod{ + scheduled: true, + completed: true, + // use predictable MAC address for testing. + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad", MacRequest: "0a:0a:0a:0a:0a:0a"}, + }, + }, + expectError: `failed to release pod "namespace/pod" mac "0a:0a:0a:0a:0a:0a": failed to release MAC address "0a:0a:0a:0a:0a:0a" for owner "namespace/pod" on network "": test release failure`, + expectIPRelease: true, + }, + { + // In a scenario of VM migration, migration destination and source pods use the same network configuration, + // including MAC address. The MAC address should not be released as long there is at least one VM pod running. + name: "Pod completed, has VM label, macRegistry should NOT release MAC when not all associated VM pods are in completed state", + ipam: true, + macRegistry: &macRegistryStub{}, + podAnnotation: &util.PodAnnotation{MAC: net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}}, + args: args{ + release: true, + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + labels: map[string]string{"vm.kubevirt.io/name": ""}, + }, + }, + newPodCopyRunning: true, + expectTracked: true, + expectIPRelease: true, + }, + { + name: "Pod completed, has VM label, macRegistry should fail when checking associated VM pods are in complete state", + ipam: true, + macRegistry: &macRegistryStub{}, + podListerErr: errors.New("test error"), + podAnnotation: &util.PodAnnotation{MAC: net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}}, + args: args{ + release: true, + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + labels: map[string]string{"vm.kubevirt.io/name": "myvm"}, + }, + }, + expectError: `failed to release pod "namespace/pod" mac "0a:0a:0a:0a:0a:0a": failed checking all VM "namespace/myvm" pods are completed: failed finding related pods for pod namespace/pod when checking if they are completed: test error`, + expectIPRelease: true, + }, + { + name: "Pod completed, should NOT fail when macRegistry fail to release pod's MAC due to miss-match owner error", + ipam: true, + macRegistry: &macRegistryStub{releaseErr: mac.ErrReleaseMismatchOwner}, + podAnnotation: &util.PodAnnotation{MAC: net.HardwareAddr{0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a}}, + args: args{ + release: true, + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + }, + }, + expectIPRelease: true, + expectTracked: true, + }, + // podAllocator compose MAC owner IDs as expected + { + name: "should compose MAC owner ID from pod.namespace and pod.name", + macRegistry: &macRegistryStub{}, + args: args{ + new: &testPod{ + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + scheduled: true, + }, + }, + expectMACOwnerID: "namespace/pod", + expectAllocate: true, + }, + { + name: "Pod completed, should compose MAC owner ID from pod.namespace 
and pod.name", + ipam: true, + macRegistry: &macRegistryStub{}, + args: args{ + release: true, + new: &testPod{ + scheduled: true, completed: true, + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + }, + }, + expectMACOwnerID: "namespace/pod", + expectTracked: true, + expectIPRelease: true, + }, + { + // In a scenario of VM migration, migration destination and source pods use the same network configuration, + // including MAC address. Given VM pods, composing the owner ID from the VM name relaxes MAC conflict errors, + // when VM is migrated (where migration source and destination pods share the same MAC). + name: "Given pod with VM label, should compose MAC owner ID from pod.namespace and VM label", + expectMACOwnerID: "namespace/myvm", + macRegistry: &macRegistryStub{}, + args: args{ + new: &testPod{ + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + scheduled: true, + labels: map[string]string{"vm.kubevirt.io/name": "myvm"}, + }, + }, + expectAllocate: true, + }, + { + // In a scenario of VM migration, migration destination and source pods use the same network configuration, + // including MAC address. Given VM pods, composing the owner ID from the VM name relaxes MAC conflict errors, + // when VM is migrated (where migration source and destination pods share the same MAC). + name: "Pod completed, has VM label, should compose MAC owner ID from pod.namespace and VM label", + ipam: true, + macRegistry: &macRegistryStub{}, + args: args{ + release: true, + new: &testPod{ + scheduled: true, completed: true, + network: &nadapi.NetworkSelectionElement{Namespace: "namespace", Name: "nad"}, + labels: map[string]string{"vm.kubevirt.io/name": "myvm"}, + }, + }, + expectMACOwnerID: "namespace/myvm", + expectTracked: true, + expectIPRelease: true, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -553,6 +801,11 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { kubeMock := &kubemocks.InterfaceOVN{} podNamespaceLister := &v1mocks.PodNamespaceLister{} + if tt.podListerErr != nil { + podNamespaceLister.On("List", mock.AnythingOfType("labels.internalSelector")). + Return(nil, tt.podListerErr).Once() + } + podListerMock.On("Pods", mock.AnythingOfType("string")).Return(podNamespaceLister) var allocated bool @@ -606,11 +859,16 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { }) } + var opts []pod.AllocatorOption + if tt.macRegistry != nil { + opts = append(opts, pod.WithMACRegistry(tt.macRegistry)) + } podAnnotationAllocator := pod.NewPodAnnotationAllocator( netInfo, podListerMock, kubeMock, ipamClaimsReconciler, + opts..., ) testNs := "namespace" @@ -653,6 +911,18 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { if tt.args.new != nil { new = tt.args.new.getPod(t) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(new, nil) + + pods := []*corev1.Pod{new} + if tt.newPodCopyRunning { + cp := new.DeepCopy() + cp.Status.Phase = corev1.PodRunning + cp.UID = "copy" + pods = append(pods, cp) + } + if tt.podListerErr == nil { + podNamespaceLister.On("List", mock.AnythingOfType("labels.internalSelector")). 
+ Return(pods, nil).Once() + } } if tt.tracked { @@ -666,9 +936,17 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { } if tt.podAnnotation != nil { - new.Annotations, err = util.MarshalPodAnnotation(new.Annotations, tt.podAnnotation, "namespace/nad") - if err != nil { - t.Fatalf("failed to set pod annotations: %v", err) + if new != nil { + new.Annotations, err = util.MarshalPodAnnotation(new.Annotations, tt.podAnnotation, "namespace/nad") + if err != nil { + t.Fatalf("failed to set pod annotations: %v", err) + } + } + if old != nil { + old.Annotations, err = util.MarshalPodAnnotation(old.Annotations, tt.podAnnotation, "namespace/nad") + if err != nil { + t.Fatalf("failed to set pod annotations: %v", err) + } } } @@ -694,6 +972,15 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { if tt.expectTracked != a.releasedPods["namespace/nad"].Has("pod") { t.Errorf("expected pod tracked to be %v but it was %v", tt.expectTracked, a.releasedPods["namespace/nad"].Has("pod")) } + if tt.expectMACReserve != nil && tt.macRegistry.reservedMAC.String() != tt.expectMACReserve.String() { + t.Errorf("expected pod MAC reserved to be %v but it was %v", tt.expectMACReserve, tt.macRegistry.reservedMAC) + } + if tt.expectMACRelease != nil && tt.expectMACRelease.String() != tt.macRegistry.releasedMAC.String() { + t.Errorf("expected pod MAC released to be %v but it was %v", tt.expectMACRelease, tt.macRegistry.releasedMAC) + } + if tt.expectMACOwnerID != "" && tt.expectMACOwnerID != tt.macRegistry.ownerID { + t.Errorf("expected pod MAC owner ID to be %v but it was %v", tt.expectMACOwnerID, tt.macRegistry.ownerID) + } var obtainedEvents []string for { diff --git a/go-controller/pkg/kubevirt/pod.go b/go-controller/pkg/kubevirt/pod.go index 8cde9d713e..aa6fa5e79a 100644 --- a/go-controller/pkg/kubevirt/pod.go +++ b/go-controller/pkg/kubevirt/pod.go @@ -11,6 +11,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ktypes "k8s.io/apimachinery/pkg/types" + v1 "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/util/retry" libovsdbclient "github.com/ovn-kubernetes/libovsdb/client" @@ -53,14 +54,22 @@ func IsPodLiveMigratable(pod *corev1.Pod) bool { return ok } +// TODO: remove adapter once all findVMRelatedPods usages transition to use PodLister +type listPodsFn func(namespace string, selector metav1.LabelSelector) ([]*corev1.Pod, error) + // findVMRelatedPods will return pods belong to the same vm annotated at pod and // filter out the one at the function argument func findVMRelatedPods(client *factory.WatchFactory, pod *corev1.Pod) ([]*corev1.Pod, error) { + return findVMRelatedPodsWithListerFn(client.GetPodsBySelector, pod) +} + +func findVMRelatedPodsWithListerFn(listPodsFn listPodsFn, pod *corev1.Pod) ([]*corev1.Pod, error) { vmName, ok := pod.Labels[kubevirtv1.VirtualMachineNameLabel] if !ok { return nil, nil } - vmPods, err := client.GetPodsBySelector(pod.Namespace, metav1.LabelSelector{MatchLabels: map[string]string{kubevirtv1.VirtualMachineNameLabel: vmName}}) + vmLabelSelector := metav1.LabelSelector{MatchLabels: map[string]string{kubevirtv1.VirtualMachineNameLabel: vmName}} + vmPods, err := listPodsFn(pod.Namespace, vmLabelSelector) if err != nil { return nil, err } @@ -149,16 +158,19 @@ func EnsurePodAnnotationForVM(watchFactory *factory.WatchFactory, kube *kube.Kub } // AllVMPodsAreCompleted return true if all the vm pods are completed -func AllVMPodsAreCompleted(client *factory.WatchFactory, pod *corev1.Pod) (bool, error) { - if !IsPodLiveMigratable(pod) { - 
return false, nil - } - +func AllVMPodsAreCompleted(podLister v1.PodLister, pod *corev1.Pod) (bool, error) { if !util.PodCompleted(pod) { return false, nil } - vmPods, err := findVMRelatedPods(client, pod) + f := func(namespace string, selector metav1.LabelSelector) ([]*corev1.Pod, error) { + s, err := metav1.LabelSelectorAsSelector(&selector) + if err != nil { + return nil, err + } + return podLister.Pods(namespace).List(s) + } + vmPods, err := findVMRelatedPodsWithListerFn(f, pod) if err != nil { return false, fmt.Errorf("failed finding related pods for pod %s/%s when checking if they are completed: %v", pod.Namespace, pod.Name, err) } @@ -241,7 +253,7 @@ func CleanUpLiveMigratablePod(nbClient libovsdbclient.Client, watchFactory *fact return nil } - allVMPodsCompleted, err := AllVMPodsAreCompleted(watchFactory, pod) + allVMPodsCompleted, err := AllVMPodsAreCompleted(watchFactory.PodCoreInformer().Lister(), pod) if err != nil { return fmt.Errorf("failed cleaning up VM when checking if pod is leftover: %v", err) } diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index 1073954bc4..ccb7686893 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -16,6 +16,7 @@ import ( "golang.org/x/exp/maps" corev1 "k8s.io/api/core/v1" + k8sapitypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" knet "k8s.io/utils/net" @@ -1487,14 +1488,19 @@ func GetPodNADToNetworkMappingWithActiveNetwork(pod *corev1.Pod, nInfo NetInfo, if len(activeNetworkNADs) < 1 { return false, nil, fmt.Errorf("missing NADs at active network %q for namespace %q", activeNetwork.GetNetworkName(), pod.Namespace) } - activeNetworkNADKey := strings.Split(activeNetworkNADs[0], "/") + + activeNADKey := getNADWithNamespace(activeNetworkNADs, pod.Namespace) + if activeNADKey == nil { + return false, nil, fmt.Errorf("no active NAD found for namespace %q", pod.Namespace) + } + if len(networkSelections) == 0 { networkSelections = map[string]*nettypes.NetworkSelectionElement{} } activeNSE := &nettypes.NetworkSelectionElement{ - Namespace: activeNetworkNADKey[0], - Name: activeNetworkNADKey[1], + Namespace: activeNADKey.Namespace, + Name: activeNADKey.Name, } // Feature gate integration: EnablePreconfiguredUDNAddresses controls default network IP/MAC transfer to active network @@ -1523,10 +1529,26 @@ func GetPodNADToNetworkMappingWithActiveNetwork(pod *corev1.Pod, nInfo NetInfo, } } - networkSelections[activeNetworkNADs[0]] = activeNSE + networkSelections[activeNADKey.String()] = activeNSE return true, networkSelections, nil } +// getNADWithNamespace returns the first occurrence of NAD key with the given namespace name. 
+func getNADWithNamespace(nads []string, targetNamespace string) *k8sapitypes.NamespacedName { + for _, nad := range nads { + nsName := strings.Split(nad, "/") + if len(nsName) != 2 { + continue + } + ns, name := nsName[0], nsName[1] + if ns != targetNamespace { + continue + } + return &k8sapitypes.NamespacedName{Namespace: ns, Name: name} + } + return nil +} + func IsMultiNetworkPoliciesSupportEnabled() bool { return config.OVNKubernetesFeature.EnableMultiNetwork && config.OVNKubernetesFeature.EnableMultiNetworkPolicy } diff --git a/go-controller/pkg/util/multi_network_test.go b/go-controller/pkg/util/multi_network_test.go index 24746599de..81d6bd058e 100644 --- a/go-controller/pkg/util/multi_network_test.go +++ b/go-controller/pkg/util/multi_network_test.go @@ -1022,6 +1022,7 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { expectedIsAttachmentRequested bool expectedNetworkSelectionElements map[string]*nadv1.NetworkSelectionElement enablePreconfiguredUDNAddresses bool + injectPrimaryUDNNADs []string } tests := []testConfig{ @@ -1241,6 +1242,53 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { enablePreconfiguredUDNAddresses: true, expectedError: fmt.Errorf(`unexpected default NSE name "unexpected-name", expected "default"`), }, + { + desc: "should fail when no nad of the active network found on the pod namespace", + inputNamespace: "non-existent-ns", + expectedError: fmt.Errorf(`no active NAD found for namespace "non-existent-ns"`), + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + NADName: GetNADName(namespaceName, attachmentName), + Topology: ovntypes.Layer2Topology, + Role: ovntypes.NetworkRolePrimary, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + }, + { + desc: "primary l2 CUDN (replicated NADs), should return the correct active network according to pod namespace", + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: "cluster_udn_l2p"}, + NADName: GetNADName("red", "l2p"), + Topology: ovntypes.Layer2Topology, + Role: ovntypes.NetworkRolePrimary, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: "cluster_udn_l2p"}, + NADName: GetNADName("red", "l2p"), + Topology: ovntypes.Layer2Topology, + Role: ovntypes.NetworkRolePrimary, + }, + injectPrimaryUDNNADs: []string{"blue/l2p", "green/l2p"}, + inputNamespace: "blue", + inputPodAnnotations: map[string]string{ + DefNetworkAnnotation: `[{"namespace": "ovn-kubernetes", "name": "default", "ips": ["192.168.0.3/24", "fda6::3/48"], "mac": "aa:bb:cc:dd:ee:ff"}]`, + }, + enablePreconfiguredUDNAddresses: true, + expectedIsAttachmentRequested: true, + expectedNetworkSelectionElements: map[string]*nadv1.NetworkSelectionElement{ + "blue/l2p": { + Name: "l2p", + Namespace: "blue", + IPRequest: []string{"192.168.0.3/24", "fda6::3/48"}, + MacRequest: "aa:bb:cc:dd:ee:ff", + }, + }, + }, { desc: "default-network ips and mac is is ignored for Layer3 topology", @@ -1290,7 +1338,7 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { DefNetworkAnnotation: `[{"foo}`, }, enablePreconfiguredUDNAddresses: true, - expectedError: fmt.Errorf(`failed getting default-network annotation for pod "/test-pod": %w`, fmt.Errorf(`GetK8sPodDefaultNetwork: failed to parse CRD object: parsePodNetworkAnnotation: failed to parse pod Network 
Attachment Selection Annotation JSON format: unexpected end of JSON input`)), + expectedError: fmt.Errorf(`failed getting default-network annotation for pod "ns1/test-pod": %w`, fmt.Errorf(`GetK8sPodDefaultNetwork: failed to parse CRD object: parsePodNetworkAnnotation: failed to parse pod Network Attachment Selection Annotation JSON format: unexpected end of JSON input`)), }, } for _, test := range tests { @@ -1323,6 +1371,9 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { if test.inputPrimaryUDNConfig.NADName != "" { mutableNetInfo := NewMutableNetInfo(primaryUDNNetInfo) mutableNetInfo.AddNADs(test.inputPrimaryUDNConfig.NADName) + if len(test.injectPrimaryUDNNADs) > 0 { + mutableNetInfo.AddNADs(test.injectPrimaryUDNNADs...) + } primaryUDNNetInfo = mutableNetInfo } } @@ -1330,10 +1381,13 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pod", - Namespace: test.inputNamespace, + Namespace: namespaceName, Annotations: test.inputPodAnnotations, }, } + if test.inputNamespace != "" { + pod.Namespace = test.inputNamespace + } isAttachmentRequested, networkSelectionElements, err := GetPodNADToNetworkMappingWithActiveNetwork( pod, diff --git a/test/e2e/kubevirt.go b/test/e2e/kubevirt.go index 507cc6d086..62a3d82f81 100644 --- a/test/e2e/kubevirt.go +++ b/test/e2e/kubevirt.go @@ -41,6 +41,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/util/retry" e2eframework "k8s.io/kubernetes/test/e2e/framework" @@ -2359,4 +2360,174 @@ chpasswd: { expire: False } ) }) + Context("duplicate addresses validation", func() { + var ( + cudn *udnv1.ClusterUserDefinedNetwork + duplicateIPv4 = "10.128.0.200" // Static IP that will be used by both VMs + duplicateIPv6 = "2010:100:200::200" + cidrIPv4 = "10.128.0.0/24" + cidrIPv6 = "2010:100:200::0/60" + ) + + BeforeEach(func() { + if !isPreConfiguredUdnAddressesEnabled() { + Skip("ENABLE_PRE_CONF_UDN_ADDR not configured") + } + + l := map[string]string{ + "e2e-framework": fr.BaseName, + RequiredUDNNamespaceLabel: "", + } + ns, err := fr.CreateNamespace(context.TODO(), fr.BaseName, l) + Expect(err).NotTo(HaveOccurred()) + fr.Namespace = ns + namespace = fr.Namespace.Name + + dualCIDRs := filterDualStackCIDRs(fr.ClientSet, []udnv1.CIDR{udnv1.CIDR(cidrIPv4), udnv1.CIDR(cidrIPv6)}) + cudn, _ = kubevirt.GenerateCUDN(namespace, "net1", udnv1.NetworkTopologyLayer2, udnv1.NetworkRolePrimary, dualCIDRs) + createCUDN(cudn) + }) + + createVMWithStaticIP := func(vmName string, staticIPs []string) *kubevirtv1.VirtualMachine { + annotations, err := kubevirt.GenerateAddressesAnnotations("net1", staticIPs) + Expect(err).NotTo(HaveOccurred()) + + vm := fedoraWithTestToolingVM( + nil, // labels + annotations, // annotations with static IP + nil, // nodeSelector + kubevirtv1.NetworkSource{ + Pod: &kubevirtv1.PodNetwork{}, + }, + `#cloud-config +password: fedora +chpasswd: { expire: False } +`, + `version: 2 +ethernets: + eth0: + dhcp4: true + dhcp6: true + ipv6-address-generation: eui64`, + ) + vm.Name = vmName + vm.Namespace = namespace + vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].Bridge = nil + vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].Binding = &kubevirtv1.PluginBinding{Name: "l2bridge"} + return vm + } + + waitForVMReadinessAndVerifyIPs := func(vmName string, expectedIPs []string) { + vmi := 
&kubevirtv1.VirtualMachineInstance{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: vmName, + }, + } + waitVirtualMachineInstanceReadiness(vmi) + Expect(crClient.Get(context.TODO(), crclient.ObjectKeyFromObject(vmi), vmi)).To(Succeed()) + + expectedNumberOfAddresses := len(filterDualStackCIDRs(fr.ClientSet, []udnv1.CIDR{udnv1.CIDR(cidrIPv4), udnv1.CIDR(cidrIPv6)})) + actualAddresses := virtualMachineAddressesFromStatus(vmi, expectedNumberOfAddresses) + Expect(actualAddresses).To(ConsistOf(expectedIPs), fmt.Sprintf("VM %s should get the requested static IPs", vmName)) + } + + waitForVMIPodDuplicateIPFailure := func(vmName string) { + Eventually(func() []corev1.Event { + podList, err := fr.ClientSet.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{ + LabelSelector: fmt.Sprintf("%s=%s", kubevirtv1.VirtualMachineNameLabel, vmName), + }) + if err != nil || len(podList.Items) == 0 { + return nil + } + + events, err := fr.ClientSet.CoreV1().Events(namespace).List(context.TODO(), metav1.ListOptions{ + FieldSelector: fmt.Sprintf("involvedObject.name=%s", podList.Items[0].Name), + }) + if err != nil { + return nil + } + + return events.Items + }). + WithTimeout(60*time.Second). + WithPolling(2*time.Second). + Should(ContainElement(SatisfyAll( + HaveField("Type", Equal("Warning")), + HaveField("Message", ContainSubstring("provided IP is already allocated")), + )), fmt.Sprintf("VM %s should fail with IP allocation error", vmName)) + } + + It("should fail when creating second VM with duplicate static IP", func() { + staticIPs := filterIPs(fr.ClientSet, duplicateIPv4, duplicateIPv6) + + By("Creating first VM with static IP") + vm1 := createVMWithStaticIP("test-vm-1", staticIPs) + createVirtualMachine(vm1) + waitForVMReadinessAndVerifyIPs(vm1.Name, staticIPs) + + By("Creating second VM with duplicate static IP - should fail") + vm2 := createVMWithStaticIP("test-vm-2", staticIPs) + createVirtualMachine(vm2) + + By("Verifying pod fails with duplicate IP allocation error") + waitForVMIPodDuplicateIPFailure(vm2.Name) + + By("Verifying first VM is still running normally") + waitForVMReadinessAndVerifyIPs(vm1.Name, staticIPs) + }) + + newVMIWithPrimaryIfaceMAC := func(mac string) *kubevirtv1.VirtualMachineInstance { + vm := fedoraWithTestToolingVMI(nil, nil, nil, kubevirtv1.NetworkSource{Pod: &kubevirtv1.PodNetwork{}}, "#", "") + vm.Spec.Domain.Devices.Interfaces[0].Bridge = nil + vm.Spec.Domain.Devices.Interfaces[0].Binding = &kubevirtv1.PluginBinding{Name: "l2bridge"} + vm.Spec.Domain.Devices.Interfaces[0].MacAddress = mac + return vm + } + + It("should fail when creating second VM with duplicate user requested MAC", func() { + const testMAC = "02:a1:b2:c3:d4:e5" + vmi1 := newVMIWithPrimaryIfaceMAC(testMAC) + vm1 := generateVM(vmi1) + createVirtualMachine(vm1) + + By("Asserting VM with static MAC is running as expected") + Eventually(func(g Gomega) []kubevirtv1.VirtualMachineInstanceCondition { + g.Expect(crClient.Get(context.Background(), crclient.ObjectKeyFromObject(vm1), vmi1)).To(Succeed()) + return vmi1.Status.Conditions + }).WithPolling(time.Second).WithTimeout(5 * time.Minute).Should(ContainElement(SatisfyAll( + HaveField("Type", kubevirtv1.VirtualMachineInstanceAgentConnected), + HaveField("Status", corev1.ConditionTrue), + ))) + Expect(crClient.Get(context.Background(), crclient.ObjectKeyFromObject(vm1), vmi1)).To(Succeed()) + Expect(vmi1.Status.Interfaces[0].MAC).To(Equal(testMAC), "vmi status should report the requested mac") + + By("Create second VM requesting the 
same MAC address") + vmi2 := newVMIWithPrimaryIfaceMAC(testMAC) + vm2 := generateVM(vmi2) + createVirtualMachine(vm2) + + By("Asserting second VM pod has attached event reflecting MAC conflict error") + vm2Selector := fmt.Sprintf("%s=%s", kubevirtv1.VirtualMachineNameLabel, vm2.Name) + Eventually(func(g Gomega) []corev1.Event { + podList, err := fr.ClientSet.CoreV1().Pods(vm2.Namespace).List(context.Background(), metav1.ListOptions{LabelSelector: vm2Selector}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(podList.Items).ToNot(BeEmpty()) + events, err := fr.ClientSet.CoreV1().Events(vm2.Namespace).SearchWithContext(context.Background(), scheme.Scheme, &podList.Items[0]) + g.Expect(err).ToNot(HaveOccurred()) + return events.Items + }).WithTimeout(time.Minute * 1).WithPolling(time.Second * 3).Should(ContainElement(SatisfyAll( + HaveField("Type", "Warning"), + HaveField("Reason", "ErrorAllocatingPod"), + HaveField("Message", ContainSubstring("MAC address already in use")), + ))) + + By("Assert second VM not running") + Expect(crClient.Get(context.Background(), crclient.ObjectKeyFromObject(vm2), vmi2)).To(Succeed()) + Expect(vmi2.Status.Conditions).To(ContainElement(SatisfyAll( + HaveField("Type", kubevirtv1.VirtualMachineInstanceReady), + HaveField("Status", corev1.ConditionFalse), + )), "second VM should not be ready due to MAC conflict") + }) + }) }) diff --git a/test/e2e/multihoming_utils.go b/test/e2e/multihoming_utils.go index bf411a3d38..39b0fbc4ac 100644 --- a/test/e2e/multihoming_utils.go +++ b/test/e2e/multihoming_utils.go @@ -199,6 +199,7 @@ type podConfiguration struct { nodeSelector map[string]string isPrivileged bool labels map[string]string + annotations map[string]string requiresExtraNamespace bool hostNetwork bool ipRequestFromSubnet string @@ -207,9 +208,21 @@ type podConfiguration struct { func generatePodSpec(config podConfiguration) *v1.Pod { podSpec := e2epod.NewAgnhostPod(config.namespace, config.name, nil, nil, nil, config.containerCmd...) + + // Merge network attachments and custom annotations + if podSpec.Annotations == nil { + podSpec.Annotations = make(map[string]string) + } if len(config.attachments) > 0 { - podSpec.Annotations = networkSelectionElements(config.attachments...) + attachmentAnnotations := networkSelectionElements(config.attachments...) + for k, v := range attachmentAnnotations { + podSpec.Annotations[k] = v + } } + for k, v := range config.annotations { + podSpec.Annotations[k] = v + } + podSpec.Spec.NodeSelector = config.nodeSelector podSpec.Labels = config.labels podSpec.Spec.HostNetwork = config.hostNetwork diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 36bcfafd26..d1591e4781 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -2403,6 +2403,12 @@ func withLabels(labels map[string]string) podOption { } } +func withAnnotations(annotations map[string]string) podOption { + return func(pod *podConfiguration) { + pod.annotations = annotations + } +} + func withNetworkAttachment(networks []nadapi.NetworkSelectionElement) podOption { return func(pod *podConfiguration) { pod.attachments = networks diff --git a/test/e2e/network_segmentation_default_network_annotation.go b/test/e2e/network_segmentation_default_network_annotation.go index 7ef3416445..56e3e3370f 100644 --- a/test/e2e/network_segmentation_default_network_annotation.go +++ b/test/e2e/network_segmentation_default_network_annotation.go @@ -11,7 +11,9 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" @@ -53,11 +55,9 @@ var _ = Describe("Network Segmentation: Default network multus annotation", func Spec: udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRolePrimary, - Subnets: filterDualStackCIDRs(f.ClientSet, []udnv1.CIDR{ - udnv1.CIDR("103.0.0.0/16"), - udnv1.CIDR("2014:100:200::0/60"), - }), + Role: udnv1.NetworkRolePrimary, + Subnets: filterDualStackCIDRs(f.ClientSet, []udnv1.CIDR{"103.0.0.0/16", "2014:100:200::0/60"}), + IPAM: &udnv1.IPAMConfig{Mode: udnv1.IPAMEnabled, Lifecycle: udnv1.IPAMLifecyclePersistent}, }, }, } @@ -96,6 +96,33 @@ var _ = Describe("Network Segmentation: Default network multus annotation", func Expect(netStatus[0].IPs).To(ConsistOf(exposedIPs), "Should have the IPs specified in the default network annotation") Expect(strings.ToLower(netStatus[0].Mac)).To(Equal(strings.ToLower(tc.mac)), "Should have the MAC specified in the default network annotation") + By("Create second pod with default network annotation requesting the same MAC request") + pod2 := e2epod.NewAgnhostPod(f.Namespace.Name, "pod-mac-conflict", nil, nil, nil) + pod2.Annotations = map[string]string{"v1.multus-cni.io/default-network": fmt.Sprintf(`[{"name":"default", "namespace":"ovn-kubernetes", "mac":%q}]`, tc.mac)} + pod2.Spec.Containers[0].Command = []string{"sleep", "infinity"} + pod2, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(context.Background(), pod2, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("Asserting second pod has event attached reflecting MAC conflict error") + Eventually(func(g Gomega) []corev1.Event { + events, err := f.ClientSet.CoreV1().Events(pod2.Namespace).SearchWithContext(context.Background(), scheme.Scheme, pod2) + g.Expect(err).NotTo(HaveOccurred()) + return events.Items + }).WithTimeout(time.Minute * 1).WithPolling(time.Second * 3).Should(ContainElement(SatisfyAll( + HaveField("Type", "Warning"), + HaveField("Reason", "ErrorAllocatingPod"), + HaveField("Message", ContainSubstring("MAC address already in use")), + ))) + + By("Assert second pod consistently at pending") + Consistently(func(g Gomega) corev1.PodPhase { + pod2Updated, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.Background(), pod2.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + return pod2Updated.Status.Phase + }). + WithTimeout(3 * time.Second). + WithPolling(time.Second). + Should(Equal(corev1.PodPending)) }, Entry("should create the pod with the specified static IP and MAC address", testCase{ diff --git a/test/e2e/network_segmentation_preconfigured_layer2.go b/test/e2e/network_segmentation_preconfigured_layer2.go index aa9f6c819d..ef9e8315f0 100644 --- a/test/e2e/network_segmentation_preconfigured_layer2.go +++ b/test/e2e/network_segmentation_preconfigured_layer2.go @@ -2,15 +2,21 @@ package e2e import ( "context" + "encoding/json" "fmt" "net" + "strings" "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "github.com/ovn-org/ovn-kubernetes/test/e2e/feature" + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + udnclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned" + + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" ) @@ -130,4 +136,150 @@ var _ = Describe("Network Segmentation: Preconfigured Layer2 UDN", feature.Netwo expectedGatewayIPs: []string{"10.128.0.2", "2014:100:200::2"}, }), ) + + Context("duplicate IP validation with primary UDN layer 2 pods", func() { + const ( + duplicateIPv4 = "10.128.0.200/16" + duplicateIPv6 = "2014:100:200::200/60" + ) + + type duplicateIPTestConfig struct { + podIP string + } + + createPodWithStaticIP := func(podName string, staticIPs []string) *v1.Pod { + ips, err := json.Marshal(staticIPs) + Expect(err).NotTo(HaveOccurred(), "Should marshal IPs for annotation") + + podConfig := *podConfig(podName, + withCommand(func() []string { + return []string{"pause"} + }), + withAnnotations(map[string]string{ + "v1.multus-cni.io/default-network": fmt.Sprintf(`[{"name":"default", "namespace":"ovn-kubernetes", "ips": %s}]`, string(ips)), + }), + ) + podConfig.namespace = f.Namespace.Name + + return runUDNPod(cs, f.Namespace.Name, podConfig, nil) + } + + createPodWithStaticIPNoWait := func(podName string, staticIPs []string) *v1.Pod { + ips, err := json.Marshal(staticIPs) + Expect(err).NotTo(HaveOccurred(), "Should marshal IPs for annotation") + + podConfig := *podConfig(podName, + withCommand(func() []string { + return []string{"pause"} + }), + withAnnotations(map[string]string{ + "v1.multus-cni.io/default-network": fmt.Sprintf(`[{"name":"default", "namespace":"ovn-kubernetes", "ips": %s}]`, string(ips)), + }), + ) + podConfig.namespace = f.Namespace.Name + + // Create the pod but don't wait for it to be Running (since it will fail due to duplicate IP) + podSpec := generatePodSpec(podConfig) + createdPod, err := cs.CoreV1().Pods(f.Namespace.Name).Create(context.Background(), podSpec, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + return createdPod + } + + waitForPodDuplicateIPFailure := func(podName string) { + Eventually(func() []v1.Event { + events, err := cs.CoreV1().Events(f.Namespace.Name).List(context.TODO(), metav1.ListOptions{ + FieldSelector: fmt.Sprintf("involvedObject.name=%s", podName), + }) + if err != nil { + return nil + } + return events.Items + }). + WithTimeout(60*time.Second). + WithPolling(2*time.Second). 
+ Should(ContainElement(SatisfyAll( + HaveField("Type", Equal("Warning")), + HaveField("Reason", Equal("ErrorAllocatingPod")), + HaveField("Message", ContainSubstring("provided IP is already allocated")), + )), fmt.Sprintf("Pod %s should fail with IP allocation error", podName)) + } + + BeforeEach(func() { + if !isPreConfiguredUdnAddressesEnabled() { + Skip("ENABLE_PRE_CONF_UDN_ADDR not configured") + } + + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + f.Namespace = namespace + Expect(err).NotTo(HaveOccurred()) + }) + + DescribeTable("should fail when creating second pod with duplicate static IP", + func(config duplicateIPTestConfig) { + podIPs := filterCIDRs(f.ClientSet, config.podIP) + + if len(podIPs) == 0 { + Skip("IP family not supported in this environment") + } + + By("Creating the L2 network") + netConfig := &networkAttachmentConfigParams{ + name: "duplicate-ip-test-net", + topology: "layer2", + cidr: joinStrings("10.128.0.0/16", "2014:100:200::0/60"), + role: "primary", + namespace: f.Namespace.Name, + } + filterSupportedNetworkConfig(f.ClientSet, netConfig) + udnManifest := generateUserDefinedNetworkManifest(netConfig, f.ClientSet) + cleanup, err := createManifest(netConfig.namespace, udnManifest) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(cleanup) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, netConfig.namespace, netConfig.name), 5*time.Second, time.Second).Should(Succeed()) + + By("Creating first pod with static IP") + pod1 := createPodWithStaticIP("test-pod-1", podIPs) + + By("Verifying first pod gets the requested static IP") + pod1, err = cs.CoreV1().Pods(f.Namespace.Name).Get(context.Background(), pod1.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + + netStatus, err := podNetworkStatus(pod1, func(status nadapi.NetworkStatus) bool { + return status.Default + }) + Expect(err).NotTo(HaveOccurred(), "Should get network status from pod") + Expect(netStatus).To(HaveLen(1), "Should have one network status for the default network") + + var expectedPodIPs []string + for _, ip := range podIPs { + expectedPodIPs = append(expectedPodIPs, strings.Split(ip, "/")[0]) + } + Expect(netStatus[0].IPs).To(ConsistOf(expectedPodIPs), "Should have the IPs specified in the default network annotation") + + By("Creating second pod with duplicate IP - should fail") + pod2 := createPodWithStaticIPNoWait("test-pod-2", podIPs) + + By("Verifying second pod fails with duplicate IP allocation error") + waitForPodDuplicateIPFailure(pod2.Name) + + By("Verifying first pod is still running normally") + Eventually(func() v1.PodPhase { + updatedPod, err := cs.CoreV1().Pods(f.Namespace.Name).Get(context.Background(), pod1.Name, metav1.GetOptions{}) + if err != nil { + return v1.PodFailed + } + return updatedPod.Status.Phase + }, 30*time.Second, 5*time.Second).Should(Equal(v1.PodRunning)) + }, + Entry("IPv4 duplicate", duplicateIPTestConfig{ + podIP: duplicateIPv4, + }), + Entry("IPv6 duplicate", duplicateIPTestConfig{ + podIP: duplicateIPv6, + }), + ) + }) })
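The e2e additions above build the v1.multus-cni.io/default-network annotation as hand-written JSON strings (fmt.Sprintf in kubevirt.go, network_segmentation_default_network_annotation.go and network_segmentation_preconfigured_layer2.go), while the multi_network unit tests expect that same payload to round-trip into nadv1.NetworkSelectionElement values (see expectedNetworkSelectionElements). Below is a minimal sketch, not part of this patch, of producing an equivalent payload from the typed struct instead; buildDefaultNetworkAnnotation is a hypothetical helper name and the exact JSON field order may differ from the literals used in the tests.

package main

import (
	"encoding/json"
	"fmt"

	nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
)

// buildDefaultNetworkAnnotation is an illustrative helper (not part of this
// patch). It renders the default-network annotation the tests above hand-write:
// a single NetworkSelectionElement for ovn-kubernetes/default carrying the
// optional static IP and MAC requests.
func buildDefaultNetworkAnnotation(ips []string, mac string) (map[string]string, error) {
	nse := []nadapi.NetworkSelectionElement{{
		Namespace:  "ovn-kubernetes",
		Name:       "default",
		IPRequest:  ips,
		MacRequest: mac,
	}}
	raw, err := json.Marshal(nse)
	if err != nil {
		return nil, err
	}
	return map[string]string{"v1.multus-cni.io/default-network": string(raw)}, nil
}

func main() {
	// Values borrowed from the duplicate-address tests above.
	ann, err := buildDefaultNetworkAnnotation(
		[]string{"10.128.0.200/16", "2014:100:200::200/60"},
		"02:a1:b2:c3:d4:e5",
	)
	if err != nil {
		panic(err)
	}
	// Prints a payload equivalent to the annotation literals above, e.g.
	// [{"name":"default","namespace":"ovn-kubernetes","ips":[...],"mac":"02:a1:b2:c3:d4:e5"}]
	fmt.Println(ann["v1.multus-cni.io/default-network"])
}

A map produced this way could be passed to the new withAnnotations pod option (or set on the VM template) in place of the string literals; whether that refactor is worthwhile is left to the test authors.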
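The new failure-path tests also repeat essentially the same event assertion in several places (waitForVMIPodDuplicateIPFailure and the MAC-conflict Eventually in kubevirt.go, the pod2 assertion in network_segmentation_default_network_annotation.go, waitForPodDuplicateIPFailure in network_segmentation_preconfigured_layer2.go): a Warning event with Reason "ErrorAllocatingPod" whose message names the conflicting resource. A minimal sketch of factoring that into one helper, assuming it lives in the e2e package next to the files above; eventuallyPodHasAllocationFailureEvent is a hypothetical name, and the VM variants would still resolve the launcher pod name from the kubevirtv1.VirtualMachineNameLabel selector before calling it.

package e2e

import (
	"context"
	"fmt"
	"time"

	. "github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// eventuallyPodHasAllocationFailureEvent is an illustrative helper (not part of
// this patch). It waits for a Warning/ErrorAllocatingPod event on the named pod
// whose message contains the given substring, mirroring the assertions above.
func eventuallyPodHasAllocationFailureEvent(cs kubernetes.Interface, namespace, podName, messageSubstring string) {
	Eventually(func() []corev1.Event {
		events, err := cs.CoreV1().Events(namespace).List(context.TODO(), metav1.ListOptions{
			FieldSelector: fmt.Sprintf("involvedObject.name=%s", podName),
		})
		if err != nil {
			return nil
		}
		return events.Items
	}).
		WithTimeout(60 * time.Second).
		WithPolling(2 * time.Second).
		Should(ContainElement(SatisfyAll(
			HaveField("Type", Equal(corev1.EventTypeWarning)),
			HaveField("Reason", Equal("ErrorAllocatingPod")),
			HaveField("Message", ContainSubstring(messageSubstring)),
		)), fmt.Sprintf("pod %s should report an allocation failure", podName))
}

Call sites would then read eventuallyPodHasAllocationFailureEvent(cs, f.Namespace.Name, pod2.Name, "provided IP is already allocated") for the duplicate-IP cases and use "MAC address already in use" for the MAC-conflict cases.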