Skip to content

Commit 39eaefc

Browse files
authored
Merge pull request #4847 from qinqon/kv-live-migration-cooked-ras
kubevirt, l2, udpn: send unsolicited router advertisement after live migration to reconcile ipv6 default gw
2 parents ede1592 + 484f1c5 commit 39eaefc

File tree

7 files changed

+353
-37
lines changed

7 files changed

+353
-37
lines changed

go-controller/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ require (
2929
github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc
3030
github.com/mdlayher/arp v0.0.0-20220512170110-6706a2966875
3131
github.com/mdlayher/ndp v1.0.1
32+
github.com/mdlayher/socket v0.2.1
3233
github.com/metallb/frr-k8s v0.0.15
3334
github.com/miekg/dns v1.1.31
3435
github.com/mitchellh/copystructure v1.2.0
@@ -103,7 +104,6 @@ require (
103104
github.com/mailru/easyjson v0.7.7 // indirect
104105
github.com/mdlayher/ethernet v0.0.0-20220221185849-529eae5b6118 // indirect
105106
github.com/mdlayher/packet v1.0.0 // indirect
106-
github.com/mdlayher/socket v0.2.1 // indirect
107107
github.com/mitchellh/reflectwalk v1.0.2 // indirect
108108
github.com/moby/spdystream v0.5.0 // indirect
109109
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect

go-controller/pkg/kubevirt/pod.go

Lines changed: 118 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,29 @@ import (
2222
logicalswitchmanager "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager"
2323
ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
2424
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
25+
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/ndp"
2526
)
2627

28+
// DefaultGatewayReconciler is responsible for reconciling the default gateway
29+
// configuration of a virtual machine's network interface after a live migration.
30+
// It supports both IPv4 (gratuitous ARP) and IPv6 (unsolicited router advertisement) configurations.
31+
type DefaultGatewayReconciler struct {
32+
// watchFactory is used to look up the node objects involved in the migration.
watchFactory *factory.WatchFactory
33+
// netInfo describes the network being reconciled (network name, subnets and NADs).
netInfo util.NetInfo
34+
// interfaceName is the name of the interface the GARPs and RAs are sent on.
interfaceName string
35+
}
36+
37+
// NewDefaultGatewayReconciler creates a new instance of DefaultGatewayReconciler.
38+
// It takes the WatchFactory used to look up nodes, the NetInfo of the network being reconciled,
39+
// and the name of the network interface used to send GARPs or RAs. The arguments are stored as-is, without validation.
40+
func NewDefaultGatewayReconciler(watchFactory *factory.WatchFactory, netInfo util.NetInfo, interfaceName string) *DefaultGatewayReconciler {
41+
return &DefaultGatewayReconciler{
42+
watchFactory: watchFactory,
43+
netInfo: netInfo,
44+
interfaceName: interfaceName,
45+
}
46+
}
47+
2748
// IsPodLiveMigratable will return true if the pod belongs
2849
// to kubevirt and should use the live migration features
2950
func IsPodLiveMigratable(pod *corev1.Pod) bool {
@@ -482,17 +503,20 @@ func DiscoverLiveMigrationStatus(client *factory.WatchFactory, pod *corev1.Pod)
482503
return &status, nil
483504
}
484505

485-
func ReconcileIPv4DefaultGatewayAfterLiveMigration(watchFactory *factory.WatchFactory, netInfo util.NetInfo, liveMigrationStatus *LiveMigrationStatus, interfaceName string) error {
506+
// ReconcileIPv4AfterLiveMigration will send a GARP after live migration
507+
// to update the default gw mac address to the node where the VM is running
508+
// now.
509+
func (r *DefaultGatewayReconciler) ReconcileIPv4AfterLiveMigration(liveMigrationStatus *LiveMigrationStatus) error {
486510
if liveMigrationStatus.State != LiveMigrationTargetDomainReady {
487511
return nil
488512
}
489513

490-
targetNode, err := watchFactory.GetNode(liveMigrationStatus.TargetPod.Spec.NodeName)
514+
targetNode, err := r.watchFactory.GetNode(liveMigrationStatus.TargetPod.Spec.NodeName)
491515
if err != nil {
492516
return err
493517
}
494518

495-
lrpJoinAddress, err := util.ParseNodeGatewayRouterJoinNetwork(targetNode, netInfo.GetNetworkName())
519+
lrpJoinAddress, err := util.ParseNodeGatewayRouterJoinNetwork(targetNode, r.netInfo.GetNetworkName())
496520
if err != nil {
497521
return err
498522
}
@@ -503,16 +527,104 @@ func ReconcileIPv4DefaultGatewayAfterLiveMigration(watchFactory *factory.WatchFa
503527
}
504528

505529
lrpMAC := util.IPAddrToHWAddr(lrpJoinIPv4)
506-
for _, subnet := range netInfo.Subnets() {
530+
for _, subnet := range r.netInfo.Subnets() {
507531
gwIP := util.GetNodeGatewayIfAddr(subnet.CIDR).IP.To4()
508532
if gwIP == nil {
509533
continue
510534
}
511535
garp := util.GARP{IP: gwIP, MAC: &lrpMAC}
512-
if err := util.BroadcastGARP(interfaceName, garp); err != nil {
536+
if err := util.BroadcastGARP(r.interfaceName, garp); err != nil {
513537
return err
514538
}
515539
}
516-
517540
return nil
518541
}
542+
543+
// ReconcileIPv6AfterLiveMigration does two things for the migrated VM once the target domain is ready:
544+
// - Remove the ipv6 default gw paths that go through every node other than the live migration target node
545+
// - Add the ipv6 default gw path that goes through the live migration target node
546+
// This is done by sending unsolicited RAs to the target pod's MAC and first IPv6 address: one with lifetime=0
547+
// per non-target node (to remove that gateway path) and a last one with lifetime=65535 for the target node to add the new
548+
// default gateway path.
//
// It returns nil without sending anything when the target domain is not ready. It fails when the
// network does not have exactly one NAD, and wraps pod annotation and join address parsing errors
// in a suppressed error.
549+
func (r *DefaultGatewayReconciler) ReconcileIPv6AfterLiveMigration(liveMigration *LiveMigrationStatus) error {
550+
if !liveMigration.IsTargetDomainReady() {
551+
return nil
552+
}
553+
nodes, err := r.watchFactory.GetNodes()
554+
if err != nil {
555+
return err
556+
}
557+
558+
targetPod := liveMigration.TargetPod
559+
// The pod annotation below is looked up by NAD, so exactly one NAD is required.
if len(r.netInfo.GetNADs()) != 1 {
560+
return fmt.Errorf("expected only one nad for network %q, got %d", r.netInfo.GetNetworkName(), len(r.netInfo.GetNADs()))
561+
}
562+
563+
targetPodAnnotation, err := util.UnmarshalPodAnnotation(targetPod.Annotations, r.netInfo.GetNADs()[0])
564+
if err != nil {
565+
return ovntypes.NewSuppressedError(fmt.Errorf("failed parsing ovn pod annotation for pod '%s/%s' and network %q: %w", targetPod.Namespace, targetPod.Name, r.netInfo.GetNetworkName(), err))
566+
}
567+
568+
// The RAs are addressed to the target pod's first IPv6 address and its MAC.
destinationIP, err := util.MatchFirstIPNetFamily(true /* ipv6 */, targetPodAnnotation.IPs)
569+
if err != nil {
570+
return err
571+
}
572+
destinationMAC := targetPodAnnotation.MAC
573+
574+
ras := make([]ndp.RouterAdvertisement, 0, len(nodes))
575+
for _, node := range nodes {
576+
if node.Name == liveMigration.TargetPod.Spec.NodeName {
577+
// skip the target node since this is the proper gateway
578+
continue
579+
}
580+
nodeJoinAddrs, err := util.ParseNodeGatewayRouterJoinAddrs(node, r.netInfo.GetNetworkName())
581+
if err != nil {
582+
return ovntypes.NewSuppressedError(fmt.Errorf("failed parsing join addresss from node %q and network %q to reconcile ipv6 gateway: %w", node.Name, r.netInfo.GetNetworkName(), err))
583+
}
584+
// During upgrades, nftables blocks Router Advertisements (RAs) from other nodes.
585+
// However, Virtual Machines (VMs) may still retain old default gateway paths.
586+
// To address this, we create a new Router Advertisement with a lifetime of 0
587+
// to signal the removal of the old default gateway.
588+
// NOTE: This is a workaround for the issue and may not be needed in the future, after
589+
// upgrading to a version that supports the new behavior.
// NOTE(review): nodeJoinAddrs[0] is read without a length check; presumably the parser never
// returns an empty slice together with a nil error - confirm.
590+
ras = append(ras, newRouterAdvertisementFromJoinIPAndLifetime(nodeJoinAddrs[0].IP, destinationMAC, destinationIP.IP, 0))
591+
}
592+
targetNode, err := r.watchFactory.GetNode(liveMigration.TargetPod.Spec.NodeName)
593+
if err != nil {
594+
return fmt.Errorf("failed fetching node %q to reconcile ipv6 gateway: %w", liveMigration.TargetPod.Spec.NodeName, err)
595+
}
596+
targetNodeJoinAddrs, err := util.ParseNodeGatewayRouterJoinAddrs(targetNode, r.netInfo.GetNetworkName())
597+
if err != nil {
598+
return ovntypes.NewSuppressedError(fmt.Errorf("failed parsing join addresss from live migration target node %q and network %q to reconcile ipv6 gateway: %w", targetNode.Name, r.netInfo.GetNetworkName(), err))
599+
}
600+
// The RA for the target node is appended last, after all the zero-lifetime RAs.
ras = append(ras, newRouterAdvertisementFromJoinIPAndLifetime(targetNodeJoinAddrs[0].IP, destinationMAC, destinationIP.IP, 65535))
601+
return ndp.SendRouterAdvertisements(r.interfaceName, ras...)
602+
}
603+
604+
// newRouterAdvertisementFromJoinIPAndLifetime creates a new Router Advertisement (RA) message
605+
// using the provided join IP address, destination MAC, destination IP, and lifetime.
606+
//
607+
// This function performs the following:
608+
// - Derives the source MAC address from the given IP using util.IPAddrToHWAddr.
609+
// - Calculates the source link-local address (LLA) from that source MAC using util.HWAddrToIPv6LLA.
610+
// - Configures the destination IP and MAC address to use the provided values.
611+
// - Sets the RA message's lifetime to the specified value.
612+
//
613+
// Parameters:
614+
// - ip: The join IP address used to derive the source MAC and LLA.
615+
// - destinationMAC: The MAC address to which the RA message will be sent.
616+
// - destinationIP: The IP address to which the RA message will be sent.
617+
// - lifetime: The lifetime value for the RA message, in seconds; the caller passes 0 to remove a gateway path.
618+
//
619+
// Returns:
620+
// - An ndp.RouterAdvertisement value configured with the calculated source MAC, LLA, and the provided destination MAC, IP, and lifetime.
621+
func newRouterAdvertisementFromJoinIPAndLifetime(ip net.IP, destinationMAC net.HardwareAddr, destinationIP net.IP, lifetime uint16) ndp.RouterAdvertisement {
622+
sourceMAC := util.IPAddrToHWAddr(ip)
623+
return ndp.RouterAdvertisement{
624+
SourceMAC: sourceMAC,
625+
SourceIP: util.HWAddrToIPv6LLA(sourceMAC),
626+
DestinationMAC: destinationMAC,
627+
DestinationIP: destinationIP,
628+
Lifetime: lifetime,
629+
}
630+
}

go-controller/pkg/ovn/secondary_layer2_network_controller.go

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,9 @@ type SecondaryLayer2NetworkController struct {
293293

294294
// EgressIP controller utilized only to initialize a network with OVN policies to support EgressIP functionality.
295295
eIPController *EgressIPController
296+
297+
// reconcile the virtual machine default gateway sending GARPs and RAs
298+
defaultGatewayReconciler *kubevirt.DefaultGatewayReconciler
296299
}
297300

298301
// NewSecondaryLayer2NetworkController create a new OVN controller for the given secondary layer2 nad
@@ -363,6 +366,7 @@ func NewSecondaryLayer2NetworkController(
363366
if err != nil {
364367
return nil, fmt.Errorf("unable to create new service controller while creating new layer2 network controller: %w", err)
365368
}
369+
oc.defaultGatewayReconciler = kubevirt.NewDefaultGatewayReconciler(oc.watchFactory, oc.GetNetInfo(), util.GetNetworkScopedK8sMgmtHostIntfName(uint(oc.GetNetworkID())))
366370
}
367371

368372
if oc.allocatesPodAnnotation() {
@@ -908,15 +912,20 @@ func (oc *SecondaryLayer2NetworkController) updateLocalPodEvent(pod *corev1.Pod)
908912
}
909913

910914
func (oc *SecondaryLayer2NetworkController) reconcileLiveMigrationTargetZone(kubevirtLiveMigrationStatus *kubevirt.LiveMigrationStatus) error {
911-
// Only primary networks has a gateway to reconcile
912-
if !oc.IsPrimaryNetwork() {
915+
if oc.defaultGatewayReconciler == nil {
913916
return nil
914917
}
915-
mgmtInterfaceName := util.GetNetworkScopedK8sMgmtHostIntfName(uint(oc.GetNetworkID()))
916-
917-
if hasIPv4Subnet, _ := oc.IPMode(); hasIPv4Subnet {
918-
if err := kubevirt.ReconcileIPv4DefaultGatewayAfterLiveMigration(oc.watchFactory, oc.GetNetInfo(), kubevirtLiveMigrationStatus, mgmtInterfaceName); err != nil {
919-
return err
918+
hasIPv4Subnet, hasIPv6Subnet := oc.IPMode()
919+
if hasIPv4Subnet {
920+
if err := oc.defaultGatewayReconciler.ReconcileIPv4AfterLiveMigration(kubevirtLiveMigrationStatus); err != nil {
921+
return fmt.Errorf("failed reconciling IPv4 default gw after live migration at target pod '%s/%s': %w",
922+
kubevirtLiveMigrationStatus.TargetPod.Namespace, kubevirtLiveMigrationStatus.TargetPod.Name, err)
923+
}
924+
}
925+
if hasIPv6Subnet {
926+
if err := oc.defaultGatewayReconciler.ReconcileIPv6AfterLiveMigration(kubevirtLiveMigrationStatus); err != nil {
927+
return fmt.Errorf("failed reconciling IPv6 default gw after live migration at target pod '%s/%s': %w",
928+
kubevirtLiveMigrationStatus.TargetPod.Namespace, kubevirtLiveMigrationStatus.TargetPod.Name, err)
920929
}
921930
}
922931
return nil

go-controller/pkg/util/ndp/ra.go

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package ndp
2+
3+
import (
4+
"fmt"
5+
"net"
6+
"syscall"
7+
8+
"github.com/google/gopacket"
9+
"github.com/google/gopacket/layers"
10+
"github.com/mdlayher/socket"
11+
"golang.org/x/sys/unix"
12+
)
13+
14+
// RouterAdvertisement holds the MAC addresses, IP addresses and router lifetime used to build one router advertisement frame to send.
15+
type RouterAdvertisement struct {
16+
// SourceMAC and DestinationMAC are the Ethernet source and destination addresses; SourceMAC is also sent as the source link-layer address option.
SourceMAC, DestinationMAC net.HardwareAddr
17+
// SourceIP and DestinationIP are the IPv6 source and destination addresses.
SourceIP, DestinationIP net.IP
18+
// Lifetime is the router lifetime carried in the RA; 0 also clears the default router preference flag.
Lifetime uint16
19+
}
20+
21+
// SendRouterAdvertisements sends one or more Router Advertisements (RAs) on the specified network interface.
22+
// This function requires raw socket capabilities because the source MAC and IP addresses in the RAs
23+
// are not the ones from the interface used to send the packets.
24+
//
25+
// Parameters:
26+
// - interfaceName: The name of the network interface to send the RAs on.
27+
// - ras: A variadic list of RouterAdvertisement objects containing the details of each RA to be sent.
28+
//
29+
// Returns:
30+
// - error: nil on success; a wrapped error if the interface lookup or socket creation fails; the unwrapped serialization or send error otherwise.
31+
//
32+
// The function performs the following steps:
33+
// 1. Retrieves the network interface by name.
34+
// 2. Creates a raw socket for sending packets.
35+
// 3. Serializes each Router Advertisement into a byte slice.
36+
// 4. Sends the serialized RAs using the raw socket.
//
// All RAs are serialized before the first one is sent, so a serialization error means nothing is sent.
// A send error stops the loop, leaving the remaining RAs unsent.
37+
func SendRouterAdvertisements(interfaceName string, ras ...RouterAdvertisement) error {
38+
iface, err := net.InterfaceByName(interfaceName)
39+
if err != nil {
40+
return fmt.Errorf("failed to find interface %s: %w", interfaceName, err)
41+
}
42+
// Packet socket: the complete Ethernet frame, including the source MAC, is built below.
c, err := socket.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, syscall.ETH_P_ALL, "ra", nil)
43+
if err != nil {
44+
return fmt.Errorf("failed to create raw socket to send unsolicited RAs: %w", err)
45+
}
46+
defer c.Close()
47+
48+
serializedRAs := [][]byte{}
49+
for _, ra := range ras {
50+
serializeBuffer := gopacket.NewSerializeBuffer()
51+
52+
// Create the Ethernet layer with destination and source MAC addresses.
53+
ethernetLayer := layers.Ethernet{
54+
DstMAC: ra.DestinationMAC,
55+
SrcMAC: ra.SourceMAC,
56+
EthernetType: layers.EthernetTypeIPv6,
57+
}
58+
59+
// Create the IPv6 layer with source and destination IP addresses.
60+
ip6Layer := layers.IPv6{
61+
Version: 6,
62+
NextHeader: layers.IPProtocolICMPv6,
63+
HopLimit: 255,
64+
SrcIP: ra.SourceIP,
65+
DstIP: ra.DestinationIP,
66+
}
67+
68+
// Create the ICMPv6 layer for the Router Advertisement.
69+
icmp6Layer := layers.ICMPv6{
70+
TypeCode: layers.CreateICMPv6TypeCode(layers.ICMPv6TypeRouterAdvertisement, 0),
71+
}
72+
// The ICMPv6 checksum is computed using the IPv6 layer, so it must be attached before serializing.
if err := icmp6Layer.SetNetworkLayerForChecksum(&ip6Layer); err != nil {
73+
return err
74+
}
75+
76+
// https://datatracker.ietf.org/doc/html/rfc4861#section-4.2
77+
// Managed address configuration flag.
78+
managedAddressFlag := uint8(0x80)
79+
80+
// https://datatracker.ietf.org/doc/html/rfc4191#section-2.2
81+
// Prf (Default Router Preference)
82+
// 2-bit signed integer. Indicates whether to prefer this
83+
// router over other default routers. If the Router Lifetime
84+
// is zero, the preference value MUST be set to (00) by the
85+
// sender and MUST be ignored by the receiver. If the Reserved
86+
// (10) value is received, the receiver MUST treat the value as
87+
// if it were (00).
// NOTE(review): 0x08 presumably encodes Prf=01 (high preference) in the RA flags byte - confirm against RFC 4191.
88+
defaultRoutePreferenceFlag := uint8(0x08)
89+
// A zero lifetime requires the preference to be (00), as quoted above.
if ra.Lifetime == 0 {
90+
defaultRoutePreferenceFlag = uint8(0x00)
91+
}
92+
93+
// Create the ICMPv6 Router Advertisement layer, with the source MAC as its only option.
94+
raLayer := layers.ICMPv6RouterAdvertisement{
95+
HopLimit: 255,
96+
Flags: managedAddressFlag | defaultRoutePreferenceFlag,
97+
RouterLifetime: ra.Lifetime,
98+
ReachableTime: 0,
99+
RetransTimer: 0,
100+
Options: layers.ICMPv6Options{{
101+
Type: layers.ICMPv6OptSourceAddress,
102+
Data: ra.SourceMAC,
103+
}},
104+
}
105+
106+
// Serialize the layers into a byte slice, computing checksums and fixing length fields.
107+
if err := gopacket.SerializeLayers(serializeBuffer, gopacket.SerializeOptions{ComputeChecksums: true, FixLengths: true},
108+
&ethernetLayer,
109+
&ip6Layer,
110+
&icmp6Layer,
111+
&raLayer,
112+
); err != nil {
113+
return err
114+
}
115+
serializedRAs = append(serializedRAs, serializeBuffer.Bytes())
116+
}
117+
118+
// Send each serialized Router Advertisement, in the order given, through the interface found above.
119+
for _, serializedRA := range serializedRAs {
120+
if err := c.Sendto(serializedRA, &unix.SockaddrLinklayer{Ifindex: iface.Index}, 0); err != nil {
121+
return err
122+
}
123+
}
124+
return nil
125+
}

0 commit comments

Comments
 (0)