diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh
index 52085be5a1..bdb1ef80cf 100755
--- a/dist/images/ovnkube.sh
+++ b/dist/images/ovnkube.sh
@@ -387,6 +387,19 @@ wait_for_event() {
   done
 }
 
+# wait_ovnkube_controller_with_node_done - Wait for the ovnkube-controller-with-node process to complete
+# Checks whether the ovnkube-controller-with-node process is running by looking for its PID file.
+# If the PID file exists, waits for that process to finish before continuing.
+# If the PID file doesn't exist, the process has already exited.
+wait_ovnkube_controller_with_node_done() {
+  local pid_file=${OVN_RUNDIR}/ovnkube-controller-with-node.pid
+  if [[ -f ${pid_file} ]]; then
+    echo "info: waiting for ovnkube-controller-with-node process to end"
+    wait $(cat $pid_file)
+    echo "info: done waiting for ovnkube-controller-with-node to end"
+  fi
+}
+
 # The ovnkube-db kubernetes service must be populated with OVN DB service endpoints
 # before various OVN K8s containers can come up. This functions checks for that.
 # If OVN dbs are configured to listen only on unix sockets, then there will not be
@@ -461,6 +474,36 @@ ovs_ready() {
   return 0
 }
 
+# get_bridge_name_for_physnet - Extract the OVS bridge name for a given OVN physical network
+# Takes an OVN physical network name (physnet) and returns the corresponding
+# OVS bridge name from the ovn-bridge-mappings configuration.
+# Returns an empty string if not found.
+get_bridge_name_for_physnet() {
+  local physnet="$1"
+  local mappings
+  mappings=$(ovs-vsctl --if-exists get open_vswitch . external_ids:ovn-bridge-mappings)
+  # Extract the bridge name after "$physnet:" and before the next comma (or end of string):
+  # the regex matches zero or more non-comma characters;
+  # cut on the colon and return field 2.
+  echo "$mappings" | tr -d "\"" | grep -o "$physnet:[^,]*" | cut -d: -f2
+}
+
+# add_garp_drop_flow - Adds a drop flow for GARPs on the patch port to br-int for the specified bridge.
+add_garp_drop_flow() {
+  local bridge="$1"
+  local cookie="0x0305"
+  local priority="498"
+  # If the bridge exists and the patch port has been created, we expect to add at least one flow
+  # to a patch port whose name ends in "to-br-int".
+  # FIXME: can we generate the exact name? It's possible we add these flows to the wrong port
+  # when selecting on a substring.
+  for port_name in $(ovs-vsctl list-ports $bridge); do
+    if [[ "$port_name" == *to-br-int ]]; then
+      local of_port=$(ovs-vsctl get interface $port_name ofport)
+      ovs-ofctl add-flow $bridge "cookie=$cookie,table=0,priority=$priority,in_port=$of_port,arp,arp_op=1,actions=drop" > /dev/null
+      break
+    fi
+  done
+}
+
 # Verify that the process is running either by checking for the PID in `ps` output
 # or by using `ovs-appctl` utility for the processes that support it.
 # $1 is the name of the process
@@ -1581,7 +1624,10 @@ ovnkube-controller() {
 }
 
 ovnkube-controller-with-node() {
-  trap 'kill $(jobs -p) ; rm -f /etc/cni/net.d/10-ovn-kubernetes.conf ; exit 0' TERM
+  # Send SIGTERM to the background job (the ovnkube-node process), remove the CNI conf, and wait for the
+  # background job to end. The process is run in the background, so wait until it removes its PID file on
+  # exit. If the PID file doesn't exist, exit immediately.
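+  # Illustrative shutdown sequence (a sketch of the trap below, not additional behavior):
+  #   TERM received -> SIGTERM the backgrounded ovnkube -> remove the CNI conf ->
+  #   wait for the owner of ${OVN_RUNDIR}/ovnkube-controller-with-node.pid to exit -> exit 0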
+  trap 'kill $(jobs -p) ; rm -f /etc/cni/net.d/10-ovn-kubernetes.conf ; wait_ovnkube_controller_with_node_done; exit 0' TERM
   check_ovn_daemonset_version "3"
   rm -f ${OVN_RUNDIR}/ovnkube-controller-with-node.pid
 
@@ -1606,6 +1652,23 @@ ovnkube-controller-with-node() {
     wait_for_event process_ready ovn-controller
   fi
 
+  # Start of temp workaround.
+  # Remove when https://issues.redhat.com/browse/FDP-1537 is available.
+  if [[ ${ovnkube_node_mode} == "full" && ${ovn_enable_interconnect} == "true" && ${ovn_egressip_enable} == "true" ]]; then
+    echo "=============== ovnkube-controller-with-node - (add GARP drop flows if external bridge exists)"
+    # The bridge may not exist yet.
+    local bridge_name="$(get_bridge_name_for_physnet 'physnet')"
+    if [[ "$bridge_name" != "" ]]; then
+      echo "=============== ovnkube-controller-with-node - found bridge mapping for physnet: $bridge_name"
+      # Nothing to do if the external bridge isn't created.
+      if ovs-vsctl br-exists $bridge_name; then
+        echo "=============== ovnkube-controller-with-node - found bridge $bridge_name"
+        add_garp_drop_flow "$bridge_name"
+        echo "=============== ovnkube-controller-with-node - (finished adding GARP drop flows)"
+      fi
+    fi
+  fi
+
   ovn_routable_mtu_flag=
   if [[ -n "${routable_mtu}" ]]; then
     routable_mtu_flag="--routable-mtu ${routable_mtu}"
diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go
index c1fa4f8edf..c596bdb6ac 100644
--- a/go-controller/cmd/ovnkube/ovnkube.go
+++ b/go-controller/cmd/ovnkube/ovnkube.go
@@ -9,6 +9,7 @@ import (
 	"os/signal"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"syscall"
 	"text/tabwriter"
 	"text/template"
@@ -26,6 +27,7 @@ import (
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb"
+	libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics"
 	controllerManager "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/network-controller-manager"
 	ovnnode "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node"
@@ -478,6 +480,14 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
 			clusterManager.Stop()
 		}()
 	}
+	// When ovnkube runs the ovnkube-controller and ovnkube-node roles in the same process, this bool informs
+	// ovnkube-node that ovnkube-controller has synced once and its changes have propagated to the SB DB;
+	// ovnkube-node will then remove the flows that drop GARPs.
+	// Remove when OVN supports native silencing of GARPs on startup: https://issues.redhat.com/browse/FDP-1537
+	// isOVNKubeControllerSyncd is true once ovnkube-controller has synced and its changes are in the OVN
+	// Southbound database.
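+	// The handshake implemented below, roughly:
+	//   1. ovnkube-controller finishes its initial sync, and WaitUntilFlowsInstalled confirms that
+	//      ovn-controller has caught up (via the nb_cfg/hv_cfg mechanism).
+	//   2. isOVNKubeControllerSyncd.Store(true) is called.
+	//   3. ovnkube-node polls the bool and, once true, removes the GARP drop flows and reconciles the gateway.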
+	var isOVNKubeControllerSyncd *atomic.Bool
+	if runMode.ovnkubeController && runMode.node && config.OVNKubernetesFeature.EnableEgressIP && config.OVNKubernetesFeature.EnableInterconnect && config.OvnKubeNode.Mode == types.NodeModeFull {
+		isOVNKubeControllerSyncd = &atomic.Bool{}
+	}
 
 	if runMode.ovnkubeController {
 		wg.Add(1)
@@ -518,6 +528,17 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
 			// record delay until ready
 			metrics.MetricOVNKubeControllerReadyDuration.Set(time.Since(startTime).Seconds())
 
+			if isOVNKubeControllerSyncd != nil {
+				klog.Infof("Waiting for OVN northbound database changes to be processed by ovn-controller")
+				if err = libovsdbutil.WaitUntilFlowsInstalled(ctx, libovsdbOvnNBClient); err != nil {
+					controllerErr = fmt.Errorf("failed waiting for OVN northbound database changes to be processed by ovn-controller: %v", err)
+					return
+				}
+				klog.Infof("Finished waiting for OVN northbound database changes to be processed by ovn-controller")
+				isOVNKubeControllerSyncd.Store(true)
+			}
+
 			<-ctx.Done()
 			networkControllerManager.Stop()
 		}()
@@ -547,7 +568,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
 				return
 			}
 
-			err = nodeNetworkControllerManager.Start(ctx)
+			err = nodeNetworkControllerManager.Start(ctx, isOVNKubeControllerSyncd)
 			if err != nil {
 				nodeErr = fmt.Errorf("failed to start node network controller: %w", err)
 				return
@@ -557,7 +578,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
 			metrics.MetricNodeReadyDuration.Set(time.Since(startTime).Seconds())
 
 			<-ctx.Done()
-			nodeNetworkControllerManager.Stop()
+			nodeNetworkControllerManager.Stop(isOVNKubeControllerSyncd)
 		}()
 	}
 
diff --git a/go-controller/pkg/libovsdb/util/northd_sync.go b/go-controller/pkg/libovsdb/util/northd_sync.go
new file mode 100644
index 0000000000..ea34c45a17
--- /dev/null
+++ b/go-controller/pkg/libovsdb/util/northd_sync.go
@@ -0,0 +1,72 @@
+package util
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"time"
+
+	"k8s.io/apimachinery/pkg/util/wait"
+
+	"github.com/ovn-org/libovsdb/client"
+	"github.com/ovn-org/libovsdb/model"
+	"github.com/ovn-org/libovsdb/ovsdb"
+
+	libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
+)
+
+// WaitUntilFlowsInstalled ensures that ovn-controller has synced at least once by incrementing the nb_cfg value
+// in the NB DB and waiting for northd to write back a value equal to or greater than it in the hv_cfg field of
+// NB_Global. See https://www.ovn.org/support/dist-docs/ovn-nb.5.html for more info on the nb_cfg / hv_cfg fields.
+// The expectation is that the data you wish to be synced has already been written to the NB DB.
+// Note: if ovn-controller is down, this will block until it comes back up; therefore this func should only
+// be used with IC mode and one node per zone.
+func WaitUntilFlowsInstalled(ctx context.Context, nbClient client.Client) error {
+	// 1. Get the value of nb_cfg
+	// 2. Increment the value of nb_cfg
+	// 3. Wait until the value appears in the hv_cfg field, ensuring ovn-controller has processed the changes
+	nbGlobal := &nbdb.NBGlobal{}
+	nbGlobal, err := libovsdbops.GetNBGlobal(nbClient, nbGlobal)
+	if err != nil {
+		return fmt.Errorf("failed to find OVN Northbound NB_Global table"+
+			" entry: %w", err)
+	}
+	// Increment the nb_cfg value by 1. When northd consumes updates from the NB DB, it copies this value to the
+	// nb_cfg field of the SB DB's SB_Global table.
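+	// For illustration, the mutate below is roughly equivalent to
+	//   ovn-nbctl set NB_Global . nb_cfg=<current+1>
+	// except that a mutate op increments atomically, so concurrent writers cannot clobber each other.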
+	ops, err := nbClient.Where(nbGlobal).Mutate(nbGlobal, model.Mutation{
+		Field:   &nbGlobal.NbCfg,
+		Mutator: ovsdb.MutateOperationAdd,
+		Value:   1,
+	})
+	if err != nil {
+		return fmt.Errorf("failed to generate ops to mutate nb_cfg: %w", err)
+	}
+	if _, err = libovsdbops.TransactAndCheck(nbClient, ops); err != nil {
+		return fmt.Errorf("failed to transact to increment nb_cfg: %w", err)
+	}
+	expectedHvCfgValue := nbGlobal.NbCfg + 1
+	if expectedHvCfgValue < 0 { // handle overflow
+		expectedHvCfgValue = 0
+	}
+	nbGlobal = &nbdb.NBGlobal{}
+	// ovn-northd sets hv_cfg to the lowest nb_cfg value reported across all chassis in the system, as recorded in
+	// the Chassis_Private table of the southbound database (in IC mode we support a single chassis per zone).
+	// Thus, hv_cfg equals nb_cfg for the single chassis once it has caught up with the NB DB contents we want.
+	// Poll the NB DB for the expected value every 5 milliseconds until the context is cancelled.
+	err = wait.PollUntilContextCancel(ctx, time.Millisecond*5, true, func(_ context.Context) (done bool, err2 error) {
+		if nbGlobal, err2 = libovsdbops.GetNBGlobal(nbClient, nbGlobal); err2 != nil {
+			// northd hasn't added an entry yet
+			if errors.Is(err2, client.ErrNotFound) {
+				return false, nil
+			}
+			return false, fmt.Errorf("failed to get nb_global table entry from NB DB: %w", err2)
+		}
+		return nbGlobal.HvCfg >= expectedHvCfgValue, nil // we only need a value greater than or equal to the expected one
+	})
+	if err != nil {
+		return fmt.Errorf("failed while waiting for hv_cfg value greater than or equal to %d in NB DB nb_global table: %w",
+			expectedHvCfgValue, err)
+	}
+	return nil
+}
diff --git a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go
index 059696b501..f3ed1f830a 100644
--- a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go
+++ b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"
 
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni"
@@ -94,7 +95,7 @@ func (ncm *nodeNetworkControllerManager) initDefaultNodeNetworkController() erro
 }
 
 // Start the node network controller manager
-func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error) {
+func (ncm *nodeNetworkControllerManager) Start(ctx context.Context, isOVNKubeControllerSyncd *atomic.Bool) (err error) {
 	klog.Infof("Starting the node network controller manager, Mode: %s", config.OvnKubeNode.Mode)
 
 	// Initialize OVS exec runner; find OVS binaries that the CNI code uses.
@@ -112,7 +113,7 @@ func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error)
 	// make sure we clean up after ourselves on failure
 	defer func() {
 		if err != nil {
-			ncm.Stop()
+			ncm.Stop(isOVNKubeControllerSyncd)
 		}
 	}()
 
@@ -135,18 +136,48 @@ func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error)
 
 	// nadController is nil if multi-network is disabled
 	if ncm.nadController != nil {
-		err = ncm.nadController.Start()
+		if err = ncm.nadController.Start(); err != nil {
+			return fmt.Errorf("failed to start NAD controller: %v", err)
+		}
 	}
-
-	return err
+	// Start of workaround; remove when OVN has native support for silencing GARPs for LRPs:
+	// https://issues.redhat.com/browse/FDP-1537
+	// When running in ovnkube-controller-with-node mode, wait until ovnkube-controller has synced
+	// before removing the flows that drop GARPs.
+waitForControllerSyncLoop:
+	for {
+		select {
+		case <-ctx.Done():
+			return nil
+		default:
+			if isOVNKubeControllerSyncd != nil && !isOVNKubeControllerSyncd.Load() {
+				klog.V(5).Infof("Waiting for ovnkube controller to sync before removing GARP drop flows")
+				time.Sleep(200 * time.Millisecond)
+				continue
+			}
+			klog.Infof("Removing flows to drop GARP")
+			ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway.SetDefaultBridgeGARPDropFlows(false)
+			if err := ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway.Reconcile(); err != nil {
+				return fmt.Errorf("failed to reconcile gateway after removing GARP drop flows for ext bridge: %v", err)
+			}
+			break waitForControllerSyncLoop
+		}
+	}
+	// End of workaround.
+	return nil
 }
 
 // Stop gracefully stops all managed controllers
-func (ncm *nodeNetworkControllerManager) Stop() {
+func (ncm *nodeNetworkControllerManager) Stop(isOVNKubeControllerSyncd *atomic.Bool) {
 	// stop stale ovs ports cleanup
 	close(ncm.stopChan)
 
 	if ncm.defaultNodeNetworkController != nil {
+		// Temp workaround for https://issues.redhat.com/browse/FDP-1537: re-add the GARP drop flows before
+		// stopping so that the next ovn-controller startup cannot announce stale EgressIP GARPs.
+		if isOVNKubeControllerSyncd != nil && ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway != nil {
+			ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway.SetDefaultBridgeGARPDropFlows(true)
+			if err := ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway.Reconcile(); err != nil {
+				klog.Errorf("Failed to reconcile gateway after attempting to add flows to the external bridge to drop GARPs: %v", err)
+			}
+		}
 		ncm.defaultNodeNetworkController.Stop()
 	}
diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go
index 2c3b38f99d..4b65ab95e0 100644
--- a/go-controller/pkg/node/default_node_network_controller.go
+++ b/go-controller/pkg/node/default_node_network_controller.go
@@ -95,7 +95,7 @@ func NewCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, apbExter
 type DefaultNodeNetworkController struct {
 	BaseNodeNetworkController
 
-	gateway Gateway
+	Gateway Gateway
 	// Node healthcheck server for cloud load balancers
 	healthzServer *proxierHealthUpdater
 	routeManager  *routemanager.Controller
@@ -980,7 +980,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 	if err := waiter.Wait(); err != nil {
 		return err
 	}
-	nc.gateway.Start()
+	nc.Gateway.Start()
 	klog.Infof("Gateway and management port readiness took %v", time.Since(start))
 
 	// Note(adrianc): DPU deployments are expected to support the new shared gateway changes, upgrade flow
@@ -988,7 +988,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 	if config.OvnKubeNode.Mode != types.NodeModeDPUHost {
 		bridgeName := ""
 		if config.OvnKubeNode.Mode == types.NodeModeFull {
-			bridgeName = nc.gateway.GetGatewayBridgeIface()
+			bridgeName = nc.Gateway.GetGatewayBridgeIface()
 			// Configure route for svc towards shared gw bridge
 			// Have to have the route to bridge for multi-NIC mode, where the default gateway may go to a non-OVS interface
 			if err := configureSvcRouteViaBridge(nc.routeManager, bridgeName); err != nil {
diff --git a/go-controller/pkg/node/gateway.go b/go-controller/pkg/node/gateway.go
index 7bcaca247f..8aec872eef 100644
--- a/go-controller/pkg/node/gateway.go
+++ b/go-controller/pkg/node/gateway.go
@@ -21,6 +21,7 @@ import (
 	apierrors "k8s.io/apimachinery/pkg/util/errors"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/klog/v2"
+	utilnet "k8s.io/utils/net"
 )
 
 // Gateway responds to Service and Endpoint K8s events
@@ -34,6 +35,7 @@ type Gateway interface {
 	GetGatewayBridgeIface() string
 	SetDefaultGatewayBridgeMAC(addr net.HardwareAddr)
 	Reconcile() error
+	SetDefaultBridgeGARPDropFlows(bool)
 }
 
 type gateway struct {
@@ -381,6 +383,15 @@ func (g *gateway) SetDefaultGatewayBridgeMAC(macAddr net.HardwareAddr) {
 	klog.Infof("Default gateway bridge MAC address updated to %s", macAddr)
 }
 
+// SetDefaultBridgeGARPDropFlows enables or disables the flows that drop GARPs, provided the
+// openflow manager has been initialized.
+func (g *gateway) SetDefaultBridgeGARPDropFlows(isDropped bool) {
+	if g.openflowManager == nil {
+		return
+	}
+	g.openflowManager.setDefaultBridgeGARPDrop(isDropped)
+}
+
 // Reconcile handles triggering updates to different components of a gateway, like OFM, Services
 func (g *gateway) Reconcile() error {
 	klog.Info("Reconciling gateway with updates")
@@ -436,6 +447,46 @@ type bridgeConfiguration struct {
 	ofPortPatch string
 	ofPortPhys  string
 	ofPortHost  string
+	dropGARP    bool
+}
+
+func (b *bridgeConfiguration) setDropGARP(drop bool) {
+	b.Lock()
+	defer b.Unlock()
+	b.dropGARP = drop
+}
+
+// dropGARPFlows generates the OVS flows for dropping gratuitous ARPs for cluster default network traffic only.
+// The bridgeConfiguration lock must be held by the caller.
+func (b *bridgeConfiguration) dropGARPFlows() []string {
+	if config.Gateway.Mode != config.GatewayModeShared || !config.IPv4Mode {
+		return nil
+	}
+	const priority = 498
+	var flows []string
+	flows = append(flows, generateGratuitousARPDropFlow(b.ofPortPatch, priority))
+
+	return flows
+}
+
+// allowNodeIPGARPFlows generates the OVS flows to allow gratuitous ARPs for Node IP(s) for cluster default network traffic only.
+// The bridgeConfiguration lock must be held by the caller.
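+// For example (illustrative values): for node IP 192.0.2.10 on patch port 3, the generated flow is
+//   cookie=0x0305,table=0,priority=499,in_port=3,dl_dst=ff:ff:ff:ff:ff:ff,arp,arp_op=1,arp_spa=192.0.2.10,actions=output:NORMAL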
+// Remove when https://issues.redhat.com/browse/FDP-1537 is available.
+func (b *bridgeConfiguration) allowNodeIPGARPFlows(nodeIPs []net.IP) []string {
+	if config.Gateway.Mode != config.GatewayModeShared || !config.IPv4Mode {
+		return nil
+	}
+	const priority = 499
+	var flows []string
+
+	for _, nodeIP := range nodeIPs {
+		if nodeIP == nil || nodeIP.IsUnspecified() || utilnet.IsIPv6(nodeIP) {
+			continue
+		}
+		flows = append(flows, generateGratuitousARPAllowFlow(b.ofPortPatch, nodeIP, priority))
+	}
+
+	return flows
+}
 
 // updateInterfaceIPAddresses sets and returns the bridge's current ips
@@ -470,6 +521,16 @@ func (b *bridgeConfiguration) updateInterfaceIPAddresses(node *kapi.Node) ([]*ne
 
 func bridgeForInterface(intfName, nodeName, physicalNetworkName string, gwIPs []*net.IPNet) (*bridgeConfiguration, error) {
 	res := bridgeConfiguration{}
+	// Temp workaround for https://issues.redhat.com/browse/FDP-1537.
+	// We need to keep dropping GARPs for any new bridge config when the run mode is
+	// ovnkube-controller + ovnkube-node + IC + a single node per zone.
+	// FIXME: only add this when the run mode is ovnkube-controller + node in a single process.
+	if config.OVNKubernetesFeature.EnableEgressIP && config.OVNKubernetesFeature.EnableInterconnect && config.OvnKubeNode.Mode == types.NodeModeFull {
+		// Drop by default; set to false later, once ovnkube-controller has synced and its changes have
+		// propagated to the OVN Southbound database.
+		// We should also match on the run mode here to ensure ovnkube-controller + ovnkube-node run in the same process.
+		res.dropGARP = true
+	}
+	// End temp workaround.
+
 	gwIntf := intfName
 
 	if bridgeName, _, err := util.RunOVSVsctl("port-to-br", intfName); err == nil {
diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go
index 3b7fee43cb..65c19b067d 100644
--- a/go-controller/pkg/node/gateway_init.go
+++ b/go-controller/pkg/node/gateway_init.go
@@ -409,7 +409,7 @@ func (nc *DefaultNodeNetworkController) initGateway(subnets []*net.IPNet, nodeAn
 	}
 	waiter.AddWait(readyGwFunc, initGwFunc)
-	nc.gateway = gw
+	nc.Gateway = gw
 
 	return nc.validateVTEPInterfaceMTU()
 }
@@ -496,7 +496,7 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP) er
 	}
 
 	err = gw.Init(nc.stopChan, nc.wg)
-	nc.gateway = gw
+	nc.Gateway = gw
 	return err
 }
@@ -530,7 +530,7 @@ func CleanupClusterNode(name string) error {
 }
 
 func (nc *DefaultNodeNetworkController) updateGatewayMAC(link netlink.Link) error {
-	if nc.gateway.GetGatewayBridgeIface() != link.Attrs().Name {
+	if nc.Gateway.GetGatewayBridgeIface() != link.Attrs().Name {
 		return nil
 	}
@@ -547,8 +547,8 @@ func (nc *DefaultNodeNetworkController) updateGatewayMAC(link netlink.Link) erro
 		return nil
 	}
 	// MAC must have changed, update node
-	nc.gateway.SetDefaultGatewayBridgeMAC(link.Attrs().HardwareAddr)
-	if err := nc.gateway.Reconcile(); err != nil {
+	nc.Gateway.SetDefaultGatewayBridgeMAC(link.Attrs().HardwareAddr)
+	if err := nc.Gateway.Reconcile(); err != nil {
 		return fmt.Errorf("failed to reconcile gateway for MAC address update: %w", err)
 	}
 	nodeAnnotator := kube.NewNodeAnnotator(nc.Kube, node.Name)
diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go
index 83e893c626..ba622f10fe 100644
--- a/go-controller/pkg/node/gateway_shared_intf.go
+++ b/go-controller/pkg/node/gateway_shared_intf.go
@@ -39,6 +39,10 @@ const (
 	// bridge to move packets between host and external for etp=local traffic.
 	// The hex number 0xe745ecf105, represents etp(e74)-service(5ec)-flows which makes it easier for debugging.
 	etpSvcOpenFlowCookie = "0xe745ecf105"
+	// garpCookie identifies the flows used to allow node IP GARPs and to drop all other GARPs from the CDN.
+	// Temp workaround until OVN has native support for silencing GARPs on startup:
+	// https://issues.redhat.com/browse/FDP-1537
+	garpCookie = "0x0305"
 	// ovsLocalPort is the name of the OVS bridge local port
 	ovsLocalPort = "LOCAL"
 	// ctMarkOVN is the conntrack mark value for OVN traffic
@@ -1101,6 +1105,19 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st
 	// 14 bytes of overhead for ethernet header (does not include VLAN)
 	maxPktLength := getMaxFrameLength()
 
+	// Problem: ovn-controller connects to the SB DB and then GARPs for any EgressIPs configured; however,
+	// with IC, the SB DB may be stale if ovnkube-controller is not processing.
+	// Solution: on startup, add flows to allow GARPs from Node IPs but drop all other GARPs, and remove
+	// them once ovnkube-controller has synced and its changes have propagated to the OVN SB DB.
+	// Remove when OVN natively supports an option on logical router ports to silence GARPs on
+	// ovn-controller startup: https://issues.redhat.com/browse/FDP-1537
+	if bridge.dropGARP {
+		// priority 499 flows to allow GARP pkts when the src IP is a Node IP
+		dftFlows = append(dftFlows, bridge.allowNodeIPGARPFlows(extraIPs)...)
+		// priority 498 flows to drop GARP pkts regardless of src IP
+		dftFlows = append(dftFlows, bridge.dropGARPFlows()...)
+	}
+
 	if config.IPv4Mode {
 		// table0, Geneve packets coming from external. Skip conntrack and go directly to host
 		// if dest mac is the shared mac send directly to host.
@@ -1365,6 +1382,26 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st
 	return dftFlows, nil
 }
 
+// generateGratuitousARPDropFlow returns a single flow to drop GARPs.
+// Remove when https://issues.redhat.com/browse/FDP-1537 is available.
+func generateGratuitousARPDropFlow(inPort string, priority int) string {
+	// Match on op code 1 - see RFC 5227, particularly the section:
+	// "Why Are ARP Announcements Performed Using ARP Request Packets and Not ARP Reply Packets?"
+	// OVN follows this practice of using op code 1.
+	return fmt.Sprintf("cookie=%s,table=0,priority=%d,in_port=%s,dl_dst=ff:ff:ff:ff:ff:ff,arp,arp_op=1,actions=drop",
+		garpCookie, priority, inPort)
+}
+
+// generateGratuitousARPAllowFlow returns a single flow to allow GARPs only for a specific source IP.
+// Remove when https://issues.redhat.com/browse/FDP-1537 is available.
+func generateGratuitousARPAllowFlow(inPort string, ip net.IP, priority int) string {
+	// Match on op code 1 - see RFC 5227, particularly the section:
+	// "Why Are ARP Announcements Performed Using ARP Request Packets and Not ARP Reply Packets?"
+	// OVN follows this practice of using op code 1.
+	return fmt.Sprintf("cookie=%s,table=0,priority=%d,in_port=%s,dl_dst=ff:ff:ff:ff:ff:ff,arp,arp_op=1,arp_spa=%s,actions=output:NORMAL",
+		garpCookie, priority, inPort, ip)
+}
+
 func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, error) {
 	// CAUTION: when adding new flows where the in_port is ofPortPatch and the out_port is ofPortPhys, ensure
 	// that dl_src is included in match criteria!
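Note (illustrative verification, not part of the patch): the workaround flows can be inspected on a node by
cookie. The bridge name breth0 is an assumption; substitute the bridge mapped to "physnet":

    ovs-ofctl dump-flows breth0 'cookie=0x0305/-1'

While ovnkube-controller is still syncing, this should list one priority=498 ARP drop flow plus one
priority=499 arp_spa=<node IP> allow flow per IPv4 node IP; once the sync completes, the flows are removed.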
diff --git a/go-controller/pkg/node/openflow_manager.go b/go-controller/pkg/node/openflow_manager.go
index 6482831ec0..3024709c85 100644
--- a/go-controller/pkg/node/openflow_manager.go
+++ b/go-controller/pkg/node/openflow_manager.go
@@ -61,6 +61,12 @@ func (c *openflowManager) setDefaultBridgeMAC(macAddr net.HardwareAddr) {
 	c.defaultBridge.macAddress = macAddr
 }
 
+// setDefaultBridgeGARPDrop enables or disables whether the openflow manager generates the OVS flows that
+// drop GARPs and adds them to the default external bridge.
+func (c *openflowManager) setDefaultBridgeGARPDrop(isDropped bool) {
+	c.defaultBridge.setDropGARP(isDropped)
+}
+
 func (c *openflowManager) updateFlowCacheEntry(key string, flows []string) {
 	c.flowMutex.Lock()
 	defer c.flowMutex.Unlock()
@@ -181,6 +187,9 @@ func (c *openflowManager) Run(stopChan <-chan struct{}, doneWg *sync.WaitGroup)
 			c.syncFlows()
 			timer.Reset(syncPeriod)
 		case <-stopChan:
+			// Sync before shutting down: flows may have been added, and there's a race between the flow
+			// channel (requesting a sync) and the stop channel on shutdown. Ensure flows are synced
+			// before we stop.
+			c.syncFlows()
 			return
 		}
 	}
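Note (illustrative check of the shutdown path, with the same breth0 assumption as above): on shutdown,
Stop() re-enables the drop flows and the openflow manager performs one final sync before exiting, so

    ovs-ofctl dump-flows breth0 'cookie=0x0305/-1' | grep priority=498

should show the drop flow again before ovn-controller restarts, preventing stale EgressIP GARPs from
leaking during the next startup.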