Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 64 additions & 1 deletion dist/images/ovnkube.sh
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,19 @@ wait_for_event() {
done
}

# wait_ovnkube_controller_with_node_done - Wait for the ovnkube-controller-with-node process to complete.
# Looks for the process PID file under ${OVN_RUNDIR}:
#   - PID file missing: the process is taken to have already exited; return immediately.
#   - PID file holds a numeric PID: block until that child of this shell has exited.
#   - PID file is empty, unreadable or not numeric: fall back to waiting for all
#     background jobs of this shell, so shutdown still blocks on the process.
# Globals:   OVN_RUNDIR (read)
# Outputs:   progress messages on stdout
# Returns:   always 0; the exit status of the awaited process is not propagated.
wait_ovnkube_controller_with_node_done() {
  local pid_file="${OVN_RUNDIR}/ovnkube-controller-with-node.pid"
  local pid=""

  if [[ ! -f "${pid_file}" ]]; then
    return 0
  fi

  echo "info: waiting on ovnkube-controller-with-node process to end"
  # The file may vanish between the check above and this read (the process
  # removes it on exit), so a failed read is tolerated rather than reported.
  { read -r pid < "${pid_file}"; } 2>/dev/null || true
  if [[ "${pid}" =~ ^[0-9]+$ ]]; then
    # Only the fact that the process ended matters here, not how it ended.
    wait "${pid}" || true
  else
    wait || true
  fi
  echo "info: done waiting for ovnkube-controller-with-node to end"
  return 0
}

# The ovnkube-db kubernetes service must be populated with OVN DB service endpoints
# before various OVN K8s containers can come up. This function checks for that.
# If OVN dbs are configured to listen only on unix sockets, then there will not be
Expand Down Expand Up @@ -465,6 +478,36 @@ ovs_ready() {
return 0
}

# get_bridge_name_for_physnet - Extract OVS bridge name for a given OVN physical network
# Takes an OVN network name for physical networks (physnet) and returns the corresponding
# OVS bridge name from the ovn-bridge-mappings configuration
# (a comma separated list of <physnet>:<bridge> entries).
# The network name is compared literally and in full, so "physnet" does not match
# an entry named "otherphysnet" and regex metacharacters in the name have no effect.
# Arguments: $1 - physical network name
# Outputs:   the bridge of the first matching entry on stdout; nothing if not found
#            or if the name is empty.
# Returns:   0
get_bridge_name_for_physnet() {
  local physnet="$1"
  local mappings entry
  local -a entries=()

  # An empty name can never identify a mapping.
  if [[ -z "$physnet" ]]; then
    return 0
  fi

  mappings=$(ovs-vsctl --if-exists get open_vswitch . external_ids:ovn-bridge-mappings)
  # ovs-vsctl prints the value wrapped in double quotes; drop them.
  mappings=${mappings//\"/}

  IFS=',' read -r -a entries <<< "$mappings"
  for entry in "${entries[@]}"; do
    # Entry must have the form name:bridge and the name must match exactly.
    if [[ "$entry" == *:* && "${entry%%:*}" == "$physnet" ]]; then
      printf '%s\n' "${entry#*:}"
      return 0
    fi
  done
  return 0
}

# Adds drop flows for GARPs on patch port to br-int for specified bridge.
# The flow is installed in table 0 and drops ARP packets with arp_op=1 that
# arrive on the first port of the bridge whose name ends in "to-br-int".
# If that port has no usable OpenFlow port number yet, no flow is added and a
# warning is written to stderr.
# Arguments: $1 - name of the OVS bridge
# Returns:   0 (the result of ovs-ofctl is not propagated)
add_garp_drop_flow() {
  local bridge="$1"
  local cookie="0x0305"
  local priority="498"
  local port_name of_port

  # if bridge exists, and the patch port is created, we expect to add at least one flow to a patch port ending in to-br-int.
  # FIXME: can we generate the exact name. It's possible we add these flows to the incorrect port when selecting on substring
  while IFS= read -r port_name; do
    [[ "$port_name" == *to-br-int ]] || continue

    of_port=$(ovs-vsctl get interface "$port_name" ofport) || of_port=""
    # Only a positive integer is a usable in_port; anything else (lookup
    # failure, unset or error value) would produce an invalid or wrong match.
    if [[ "$of_port" =~ ^[1-9][0-9]*$ ]]; then
      ovs-ofctl add-flow "$bridge" "cookie=$cookie,table=0,priority=$priority,in_port=$of_port,arp,arp_op=1,actions=drop" > /dev/null
    else
      echo "warning: port $port_name on bridge $bridge has no valid ofport ('$of_port'), not adding GARP drop flow" >&2
    fi
    # Only the first matching port is considered.
    break
  done < <(ovs-vsctl list-ports "$bridge")
}

# Verify that the process is running either by checking for the PID in `ps` output
# or by using `ovs-appctl` utility for the processes that support it.
# $1 is the name of the process
Expand Down Expand Up @@ -1613,7 +1656,10 @@ ovnkube-controller() {
}

ovnkube-controller-with-node() {
trap 'kill $(jobs -p) ; rm -f /etc/cni/net.d/10-ovn-kubernetes.conf ; exit 0' TERM
# send SIGTERM to the background job (ovnkube-node process), remove the CNI conf and keep waiting on the background job until it ends.
# currently we send the process to the background, therefore wait until that process removes its pid file on exit.
# if the pid file doesn't exist, we exit immediately.
trap 'kill $(jobs -p) ; rm -f /etc/cni/net.d/10-ovn-kubernetes.conf ; wait_ovnkube_controller_with_node_done; exit 0' TERM
check_ovn_daemonset_version "1.0.0"
rm -f ${OVN_RUNDIR}/ovnkube-controller-with-node.pid

Expand All @@ -1638,6 +1684,23 @@ ovnkube-controller-with-node() {
wait_for_event process_ready ovn-controller
fi

# start temp work around
# remove when https://issues.redhat.com/browse/FDP-1537 is available
if [[ ${ovnkube_node_mode} == "full" && ${ovn_enable_interconnect} == "true" && ${ovn_egressip_enable} == "true" ]]; then
echo "=============== ovnkube-controller-with-node - (add GARP drop flows if external bridge exists)"
# bridge may not yet exist
local bridge_name="$(get_bridge_name_for_physnet 'physnet')"
if [[ "$bridge_name" != "" ]]; then
echo "=============== ovnkube-controller-with-node - found bridge mapping for physnet: $bridge_name"
# nothing to do if the external bridge isn't created.
if ovs-vsctl br-exists $bridge_name; then
echo "=============== ovnkube-controller-with-node - found bridge $bridge_name"
add_garp_drop_flow "$bridge_name"
echo "=============== ovnkube-controller-with-node - (finished adding GARP drop flows)"
fi
fi
fi

ovn_routable_mtu_flag=
if [[ -n "${routable_mtu}" ]]; then
routable_mtu_flag="--routable-mtu ${routable_mtu}"
Expand Down
25 changes: 23 additions & 2 deletions go-controller/cmd/ovnkube/ovnkube.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"os/signal"
"strings"
"sync"
"sync/atomic"
"syscall"
"text/tabwriter"
"text/template"
Expand All @@ -25,6 +26,7 @@ import (
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb"
libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics"
controllerManager "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/network-controller-manager"
ovnnode "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node"
Expand Down Expand Up @@ -479,6 +481,14 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
clusterManager.Stop()
}()
}
// when ovnkube is running in ovnkube-controller and ovnkube node mode in the same process, bool is used to inform ovnkube-node that ovnkube-controller
// has sync'd once and changes have propagated to SB DB. ovnkube-node will then remove flows for dropping GARPs.
// Remove when OVN supports native silencing of GARPs on startup: https://issues.redhat.com/browse/FDP-1537
// isOVNKubeControllerSyncd is true when ovnkube controller has synced and its changes are in the OVN Southbound database.
var isOVNKubeControllerSyncd *atomic.Bool
if runMode.ovnkubeController && runMode.node && config.OVNKubernetesFeature.EnableEgressIP && config.OVNKubernetesFeature.EnableInterconnect && config.OvnKubeNode.Mode == types.NodeModeFull {
isOVNKubeControllerSyncd = &atomic.Bool{}
}

if runMode.ovnkubeController {
wg.Add(1)
Expand Down Expand Up @@ -519,6 +529,17 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
// record delay until ready
metrics.MetricOVNKubeControllerReadyDuration.Set(time.Since(startTime).Seconds())

if isOVNKubeControllerSyncd != nil {
klog.Infof("Waiting for OVN northbound database changes to be processed by ovn-controller")
if err = libovsdbutil.WaitUntilFlowsInstalled(ctx, libovsdbOvnNBClient); err != nil {
controllerErr = fmt.Errorf("failed waiting for OVN northbound database changes to be processed by ovn-controller: %v", err)
return
} else {
klog.Infof("Finished waiting for OVN northbound database changes to be processed by ovn-controller")
isOVNKubeControllerSyncd.Store(true)
}
}

<-ctx.Done()
networkControllerManager.Stop()
}()
Expand Down Expand Up @@ -550,7 +571,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
return
}

err = nodeNetworkControllerManager.Start(ctx)
err = nodeNetworkControllerManager.Start(ctx, isOVNKubeControllerSyncd)
if err != nil {
nodeErr = fmt.Errorf("failed to start node network controller: %w", err)
return
Expand All @@ -560,7 +581,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
metrics.MetricNodeReadyDuration.Set(time.Since(startTime).Seconds())

<-ctx.Done()
nodeNetworkControllerManager.Stop()
nodeNetworkControllerManager.Stop(isOVNKubeControllerSyncd)
}()
}

Expand Down
72 changes: 72 additions & 0 deletions go-controller/pkg/libovsdb/util/northd_sync.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package util

import (
"context"
"errors"
"fmt"
"time"

"k8s.io/apimachinery/pkg/util/wait"

"github.com/ovn-org/libovsdb/client"
"github.com/ovn-org/libovsdb/model"
"github.com/ovn-org/libovsdb/ovsdb"

libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
)

// WaitUntilFlowsInstalled ensures that ovn-controller has sync'd at least once by incrementing nb_cfg value in NB DB
// and waiting for northd to write back a value equal or greater to the hv_cfg field in NB_Global.
// See https://www.ovn.org/support/dist-docs/ovn-nb.5.html for more info regarding nb_cfg / hv_cfg fields.
// The expectation is that the data you wish to be sync'd is already written to NB DB.
// Note: if the ovn-controller is down, this will block until it comes back up, therefore this func should only
// be used with IC mode and one node per zone.
//
// It returns nil once hv_cfg has reached the expected value, and a wrapped error if the NB_Global row cannot be
// read, the nb_cfg increment cannot be built or transacted, or polling ends first (for example because ctx is done).
func WaitUntilFlowsInstalled(ctx context.Context, nbClient client.Client) error {
	// 1. Get value of nb_cfg
	// 2. Increment value of nb_cfg
	// 3. Wait until value appears in hv_cfg field thus ensuring ovn-controller has processed the changes
	nbGlobal, err := libovsdbops.GetNBGlobal(nbClient, &nbdb.NBGlobal{})
	if err != nil {
		return fmt.Errorf("failed to find OVN Northbound NB_Global table"+
			" entry: %w", err)
	}
	// increment nb_cfg value by 1. When northd consumes updates from NB DB, it will copy this value to SB DBs SB_Global
	// table nb_cfg field.
	ops, err := nbClient.Where(nbGlobal).Mutate(nbGlobal, model.Mutation{
		Field:   &nbGlobal.NbCfg,
		Mutator: ovsdb.MutateOperationAdd,
		Value:   1,
	})
	if err != nil {
		return fmt.Errorf("failed to generate ops to mutate nb_cfg: %w", err)
	}
	if _, err = libovsdbops.TransactAndCheck(nbClient, ops); err != nil {
		return fmt.Errorf("failed to transact to increment nb_cfg: %w", err)
	}
	// The expected value is derived from the nb_cfg value read before the increment.
	expectedHvCfgValue := nbGlobal.NbCfg + 1
	if expectedHvCfgValue < 0 { // handle overflow
		expectedHvCfgValue = 0
	}
	// ovn-northd sets hv_cfg to the lowest int value found for all chassis in the system (IC mode,
	// we support a single chassis per zone) as reported in the Chassis_Private table in the southbound database.
	// Thus, hv_cfg equals nb_cfg for the single chassis once it is caught up with NB DB we want.
	// poll until we see the expected value in NB DB every 5 milliseconds until context is cancelled.
	err = wait.PollUntilContextCancel(ctx, 5*time.Millisecond, true, func(_ context.Context) (bool, error) {
		// Use a fresh lookup template and a poll-local result on every attempt, so that a failed lookup
		// can never feed its result back in as the template of the next attempt.
		current, getErr := libovsdbops.GetNBGlobal(nbClient, &nbdb.NBGlobal{})
		if getErr != nil {
			// northd hasn't added an entry yet
			if errors.Is(getErr, client.ErrNotFound) {
				return false, nil
			}
			return false, fmt.Errorf("failed to get nb_global table entry from NB DB: %w", getErr)
		}
		// we only need to ensure it is greater than or equal to the expected value
		return current.HvCfg >= expectedHvCfgValue, nil
	})
	if err != nil {
		return fmt.Errorf("failed while waiting for hv_cfg value greater than or equal %d in NB DB nb_global table: %w",
			expectedHvCfgValue, err)
	}
	return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"strings"
"sync"
"sync/atomic"
"time"

"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni"
Expand Down Expand Up @@ -153,7 +154,7 @@ func (ncm *nodeNetworkControllerManager) initDefaultNodeNetworkController() erro
}

// Start the node network controller manager
func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error) {
func (ncm *nodeNetworkControllerManager) Start(ctx context.Context, isOVNKubeControllerSyncd *atomic.Bool) (err error) {
klog.Infof("Starting the node network controller manager, Mode: %s", config.OvnKubeNode.Mode)

// Initialize OVS exec runner; find OVS binaries that the CNI code uses.
Expand All @@ -171,7 +172,7 @@ func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error)
// make sure we clean up after ourselves on failure
defer func() {
if err != nil {
ncm.Stop()
ncm.Stop(isOVNKubeControllerSyncd)
}
}()

Expand Down Expand Up @@ -228,15 +229,46 @@ func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error)
return fmt.Errorf("failed to own priority %d for IP rules: %v", node.UDNMasqueradeIPRulePriority, err)
}
}

// start workaround and remove when ovn has native support for silencing GARPs for LRPs
// https://issues.redhat.com/browse/FDP-1537
// when in mode ovnkube controller with node, wait until ovnkube controller is syncd before removing drop flows for GARPs
waitForControllerSyncLoop:
for {
select {
case <-ctx.Done():
return nil
default:
if isOVNKubeControllerSyncd != nil && !isOVNKubeControllerSyncd.Load() {
klog.V(5).Infof("Waiting for ovnkube controller to start before removing GARP drop flows")
time.Sleep(200 * time.Millisecond)
continue
}
klog.Infof("Removing flows to drop GARP")
ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway.SetDefaultBridgeGARPDropFlows(false)
if err := ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway.Reconcile(); err != nil {
return fmt.Errorf("failed to reconcile gateway after removing GARP drop flows for ext bridge: %v", err)
}
break waitForControllerSyncLoop
}
}
// end workaround

return nil
}

// Stop gracefully stops all managed controllers
func (ncm *nodeNetworkControllerManager) Stop() {
func (ncm *nodeNetworkControllerManager) Stop(isOVNKubeControllerSyncd *atomic.Bool) {
// stop stale ovs ports cleanup
close(ncm.stopChan)

if ncm.defaultNodeNetworkController != nil {
if isOVNKubeControllerSyncd != nil && ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway != nil {
ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway.SetDefaultBridgeGARPDropFlows(true)
if err := ncm.defaultNodeNetworkController.(*node.DefaultNodeNetworkController).Gateway.Reconcile(); err != nil {
klog.Errorf("Failed to reconcile gateway after attempting to add flows to the external bridge to drop GARPs: %v", err)
}
}
ncm.defaultNodeNetworkController.Stop()
}

Expand Down
Loading