Skip to content

Commit 4699b44

Browse files
committed
Merge remote-tracking branch 'ovn-org/master' into d/s-merge-06-04-2025
2 parents 80ebd7a + 2017ede commit 4699b44

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1125
-1196
lines changed

go-controller/hybrid-overlay/pkg/controller/ho_node_windows.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func newNodeController(kube kube.Interface,
5757
"UDP port. Please make sure you install all the KB updates on your system.")
5858
}
5959

60-
node, err := kube.GetNode(nodeName)
60+
node, err := kube.GetNodeForWindows(nodeName)
6161
if err != nil {
6262
return nil, err
6363
}
@@ -345,7 +345,7 @@ func (n *NodeController) initSelf(node *corev1.Node, nodeSubnet *net.IPNet) erro
345345
}
346346

347347
// Add existing nodes
348-
nodes, err := n.kube.GetNodes()
348+
nodes, err := n.kube.GetNodesForWindows()
349349
if err != nil {
350350
return fmt.Errorf("error in initializing/fetching nodes: %v", err)
351351
}
@@ -370,7 +370,7 @@ func (n *NodeController) uninitSelf(node *corev1.Node) error {
370370
networkName, n.networkID, node.Name)
371371

372372
// Remove existing nodes
373-
nodes, err := n.kube.GetNodes()
373+
nodes, err := n.kube.GetNodesForWindows()
374374
if err != nil {
375375
return fmt.Errorf("failed to get nodes: %v", err)
376376
}

go-controller/hybrid-overlay/pkg/controller/ovn_node_linux.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ func (n *NodeController) AddNode(node *corev1.Node) error {
261261
} else {
262262
// Make sure the local node has been initialized before adding a hybridOverlay remote node
263263
if atomic.LoadUint32(n.initState) < hotypes.DistributedRouterInitialized {
264-
localNode, err := n.kube.GetNode(n.nodeName)
264+
localNode, err := n.nodeLister.Get(n.nodeName)
265265
if err != nil {
266266
return fmt.Errorf("cannot get local node: %s: %w", n.nodeName, err)
267267
}

go-controller/pkg/clustermanager/network_cluster_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ func (ncc *networkClusterController) Reconcile(netInfo util.NetInfo) error {
475475
klog.Errorf("Failed to reconcile network %s: %v", ncc.GetNetworkName(), err)
476476
}
477477
if reconcilePendingPods && ncc.retryPods != nil {
478-
if err := objretry.RequeuePendingPods(ncc.kube, ncc.GetNetInfo(), ncc.retryPods); err != nil {
478+
if err := objretry.RequeuePendingPods(ncc.watchFactory, ncc.GetNetInfo(), ncc.retryPods); err != nil {
479479
klog.Errorf("Failed to requeue pending pods for network %s: %v", ncc.GetNetworkName(), err)
480480
}
481481
}
@@ -576,7 +576,7 @@ func (h *networkClusterControllerEventHandler) UpdateResource(oldObj, newObj int
576576
// 1. we missed an add event (bug in kapi informer code)
577577
// 2. a user removed the annotation on the node
578578
// Either way to play it safe for now do a partial json unmarshal check
579-
if !nodeFailed && util.NoHostSubnet(oldNode) != util.NoHostSubnet(newNode) && !h.ncc.nodeAllocator.NeedsNodeAllocation(newNode) {
579+
if !nodeFailed && util.NoHostSubnet(oldNode) == util.NoHostSubnet(newNode) && !h.ncc.nodeAllocator.NeedsNodeAllocation(newNode) {
580580
// no other node updates would require us to reconcile again
581581
return nil
582582
}

go-controller/pkg/clustermanager/node/node_allocator.go

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -195,27 +195,24 @@ func (na *NodeAllocator) NeedsNodeAllocation(node *corev1.Node) bool {
195195
}
196196

197197
// ovn node check
198-
// allocation is all or nothing, so if one field was allocated from:
199-
// nodeSubnets, joinSubnet, layer 2 tunnel id, then all of them were
200198
if na.hasNodeSubnetAllocation() {
201-
if util.HasNodeHostSubnetAnnotation(node, na.netInfo.GetNetworkName()) {
202-
return false
199+
if !util.HasNodeHostSubnetAnnotation(node, na.netInfo.GetNetworkName()) {
200+
return true
203201
}
204202
}
205-
206203
if na.hasJoinSubnetAllocation() {
207-
if util.HasNodeGatewayRouterJoinNetwork(node, na.netInfo.GetNetworkName()) {
208-
return false
204+
if !util.HasNodeGatewayRouterJoinNetwork(node, na.netInfo.GetNetworkName()) {
205+
return true
209206
}
210207
}
211208

212209
if util.IsNetworkSegmentationSupportEnabled() && na.netInfo.IsPrimaryNetwork() && util.DoesNetworkRequireTunnelIDs(na.netInfo) {
213-
if util.HasUDNLayer2NodeGRLRPTunnelID(node, na.netInfo.GetNetworkName()) {
214-
return false
210+
if !util.HasUDNLayer2NodeGRLRPTunnelID(node, na.netInfo.GetNetworkName()) {
211+
return true
215212
}
216213
}
217214

218-
return true
215+
return false
219216

220217
}
221218

go-controller/pkg/config/config.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ const DefaultVXLANPort = 4789
3838

3939
const DefaultDBTxnTimeout = time.Second * 100
4040

41+
// DefaultEphemeralPortRange is used for unit testing only
42+
const DefaultEphemeralPortRange = "32768-60999"
43+
4144
// The following are global config parameters that other modules may access directly
4245
var (
4346
// Build information. Populated at build-time.
@@ -494,6 +497,10 @@ type GatewayConfig struct {
494497
DisableForwarding bool `gcfg:"disable-forwarding"`
495498
// AllowNoUplink (disabled by default) controls if the external gateway bridge without an uplink port is allowed in local gateway mode.
496499
AllowNoUplink bool `gcfg:"allow-no-uplink"`
500+
// EphemeralPortRange is the range of ports used by egress SNAT operations in OVN. Specifically for NAT where
501+
// the source IP of the NAT will be a shared Node IP address. If unset, the value will be determined by sysctl lookup
502+
// for the kernel's ephemeral range: net.ipv4.ip_local_port_range. Format is "<min port>-<max port>".
503+
EphemeralPortRange string `gfcg:"ephemeral-port-range"`
497504
}
498505

499506
// OvnAuthConfig holds client authentication and location details for
@@ -664,6 +671,9 @@ func PrepareTestConfig() error {
664671
Kubernetes.DisableRequestedChassis = false
665672
EnableMulticast = false
666673
Default.OVSDBTxnTimeout = 5 * time.Second
674+
if Gateway.Mode != GatewayModeDisabled {
675+
Gateway.EphemeralPortRange = DefaultEphemeralPortRange
676+
}
667677

668678
if err := completeConfig(); err != nil {
669679
return err
@@ -1509,6 +1519,14 @@ var OVNGatewayFlags = []cli.Flag{
15091519
Usage: "Allow the external gateway bridge without an uplink port in local gateway mode",
15101520
Destination: &cliConfig.Gateway.AllowNoUplink,
15111521
},
1522+
&cli.StringFlag{
1523+
Name: "ephemeral-port-range",
1524+
Usage: "The port range in '<min port>-<max port>' format for OVN to use when SNAT'ing to a node IP. " +
1525+
"This range should not collide with the node port range being used in Kubernetes. If not provided, " +
1526+
"the default value will be derived from checking the sysctl value of net.ipv4.ip_local_port_range on the node.",
1527+
Destination: &cliConfig.Gateway.EphemeralPortRange,
1528+
Value: Gateway.EphemeralPortRange,
1529+
},
15121530
// Deprecated CLI options
15131531
&cli.BoolFlag{
15141532
Name: "init-gateways",
@@ -1917,6 +1935,19 @@ func buildGatewayConfig(ctx *cli.Context, cli, file *config) error {
19171935
if !found {
19181936
return fmt.Errorf("invalid gateway mode %q: expect one of %s", string(Gateway.Mode), strings.Join(validModes, ","))
19191937
}
1938+
1939+
if len(Gateway.EphemeralPortRange) > 0 {
1940+
if !isValidEphemeralPortRange(Gateway.EphemeralPortRange) {
1941+
return fmt.Errorf("invalid ephemeral-port-range, should be in the format <min port>-<max port>")
1942+
}
1943+
} else {
1944+
// auto-detect ephermal range
1945+
portRange, err := getKernelEphemeralPortRange()
1946+
if err != nil {
1947+
return fmt.Errorf("unable to auto-detect ephemeral port range to use with OVN")
1948+
}
1949+
Gateway.EphemeralPortRange = portRange
1950+
}
19201951
}
19211952

19221953
// Options are only valid if Mode is not disabled
@@ -1927,6 +1958,9 @@ func buildGatewayConfig(ctx *cli.Context, cli, file *config) error {
19271958
if Gateway.NextHop != "" {
19281959
return fmt.Errorf("gateway next-hop option %q not allowed when gateway is disabled", Gateway.NextHop)
19291960
}
1961+
if len(Gateway.EphemeralPortRange) > 0 {
1962+
return fmt.Errorf("gateway ephemeral port range option not allowed when gateway is disabled")
1963+
}
19301964
}
19311965

19321966
if Gateway.Mode != GatewayModeShared && Gateway.VLANID != 0 {

go-controller/pkg/config/utils.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ package config
33
import (
44
"fmt"
55
"net"
6+
"os"
67
"reflect"
8+
"regexp"
79
"strconv"
810
"strings"
911

@@ -328,3 +330,49 @@ func AllocateV6MasqueradeIPs(masqueradeSubnetNetworkAddress net.IP, masqueradeIP
328330
}
329331
return nil
330332
}
333+
334+
func isValidEphemeralPortRange(s string) bool {
335+
// Regex to match "<number>-<number>" with no extra characters
336+
re := regexp.MustCompile(`^(\d{1,5})-(\d{1,5})$`)
337+
matches := re.FindStringSubmatch(s)
338+
if matches == nil {
339+
return false
340+
}
341+
342+
minPort, err1 := strconv.Atoi(matches[1])
343+
maxPort, err2 := strconv.Atoi(matches[2])
344+
if err1 != nil || err2 != nil {
345+
return false
346+
}
347+
348+
// Port numbers must be in the 1-65535 range
349+
if minPort < 1 || minPort > 65535 || maxPort < 0 || maxPort > 65535 {
350+
return false
351+
}
352+
353+
return maxPort > minPort
354+
}
355+
356+
func getKernelEphemeralPortRange() (string, error) {
357+
data, err := os.ReadFile("/proc/sys/net/ipv4/ip_local_port_range")
358+
if err != nil {
359+
return "", fmt.Errorf("failed to read port range: %w", err)
360+
}
361+
362+
parts := strings.Fields(string(data))
363+
if len(parts) != 2 {
364+
return "", fmt.Errorf("unexpected format: %q", string(data))
365+
}
366+
367+
minPort, err := strconv.Atoi(parts[0])
368+
if err != nil {
369+
return "", fmt.Errorf("invalid min port: %w", err)
370+
}
371+
372+
maxPort, err := strconv.Atoi(parts[1])
373+
if err != nil {
374+
return "", fmt.Errorf("invalid max port: %w", err)
375+
}
376+
377+
return fmt.Sprintf("%d-%d", minPort, maxPort), nil
378+
}

go-controller/pkg/controllermanager/controller_manager.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
2424
libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util"
2525
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics"
26+
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics/recorders"
2627
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
2728
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager"
2829
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/observability"
@@ -438,7 +439,7 @@ func (cm *ControllerManager) Start(ctx context.Context) error {
438439
// with k=10,
439440
// for a cluster with 10 nodes, measurement of 1 in every 100 requests
440441
// for a cluster with 100 nodes, measurement of 1 in every 1000 requests
441-
metrics.GetConfigDurationRecorder().Run(cm.nbClient, cm.kube, 10, time.Second*5, cm.stopChan)
442+
recorders.GetConfigDurationRecorder().Run(cm.nbClient, cm.watchFactory, 10, time.Second*5, cm.stopChan)
442443
}
443444
cm.podRecorder.Run(cm.sbClient, cm.stopChan)
444445

go-controller/pkg/controllermanager/node_controller_manager_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ var _ = Describe("Healthcheck tests", func() {
228228
},
229229
}
230230
nodeList := []*corev1.Node{node}
231-
factoryMock.On("GetNode", nodeName).Return(nodeList[0], nil)
231+
factoryMock.On("GetNodeForWindows", nodeName).Return(nodeList[0], nil)
232232
factoryMock.On("GetNodes").Return(nodeList, nil)
233233
factoryMock.On("UserDefinedNetworkInformer").Return(nil)
234234
factoryMock.On("ClusterUserDefinedNetworkInformer").Return(nil)

go-controller/pkg/kube/annotator_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ var _ = Describe("Annotator", func() {
7979
err := nodeAnnot.Run()
8080
Expect(err).ToNot(HaveOccurred())
8181

82-
node, err := kube.GetNode(nodeName)
82+
node, err := kube.GetNodeForWindows(nodeName)
8383
Expect(err).ToNot(HaveOccurred())
8484

8585
// should contain initial annotations

go-controller/pkg/kube/kube.go

Lines changed: 13 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212

1313
corev1 "k8s.io/api/core/v1"
1414
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15-
"k8s.io/apimachinery/pkg/labels"
1615
"k8s.io/apimachinery/pkg/runtime"
1716
"k8s.io/apimachinery/pkg/types"
1817
"k8s.io/apimachinery/pkg/util/strategicpatch"
@@ -62,12 +61,11 @@ type Interface interface {
6261
PatchNode(old, new *corev1.Node) error
6362
UpdateNodeStatus(node *corev1.Node) error
6463
UpdatePodStatus(pod *corev1.Pod) error
65-
GetAnnotationsOnPod(namespace, name string) (map[string]string, error)
66-
GetNodes() ([]*corev1.Node, error)
67-
GetNamespaces(labelSelector metav1.LabelSelector) ([]*corev1.Namespace, error)
68-
GetPods(namespace string, opts metav1.ListOptions) ([]*corev1.Pod, error)
69-
GetPod(namespace, name string) (*corev1.Pod, error)
70-
GetNode(name string) (*corev1.Node, error)
64+
// GetPodsForDBChecker should only be used by legacy DB checker. Use watchFactory instead to get pods.
65+
GetPodsForDBChecker(namespace string, opts metav1.ListOptions) ([]*corev1.Pod, error)
66+
// GetNodeForWindows should only be used for windows hybrid overlay binary and never in linux code
67+
GetNodeForWindows(name string) (*corev1.Node, error)
68+
GetNodesForWindows() ([]*corev1.Node, error)
7169
Events() kv1core.EventInterface
7270
}
7371

@@ -201,7 +199,7 @@ func (k *Kube) SetAnnotationsOnService(namespace, name string, annotations map[s
201199

202200
// SetTaintOnNode tries to add a new taint to the node. If the taint already exists, it doesn't do anything.
203201
func (k *Kube) SetTaintOnNode(nodeName string, taint *corev1.Taint) error {
204-
node, err := k.GetNode(nodeName)
202+
node, err := k.GetNodeForWindows(nodeName)
205203
if err != nil {
206204
klog.Errorf("Unable to retrieve node %s for tainting %s: %v", nodeName, taint.ToString(), err)
207205
return err
@@ -234,7 +232,7 @@ func (k *Kube) SetTaintOnNode(nodeName string, taint *corev1.Taint) error {
234232
// RemoveTaintFromNode removes all the taints that have the same key and effect from the node.
235233
// If the taint doesn't exist, it doesn't do anything.
236234
func (k *Kube) RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error {
237-
node, err := k.GetNode(nodeName)
235+
node, err := k.GetNodeForWindows(nodeName)
238236
if err != nil {
239237
klog.Errorf("Unable to retrieve node %s for tainting %s: %v", nodeName, taint.ToString(), err)
240238
return err
@@ -324,32 +322,8 @@ func (k *Kube) UpdatePodStatus(pod *corev1.Pod) error {
324322
return err
325323
}
326324

327-
// GetAnnotationsOnPod obtains the pod annotations from kubernetes apiserver, given the name and namespace
328-
func (k *Kube) GetAnnotationsOnPod(namespace, name string) (map[string]string, error) {
329-
pod, err := k.KClient.CoreV1().Pods(namespace).Get(context.TODO(), name, metav1.GetOptions{})
330-
if err != nil {
331-
return nil, err
332-
}
333-
return pod.ObjectMeta.Annotations, nil
334-
}
335-
336-
// GetNamespaces returns the list of all Namespace objects matching the labelSelector
337-
func (k *Kube) GetNamespaces(labelSelector metav1.LabelSelector) ([]*corev1.Namespace, error) {
338-
list := []*corev1.Namespace{}
339-
err := pager.New(func(ctx context.Context, opts metav1.ListOptions) (runtime.Object, error) {
340-
return k.KClient.CoreV1().Namespaces().List(ctx, opts)
341-
}).EachListItem(context.TODO(), metav1.ListOptions{
342-
LabelSelector: labels.Set(labelSelector.MatchLabels).String(),
343-
ResourceVersion: "0",
344-
}, func(obj runtime.Object) error {
345-
list = append(list, obj.(*corev1.Namespace))
346-
return nil
347-
})
348-
return list, err
349-
}
350-
351-
// GetPods returns the list of all Pod objects in a namespace matching the options
352-
func (k *Kube) GetPods(namespace string, opts metav1.ListOptions) ([]*corev1.Pod, error) {
325+
// GetPodsForDBChecker returns the list of all Pod objects in a namespace matching the options. Only used by the legacy db checker.
326+
func (k *Kube) GetPodsForDBChecker(namespace string, opts metav1.ListOptions) ([]*corev1.Pod, error) {
353327
list := []*corev1.Pod{}
354328
opts.ResourceVersion = "0"
355329
err := pager.New(func(ctx context.Context, opts metav1.ListOptions) (runtime.Object, error) {
@@ -361,13 +335,8 @@ func (k *Kube) GetPods(namespace string, opts metav1.ListOptions) ([]*corev1.Pod
361335
return list, err
362336
}
363337

364-
// GetPod obtains the pod from kubernetes apiserver, given the name and namespace
365-
func (k *Kube) GetPod(namespace, name string) (*corev1.Pod, error) {
366-
return k.KClient.CoreV1().Pods(namespace).Get(context.TODO(), name, metav1.GetOptions{})
367-
}
368-
369-
// GetNodes returns the list of all Node objects from kubernetes
370-
func (k *Kube) GetNodes() ([]*corev1.Node, error) {
338+
// GetNodesForWindows returns the list of all Node objects from kubernetes. Only used by windows binary.
339+
func (k *Kube) GetNodesForWindows() ([]*corev1.Node, error) {
371340
list := []*corev1.Node{}
372341
err := pager.New(func(ctx context.Context, opts metav1.ListOptions) (runtime.Object, error) {
373342
return k.KClient.CoreV1().Nodes().List(ctx, opts)
@@ -380,8 +349,8 @@ func (k *Kube) GetNodes() ([]*corev1.Node, error) {
380349
return list, err
381350
}
382351

383-
// GetNode returns the Node resource from kubernetes apiserver, given its name
384-
func (k *Kube) GetNode(name string) (*corev1.Node, error) {
352+
// GetNodeForWindows returns the Node resource from kubernetes apiserver, given its name. Only used by windows binary.
353+
func (k *Kube) GetNodeForWindows(name string) (*corev1.Node, error) {
385354
return k.KClient.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
386355
}
387356

0 commit comments

Comments
 (0)