diff --git a/contrib/kind-helm.sh b/contrib/kind-helm.sh index 343a6d40cf..7cd59aef61 100755 --- a/contrib/kind-helm.sh +++ b/contrib/kind-helm.sh @@ -52,8 +52,8 @@ set_default_params() { export SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112} export JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16} export JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64} - export TRANSIT_SWITCH_SUBNET_IPV4=${TRANSIT_SWITCH_SUBNET_IPV4:-100.88.0.0/16} - export TRANSIT_SWITCH_SUBNET_IPV6=${TRANSIT_SWITCH_SUBNET_IPV6:-fd97::/64} + export TRANSIT_SUBNET_IPV4=${TRANSIT_SUBNET_IPV4:-100.88.0.0/16} + export TRANSIT_SUBNET_IPV6=${TRANSIT_SUBNET_IPV6:-fd97::/64} export METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16} export METALLB_CLIENT_NET_SUBNET_IPV6=${METALLB_CLIENT_NET_SUBNET_IPV6:-fc00:f853:ccd:e792::/64} diff --git a/contrib/kind.sh b/contrib/kind.sh index cff76b68ef..34faaea1aa 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -593,8 +593,8 @@ set_default_params() { JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64} MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/17} MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112} - TRANSIT_SWITCH_SUBNET_IPV4=${TRANSIT_SWITCH_SUBNET_IPV4:-100.88.0.0/16} - TRANSIT_SWITCH_SUBNET_IPV6=${TRANSIT_SWITCH_SUBNET_IPV6:-fd97::/64} + TRANSIT_SUBNET_IPV4=${TRANSIT_SUBNET_IPV4:-100.88.0.0/16} + TRANSIT_SUBNET_IPV6=${TRANSIT_SUBNET_IPV6:-fd97::/64} METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16} METALLB_CLIENT_NET_SUBNET_IPV6=${METALLB_CLIENT_NET_SUBNET_IPV6:-fc00:f853:ccd:e792::/64} BGP_SERVER_NET_SUBNET_IPV4=${BGP_SERVER_NET_SUBNET_IPV4:-172.26.0.0/16} @@ -631,7 +631,7 @@ set_default_params() { OVN_HOST_NETWORK_NAMESPACE=${OVN_HOST_NETWORK_NAMESPACE:-ovn-host-network} OVN_EGRESSIP_HEALTHCHECK_PORT=${OVN_EGRESSIP_HEALTHCHECK_PORT:-9107} OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} - OVN_DEPLOY_PODS=${OVN_DEPLOY_PODS:-"ovnkube-zone-controller ovnkube-control-plane 
ovnkube-master ovnkube-node"} + OVN_DEPLOY_PODS=${OVN_DEPLOY_PODS:-"ovnkube-identity ovnkube-zone-controller ovnkube-control-plane ovnkube-master ovnkube-node"} OVN_METRICS_SCALE_ENABLE=${OVN_METRICS_SCALE_ENABLE:-false} OVN_ISOLATED=${OVN_ISOLATED:-false} OVN_GATEWAY_OPTS=${OVN_GATEWAY_OPTS:-""} @@ -914,8 +914,8 @@ create_ovn_kube_manifests() { --v6-join-subnet="${JOIN_SUBNET_IPV6}" \ --v4-masquerade-subnet="${MASQUERADE_SUBNET_IPV4}" \ --v6-masquerade-subnet="${MASQUERADE_SUBNET_IPV6}" \ - --v4-transit-switch-subnet="${TRANSIT_SWITCH_SUBNET_IPV4}" \ - --v6-transit-switch-subnet="${TRANSIT_SWITCH_SUBNET_IPV6}" \ + --v4-transit-subnet="${TRANSIT_SUBNET_IPV4}" \ + --v6-transit-subnet="${TRANSIT_SUBNET_IPV6}" \ --ex-gw-network-interface="${OVN_EX_GW_NETWORK_INTERFACE}" \ --multi-network-enable="${ENABLE_MULTI_NET}" \ --network-segmentation-enable="${ENABLE_NETWORK_SEGMENTATION}" \ diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index f45f473a66..a2072b269e 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -79,8 +79,8 @@ OVN_V4_JOIN_SUBNET="" OVN_V6_JOIN_SUBNET="" OVN_V4_MASQUERADE_SUBNET="" OVN_V6_MASQUERADE_SUBNET="" -OVN_V4_TRANSIT_SWITCH_SUBNET="" -OVN_V6_TRANSIT_SWITCH_SUBNET="" +OVN_V4_TRANSIT_SUBNET="" +OVN_V6_TRANSIT_SUBNET="" OVN_NETFLOW_TARGETS="" OVN_SFLOW_TARGETS="" OVN_IPFIX_TARGETS="" @@ -302,11 +302,11 @@ while [ "$1" != "" ]; do --v6-masquerade-subnet) OVN_V6_MASQUERADE_SUBNET=$VALUE ;; - --v4-transit-switch-subnet) - OVN_V4_TRANSIT_SWITCH_SUBNET=$VALUE + --v4-transit-subnet) + OVN_V4_TRANSIT_SUBNET=$VALUE ;; - --v6-transit-switch-subnet) - OVN_V6_TRANSIT_SWITCH_SUBNET=$VALUE + --v6-transit-subnet) + OVN_V6_TRANSIT_SUBNET=$VALUE ;; --netflow-targets) OVN_NETFLOW_TARGETS=$VALUE @@ -536,10 +536,10 @@ ovn_v4_masquerade_subnet=${OVN_V4_MASQUERADE_SUBNET} echo "ovn_v4_masquerade_subnet: ${ovn_v4_masquerade_subnet}" ovn_v6_masquerade_subnet=${OVN_V6_MASQUERADE_SUBNET} echo "ovn_v6_masquerade_subnet: 
${ovn_v6_masquerade_subnet}" -ovn_v4_transit_switch_subnet=${OVN_V4_TRANSIT_SWITCH_SUBNET} -echo "ovn_v4_transit_switch_subnet: ${ovn_v4_transit_switch_subnet}" -ovn_v6_transit_switch_subnet=${OVN_V6_TRANSIT_SWITCH_SUBNET} -echo "ovn_v6_transit_switch_subnet: ${ovn_v6_transit_switch_subnet}" +ovn_v4_transit_subnet=${OVN_V4_TRANSIT_SUBNET} +echo "ovn_v4_transit_subnet: ${ovn_v4_transit_subnet}" +ovn_v6_transit_subnet=${OVN_V6_TRANSIT_SUBNET} +echo "ovn_v6_transit_subnet: ${ovn_v6_transit_subnet}" ovn_netflow_targets=${OVN_NETFLOW_TARGETS} echo "ovn_netflow_targets: ${ovn_netflow_targets}" ovn_sflow_targets=${OVN_SFLOW_TARGETS} @@ -842,8 +842,8 @@ ovn_image=${ovnkube_image} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_network_qos_enable=${ovn_network_qos_enable} \ - ovn_v4_transit_switch_subnet=${ovn_v4_transit_switch_subnet} \ - ovn_v6_transit_switch_subnet=${ovn_v6_transit_switch_subnet} \ + ovn_v4_transit_subnet=${ovn_v4_transit_subnet} \ + ovn_v6_transit_subnet=${ovn_v6_transit_subnet} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ ovn_observ_enable=${ovn_observ_enable} \ diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index 1e0661f501..be4dedfc97 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -238,10 +238,10 @@ ovn_v6_join_subnet=${OVN_V6_JOIN_SUBNET:-} ovn_v4_masquerade_subnet=${OVN_V4_MASQUERADE_SUBNET:-} # OVN_V6_MASQUERADE_SUBNET - v6 masquerade subnet ovn_v6_masquerade_subnet=${OVN_V6_MASQUERADE_SUBNET:-} -# OVN_V4_TRANSIT_SWITCH_SUBNET - v4 Transit switch subnet -ovn_v4_transit_switch_subnet=${OVN_V4_TRANSIT_SWITCH_SUBNET:-} -# OVN_V6_TRANSIT_SWITCH_SUBNET - v6 Transit switch subnet -ovn_v6_transit_switch_subnet=${OVN_V6_TRANSIT_SWITCH_SUBNET:-} +# OVN_V4_TRANSIT_SUBNET - v4 Transit subnet +ovn_v4_transit_subnet=${OVN_V4_TRANSIT_SUBNET:-} +# 
OVN_V6_TRANSIT_SUBNET - v6 Transit subnet +ovn_v6_transit_subnet=${OVN_V6_TRANSIT_SUBNET:-} #OVN_REMOTE_PROBE_INTERVAL - ovn remote probe interval in ms (default 100000) ovn_remote_probe_interval=${OVN_REMOTE_PROBE_INTERVAL:-100000} #OVN_MONITOR_ALL - ovn-controller monitor all data in SB DB @@ -2356,17 +2356,17 @@ ovn-cluster-manager() { fi echo "ovn_v6_masquerade_subnet_opt=${ovn_v6_masquerade_subnet_opt}" - ovn_v4_transit_switch_subnet_opt= - if [[ -n ${ovn_v4_transit_switch_subnet} ]]; then - ovn_v4_transit_switch_subnet_opt="--cluster-manager-v4-transit-switch-subnet=${ovn_v4_transit_switch_subnet}" + ovn_v4_transit_subnet_opt= + if [[ -n ${ovn_v4_transit_subnet} ]]; then + ovn_v4_transit_subnet_opt="--cluster-manager-v4-transit-subnet=${ovn_v4_transit_subnet}" fi - echo "ovn_v4_transit_switch_subnet_opt=${ovn_v4_transit_switch_subnet}" + echo "ovn_v4_transit_subnet_opt=${ovn_v4_transit_subnet}" - ovn_v6_transit_switch_subnet_opt= - if [[ -n ${ovn_v6_transit_switch_subnet} ]]; then - ovn_v6_transit_switch_subnet_opt="--cluster-manager-v6-transit-switch-subnet=${ovn_v6_transit_switch_subnet}" + ovn_v6_transit_subnet_opt= + if [[ -n ${ovn_v6_transit_subnet} ]]; then + ovn_v6_transit_subnet_opt="--cluster-manager-v6-transit-subnet=${ovn_v6_transit_subnet}" fi - echo "ovn_v6_transit_switch_subnet_opt=${ovn_v6_transit_switch_subnet}" + echo "ovn_v6_transit_subnet_opt=${ovn_v6_transit_subnet}" multicast_enabled_flag= if [[ ${ovn_multicast_enable} == "true" ]]; then @@ -2476,8 +2476,8 @@ ovn-cluster-manager() { ${ovn_v4_masquerade_subnet_opt} \ ${ovn_v6_join_subnet_opt} \ ${ovn_v6_masquerade_subnet_opt} \ - ${ovn_v4_transit_switch_subnet_opt} \ - ${ovn_v6_transit_switch_subnet_opt} \ + ${ovn_v4_transit_subnet_opt} \ + ${ovn_v6_transit_subnet_opt} \ ${network_qos_enabled_flag} \ ${ovn_enable_dnsnameresolver_flag} \ --gateway-mode=${ovn_gateway_mode} \ diff --git a/dist/templates/ovnkube-control-plane.yaml.j2 b/dist/templates/ovnkube-control-plane.yaml.j2 index 
ded7096f86..7d82178384 100644 --- a/dist/templates/ovnkube-control-plane.yaml.j2 +++ b/dist/templates/ovnkube-control-plane.yaml.j2 @@ -179,10 +179,10 @@ spec: value: "{{ ovn_enable_interconnect }}" - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY value: "{{ ovn_enable_multi_external_gateway }}" - - name: OVN_V4_TRANSIT_SWITCH_SUBNET - value: "{{ ovn_v4_transit_switch_subnet }}" - - name: OVN_V6_TRANSIT_SWITCH_SUBNET - value: "{{ ovn_v6_transit_switch_subnet }}" + - name: OVN_V4_TRANSIT_SUBNET + value: "{{ ovn_v4_transit_subnet }}" + - name: OVN_V6_TRANSIT_SUBNET + value: "{{ ovn_v6_transit_subnet }}" - name: OVN_ENABLE_PERSISTENT_IPS value: "{{ ovn_enable_persistent_ips }}" - name: OVN_NETWORK_QOS_ENABLE diff --git a/docs/installation/ovnkube.1 b/docs/installation/ovnkube.1 index c393e928e0..fc11a1f3cd 100644 --- a/docs/installation/ovnkube.1 +++ b/docs/installation/ovnkube.1 @@ -154,10 +154,10 @@ Show help. \fB\--version\fR, \fB\-v\fR Print the version. .TP -\fB\--cluster-manager-v4-transit-switch-subnet\fR string +\fB\--cluster-manager-v4-transit-subnet\fR string The v4 transit switch subnet to use for assigning transit switch IPv4 addresses\fR. .TP -\fB\--cluster-manager-v6-transit-switch-subnet\fR string +\fB\--cluster-manager-v6-transit-subnet\fR string The v6 transit switch subnet to use for assigning transit switch IPv6 addresses\fR. 
.SH "SEE ALSO" diff --git a/go-controller/.mockery.yaml b/go-controller/.mockery.yaml index 2ebe5c9937..7acd04759d 100644 --- a/go-controller/.mockery.yaml +++ b/go-controller/.mockery.yaml @@ -13,7 +13,6 @@ packages: github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni: interfaces: CNIPluginLibOps: - NetNS: config: dir: pkg/cni/mocks github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube: @@ -24,9 +23,9 @@ packages: config: all: true dir: pkg/kube/mocks - github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node: + github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport: interfaces: - ManagementPort: + Interface: github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set: config: all: true diff --git a/go-controller/Makefile b/go-controller/Makefile index 27ebf94c8b..008accd36f 100644 --- a/go-controller/Makefile +++ b/go-controller/Makefile @@ -35,7 +35,7 @@ TOOLS_OUTPUT_DIR = ${CURDIR}/${OUT_DIR} MOCKERY = ${TOOLS_OUTPUT_DIR}/mockery-${MOCKERY_VERSION} ## Tool Versions -MOCKERY_VERSION ?= v2.43.2 +MOCKERY_VERSION ?= v2.53.4 export NOROOT diff --git a/go-controller/pkg/allocator/id/allocator.go b/go-controller/pkg/allocator/id/allocator.go index a2a08a3b3f..15a30fb501 100644 --- a/go-controller/pkg/allocator/id/allocator.go +++ b/go-controller/pkg/allocator/id/allocator.go @@ -2,9 +2,10 @@ package id import ( "fmt" - "sync" + "slices" bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/bitmap" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" ) const ( @@ -28,7 +29,7 @@ type NamedAllocator interface { // idAllocator is used to allocate id for a resource and store the resource - id in a map type idAllocator struct { - nameIdMap sync.Map + nameIdMap *syncmap.SyncMap[int] idBitmap *bitmapallocator.AllocationBitmap } @@ -37,7 +38,7 @@ func NewIDAllocator(name string, maxIds int) Allocator { idBitmap := bitmapallocator.NewRoundRobinAllocationMap(maxIds, name) return &idAllocator{ - nameIdMap: 
sync.Map{}, + nameIdMap: syncmap.NewSyncMap[int](), idBitmap: idBitmap, } } @@ -45,10 +46,12 @@ func NewIDAllocator(name string, maxIds int) Allocator { // AllocateID allocates an id for the resource 'name' and returns the id. // If the id for the resource is already allocated, it returns the cached id. func (idAllocator *idAllocator) AllocateID(name string) (int, error) { + idAllocator.nameIdMap.LockKey(name) + defer idAllocator.nameIdMap.UnlockKey(name) // Check the idMap and return the id if its already allocated v, ok := idAllocator.nameIdMap.Load(name) if ok { - return v.(int), nil + return v, nil } id, allocated, _ := idAllocator.idBitmap.AllocateNext() @@ -66,13 +69,15 @@ func (idAllocator *idAllocator) AllocateID(name string) (int, error) { // It also returns an error if the resource 'name' has a different 'id' // already reserved. func (idAllocator *idAllocator) ReserveID(name string, id int) error { + idAllocator.nameIdMap.LockKey(name) + defer idAllocator.nameIdMap.UnlockKey(name) v, ok := idAllocator.nameIdMap.Load(name) if ok { - if v.(int) == id { + if v == id { // All good. The id is already reserved by the same resource name. return nil } - return fmt.Errorf("can't reserve id %d for the resource %s. It is already allocated with a different id %d", id, name, v.(int)) + return fmt.Errorf("can't reserve id %d for the resource %s. 
It is already allocated with a different id %d", id, name, v) } reserved, _ := idAllocator.idBitmap.Allocate(id) @@ -86,9 +91,11 @@ func (idAllocator *idAllocator) ReserveID(name string, id int) error { // ReleaseID releases the id allocated for the resource 'name' func (idAllocator *idAllocator) ReleaseID(name string) { + idAllocator.nameIdMap.LockKey(name) + defer idAllocator.nameIdMap.UnlockKey(name) v, ok := idAllocator.nameIdMap.Load(name) if ok { - idAllocator.idBitmap.Release(v.(int)) + idAllocator.idBitmap.Release(v) idAllocator.nameIdMap.Delete(name) } } @@ -116,3 +123,109 @@ func (allocator *namedAllocator) ReserveID(id int) error { func (allocator *namedAllocator) ReleaseID() { allocator.allocator.ReleaseID(allocator.name) } + +// idsAllocator is used to allocate multiple ids for a resource and store the resource - ids in a map +type idsAllocator struct { + // idBitmap allocated ids in range [0, maxIds-1] + idBitmap *bitmapallocator.AllocationBitmap + // offset can be used to shift the range to [offset, offset+maxIds-1] + offset int + // nameIdsMap stores the final allocated ids in range [offset, offset+maxIds-1] for a resource name + nameIdsMap *syncmap.SyncMap[[]int] +} + +// newIDsAllocator returns an idsAllocator. +// If offset is non-zero, the allocated ids will be in the range [offset, offset+maxIds-1) +func newIDsAllocator(name string, maxIds int, offset int) *idsAllocator { + idBitmap := bitmapallocator.NewRoundRobinAllocationMap(maxIds, name) + return &idsAllocator{ + nameIdsMap: syncmap.NewSyncMap[[]int](), + idBitmap: idBitmap, + offset: offset, + } +} + +// AllocateIDs allocates numOfIDs for the resource 'name' and returns the ids. +// If less ids than numOfIDs are already allocated for the resource name, it will allocate the missing amount. +// If more ids than numOfIDs are already allocated for the resource name, it returns an error. 
+func (idsAllocator *idsAllocator) AllocateIDs(name string, numOfIDs int) ([]int, error) { + idsAllocator.nameIdsMap.LockKey(name) + defer idsAllocator.nameIdsMap.UnlockKey(name) + // Check the idMap and return the id if its already allocated + ids, ok := idsAllocator.nameIdsMap.Load(name) + if ok { + if len(ids) == numOfIDs { + return ids, nil + } + if len(ids) > numOfIDs { + return ids, fmt.Errorf("the resource %s already has more ids allocated %v than requested %v", name, ids, numOfIDs) + } + } else { + ids = make([]int, 0, numOfIDs) + } + previouslyAllocated := len(ids) + for len(ids) < numOfIDs { + id, allocated, _ := idsAllocator.idBitmap.AllocateNext() + if !allocated { + // release newly allocated ids + for _, id := range ids[previouslyAllocated:] { + idsAllocator.idBitmap.Release(id - idsAllocator.offset) + } + return ids, fmt.Errorf("failed to allocate the id for the resource %s", name) + } + ids = append(ids, id+idsAllocator.offset) + } + if len(ids) == 0 { + // don't store empty slice in the map + return ids, nil + } + idsAllocator.nameIdsMap.Store(name, ids) + return ids, nil +} + +// ReserveIDs reserves 'ids' for the resource 'name'. It returns an +// error if one of the 'ids' is already reserved by a resource other than 'name'. +// It also returns an error if the resource 'name' has a different 'ids' slice +// already reserved. Slice elements order is important for comparison. +func (idsAllocator *idsAllocator) ReserveIDs(name string, ids []int) error { + idsAllocator.nameIdsMap.LockKey(name) + defer idsAllocator.nameIdsMap.UnlockKey(name) + existingIDs, ok := idsAllocator.nameIdsMap.Load(name) + if ok { + if slices.Equal(existingIDs, ids) { + // All good. The ids are already reserved by the same resource name. + return nil + } + return fmt.Errorf("can't reserve ids %v for the resource %s. 
It is already allocated with different ids %v", + ids, name, existingIDs) + } + allocatedIDs := make([]int, 0, len(ids)) + for _, id := range ids { + // don't forget to adjust the id with the offset + reserved, _ := idsAllocator.idBitmap.Allocate(id - idsAllocator.offset) + if !reserved { + // cleanup previously allocated ids + for _, allocatedID := range allocatedIDs { + idsAllocator.idBitmap.Release(allocatedID - idsAllocator.offset) + } + return fmt.Errorf("id %d is already reserved by another resource", id) + } + allocatedIDs = append(allocatedIDs, id) + } + idsAllocator.nameIdsMap.Store(name, allocatedIDs) + return nil +} + +// ReleaseIDs releases all ids allocated for the resource 'name' +func (idsAllocator *idsAllocator) ReleaseIDs(name string) { + idsAllocator.nameIdsMap.LockKey(name) + defer idsAllocator.nameIdsMap.UnlockKey(name) + existingIDs, ok := idsAllocator.nameIdsMap.Load(name) + if !ok { + return + } + for _, id := range existingIDs { + idsAllocator.idBitmap.Release(id - idsAllocator.offset) + } + idsAllocator.nameIdsMap.Delete(name) +} diff --git a/go-controller/pkg/allocator/id/allocator_test.go b/go-controller/pkg/allocator/id/allocator_test.go new file mode 100644 index 0000000000..d520145625 --- /dev/null +++ b/go-controller/pkg/allocator/id/allocator_test.go @@ -0,0 +1,173 @@ +package id + +import ( + "slices" + "testing" +) + +func TestIDsAllocator(t *testing.T) { + // create allocator with range [3, 8] + allocator := newIDsAllocator("test", 6, 3) + ids, err := allocator.AllocateIDs("test1", 0) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if len(ids) != 0 { + t.Errorf("expect 0 ids allocated, but got %v", ids) + } + // test reserve IDs + err = allocator.ReserveIDs("test1", []int{4}) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + // ids: test1 = [4] + // test offset and multiple IDs allocation skipping allocated ID + ids, err = allocator.AllocateIDs("test2", 3) + if err != nil { + t.Errorf("unexpected 
error: %v", err) + } + if !slices.Equal(ids, []int{3, 5, 6}) { + t.Errorf("expect ids [3,5,6] allocated, but got %v", ids) + } + // ids: test1 = [4] + // ids: test2 = [3,5,6] + // try to allocate more ids for test1 + ids, err = allocator.AllocateIDs("test1", 2) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !slices.Equal(ids, []int{4, 7}) { + t.Errorf("expect ids [4,7] allocated, but got %v", ids) + } + // ids: test1 = [4,7] + // ids: test2 = [3,5,6] + // request already existing IDs + ids, err = allocator.AllocateIDs("test1", 2) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !slices.Equal(ids, []int{4, 7}) { + t.Errorf("expect ids [4,7] allocated, but got %v", ids) + } + // ids: test1 = [4,7] + // ids: test2 = [3,5,6] + // try to allocate more ids than available + ids, err = allocator.AllocateIDs("test3", 2) + if err == nil { + t.Errorf("expect error allocating id for test3, but got ids %v", ids) + } + // try to reserve last available ID + err = allocator.ReserveIDs("test3", []int{8}) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + // ids: test1 = [4,7] + // ids: test2 = [3,5,6] + // ids: test3 = [8] + // try to reserve different IDs + err = allocator.ReserveIDs("test3", []int{7, 8}) + if err == nil { + t.Errorf("expect error reserving ids for test3") + } + // now release IDs for test1 + allocator.ReleaseIDs("test1") + // ids: test2 = [3,5,6] + // ids: test3 = [8] + // try to allocate more ids than available + ids, err = allocator.AllocateIDs("test3", 4) + if err == nil { + t.Errorf("expect error allocating id for test3, but got ids %v", ids) + } + ids, err = allocator.AllocateIDs("test3", 3) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !slices.Equal(ids, []int{8, 4, 7}) { + t.Errorf("expect ids [8,4,7] allocated, but got %v", ids) + } + // ids: test2 = [3,5,6] + // ids: test3 = [8,4,7] +} + +func TestTunnelKeysAllocator(t *testing.T) { + allocator := NewTunnelKeyAllocator("test") + 
transitSwitchBase := 16711683 + tunnelKeyBase := 16715779 + // allocate 1 key for networkID 1 (transit switch key is preserved) + ids, err := allocator.AllocateKeys("net1", 1, 1) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !slices.Equal(ids, []int{transitSwitchBase + 1}) { + t.Errorf("expect ids %v allocated, but got %v", []int{transitSwitchBase + 1}, ids) + } + // now add one more key for networkID 1 (should return the same key) + ids, err = allocator.AllocateKeys("net1", 1, 2) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !slices.Equal(ids, []int{transitSwitchBase + 1, tunnelKeyBase}) { + t.Errorf("expect ids %v allocated, but got %v", []int{transitSwitchBase + 1, tunnelKeyBase}, ids) + } + // now ask for 1 key again for networkID 1 (reducing the number of requested keys is not expected and should return error) + ids, err = allocator.AllocateKeys("net1", 1, 1) + if err == nil { + t.Errorf("expect error allocating id for net1, but got ids %v", ids) + } + // check the 0 also works + ids, err = allocator.AllocateKeys("net1", 1, 0) + if err == nil { + t.Errorf("expect error allocating id for net1, but got ids %v", ids) + } + // same for reserve IDs + err = allocator.ReserveKeys("net1", []int{transitSwitchBase + 1}) + if err == nil { + t.Errorf("expect error reserving ids for net1") + } + // now reserve already allocated ids, should be ok + err = allocator.ReserveKeys("net1", []int{transitSwitchBase + 1, tunnelKeyBase}) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + // allocate 3 keys for networkID 2 (transit switch key is preserved + 2 allocated keys) + ids, err = allocator.AllocateKeys("net2", 2, 3) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !slices.Equal(ids, []int{transitSwitchBase + 2, tunnelKeyBase + 1, tunnelKeyBase + 2}) { + t.Errorf("expect ids %v allocated, but got %v", []int{transitSwitchBase + 1, tunnelKeyBase + 1, tunnelKeyBase + 2}, ids) + } + // reserve next 2 keys 
for networkID 3 + err = allocator.ReserveKeys("net3", []int{tunnelKeyBase + 3, tunnelKeyBase + 4}) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + // allocate 2 keys for networkID 4 + ids, err = allocator.AllocateKeys("net4", 4, 2) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !slices.Equal(ids, []int{transitSwitchBase + 4, tunnelKeyBase + 5}) { + t.Errorf("expect ids %v allocated, but got %v", []int{transitSwitchBase + 4, tunnelKeyBase + 5}, ids) + } + // check network ID out of reserved range + ids, err = allocator.AllocateKeys("net5", 5000, 1) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !slices.Equal(ids, []int{tunnelKeyBase + 6}) { + t.Errorf("expect ids %v allocated, but got %v", []int{tunnelKeyBase + 6}, ids) + } + + totalKeys := 61437 + // we have already allocated 7 keys from the free range, request the rest of them + 1 + _, err = allocator.AllocateKeys("net6", 10000, totalKeys-7+1) + if err == nil { + t.Errorf("expect error allocating id for net5") + } + _, err = allocator.AllocateKeys("net6", 10000, totalKeys-7) + if err != nil { + t.Errorf("unexpected error: %v", err) + } +} diff --git a/go-controller/pkg/allocator/id/tunnelkeyallocator.go b/go-controller/pkg/allocator/id/tunnelkeyallocator.go new file mode 100644 index 0000000000..66730fe0c4 --- /dev/null +++ b/go-controller/pkg/allocator/id/tunnelkeyallocator.go @@ -0,0 +1,67 @@ +package id + +// TunnelKeysAllocator is used to allocate tunnel Keys for distributed OVN datapaths. +// It preserves first 4096 keys for the already-used transit switch IDs based on the networkID. 
+type TunnelKeysAllocator struct { + idsAllocator *idsAllocator + preservedRange int + idsOffset int +} + +// NewTunnelKeyAllocator returns an TunnelKeysAllocator +func NewTunnelKeyAllocator(name string) *TunnelKeysAllocator { + // OVN-defined constants from + // https://github.com/ovn-org/ovn/blob/cfaf849c034469502fc97149f20676dec4d76595/lib/ovn-util.h#L159-L164 + // total number of datapath(switches and routers) keys + maxDPKey := (1 << 24) - 1 + // We have already used some keys for transit switch tunnels, the maximum tunnel key that is already allocated + // is BaseTransitSwitchTunnelKey + MaxNetworks. + // BaseTransitSwitchTunnelKey = 16711683 + // MaxNetworks = 4096 + rangeStart := 16711683 + 4096 + // this is how many keys are left for allocation + freeIDs := maxDPKey - rangeStart + 1 + + return &TunnelKeysAllocator{ + idsAllocator: newIDsAllocator(name, freeIDs, rangeStart), + preservedRange: 4096, + idsOffset: 16711683, + } +} + +// AllocateKeys allocates 'numOfKeys' for the resource 'name'. +// Previously allocated keys for 'name' are preserved in case of error. +// If networkID is less than 4096, the first key will come from the preserved range +// based on the networkID. +// If less keys than numOfKeys are already allocated for the resource name, it will allocate the missing amount. +// If more keys than numOfKeys are already allocated for the resource name, it returns an error. +func (allocator *TunnelKeysAllocator) AllocateKeys(name string, networkID, numOfKeys int) ([]int, error) { + allocatedIDs := make([]int, 0, numOfKeys) + if networkID < allocator.preservedRange && numOfKeys > 0 { + // transit switch tunnel key is preserved + allocatedIDs = append(allocatedIDs, allocator.idsOffset+networkID) + numOfKeys -= 1 + } + newIDs, err := allocator.idsAllocator.AllocateIDs(name, numOfKeys) + if err != nil { + return nil, err + } + return append(allocatedIDs, newIDs...), nil +} + +// ReserveKeys reserves 'tunnelKeys' for the resource 'name'. 
It returns an +// error if one of the 'tunnelKeys' is already reserved by a resource other than 'name'. +// It also returns an error if the resource 'name' has a different 'tunnelKeys' slice +// already reserved. Slice elements order is important for comparison. +func (allocator *TunnelKeysAllocator) ReserveKeys(name string, tunnelKeys []int) error { + if len(tunnelKeys) > 0 && tunnelKeys[0]-allocator.idsOffset < allocator.preservedRange { + // transit switch tunnel key is not allocated by the allocator + tunnelKeys = tunnelKeys[1:] + } + return allocator.idsAllocator.ReserveIDs(name, tunnelKeys) +} + +// ReleaseKeys releases the tunnelKeys allocated for the resource 'name' +func (allocator *TunnelKeysAllocator) ReleaseKeys(name string) { + allocator.idsAllocator.ReleaseIDs(name) +} diff --git a/go-controller/pkg/allocator/pod/pod_annotation.go b/go-controller/pkg/allocator/pod/pod_annotation.go index eed6bab488..1f69442176 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation.go +++ b/go-controller/pkg/allocator/pod/pod_annotation.go @@ -524,6 +524,8 @@ func AddRoutesGatewayIP( if !util.IsNetworkSegmentationSupportEnabled() || !netinfo.IsPrimaryNetwork() { return nil } + var nodeLRPMAC net.HardwareAddr + var hasV4 bool for _, podIfAddr := range podAnnotation.IPs { isIPv6 := utilnet.IsIPv6CIDR(podIfAddr) nodeSubnet, err := util.MatchFirstIPNetFamily(isIPv6, nodeSubnets) @@ -538,18 +540,30 @@ func AddRoutesGatewayIP( if network != nil && len(network.GatewayRequest) == 0 { // if specific default route for pod was not requested then add gatewayIP podAnnotation.Gateways = append(podAnnotation.Gateways, gatewayIPnet.IP) } + if !isIPv6 { + hasV4 = true + nodeLRPMAC = util.IPAddrToHWAddr(gatewayIPnet.IP) + } else if !hasV4 { + nodeLRPMAC = util.IPAddrToHWAddr(gatewayIPnet.IP) + } } // Until https://github.com/ovn-kubernetes/ovn-kubernetes/issues/4876 is fixed, it is limited to IC only if config.OVNKubernetesFeature.EnableInterconnect { if _, isIPv6Mode := 
netinfo.IPMode(); isIPv6Mode { - joinAddrs, err := udn.GetGWRouterIPs(node, netinfo.GetNetInfo()) - if err != nil { - if util.IsAnnotationNotSetError(err) { - return types.NewSuppressedError(err) + var routerPortMac net.HardwareAddr + if !util.UDNLayer2NodeUsesTransitRouter(node) { + joinAddrs, err := udn.GetGWRouterIPs(node, netinfo.GetNetInfo()) + if err != nil { + if util.IsAnnotationNotSetError(err) { + return types.NewSuppressedError(err) + } + return fmt.Errorf("failed parsing node gateway router join addresses, network %q, %w", netinfo.GetNetworkName(), err) } - return fmt.Errorf("failed parsing node gateway router join addresses, network %q, %w", netinfo.GetNetworkName(), err) + routerPortMac = util.IPAddrToHWAddr(joinAddrs[0].IP) + } else { + routerPortMac = nodeLRPMAC } - podAnnotation.GatewayIPv6LLA = util.HWAddrToIPv6LLA(util.IPAddrToHWAddr(joinAddrs[0].IP)) + podAnnotation.GatewayIPv6LLA = util.HWAddrToIPv6LLA(routerPortMac) } } return nil diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index b382cb5212..49226315f4 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -5,10 +5,14 @@ import ( "fmt" "net" + networkattchmentdefclientset "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/dnsnameresolver" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/egressservice" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/endpointslicemirror" @@ -22,6 +26,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/unidling" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -85,7 +90,16 @@ func NewClusterManager( cm.networkManager = networkmanager.Default() if config.OVNKubernetesFeature.EnableMultiNetwork { - cm.networkManager, err = networkmanager.NewForCluster(cm, wf, ovnClient, recorder) + // tunnelKeysAllocator is now only used for NAD tunnel keys allocation, but will be reused + // for Connecting UDNs. So we initialize it here and pass it to the networkManager. + // The same instance should be initialized only once and passed to all the + // users of tunnel-keys. + tunnelKeysAllocator, err := initTunnelKeysAllocator(ovnClient.NetworkAttchDefClient) + if err != nil { + return nil, fmt.Errorf("failed to initialize tunnel keys allocator: %w", err) + } + + cm.networkManager, err = networkmanager.NewForCluster(cm, wf, ovnClient, recorder, tunnelKeysAllocator) if err != nil { return nil, err } @@ -275,3 +289,39 @@ func (cm *ClusterManager) Reconcile(name string, old, new util.NetInfo) error { } return nil } + +// initTunnelKeysAllocator reserves any existing tunnel keys to avoid re-allocation. +// It will be shared across multiple controllers and should account for different object types. +// Good news is that we don't care about missing events, because we only need to reserve ids that are already +// annotated, and no one else can annotate them except ClusterManager. 
+func initTunnelKeysAllocator(nadClient networkattchmentdefclientset.Interface) (*id.TunnelKeysAllocator, error) { + tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") + + existingNADs, err := nadClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("").List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list existing NADs: %w", err) + } + for _, nad := range existingNADs.Items { + // reserve tunnel keys that are already allocated to make sure they are + if nad.Annotations[types.OvnNetworkTunnelKeysAnnotation] != "" { + netconf, err := util.ParseNetConf(&nad) + if err != nil { + // ignore non-OVN NADs; otherwise log and continue + if err.Error() == util.ErrorAttachDefNotOvnManaged.Error() { + continue + } + klog.Warningf("Failed to parse NAD annotation %s: %v", nad.Name, err) + continue + } + networkName := netconf.Name + tunnelKeys, err := util.ParseTunnelKeysAnnotation(nad.Annotations[types.OvnNetworkTunnelKeysAnnotation]) + if err != nil { + return nil, fmt.Errorf("failed to parse annotated tunnel keys: %w", err) + } + if err = tunnelKeysAllocator.ReserveKeys(networkName, tunnelKeys); err != nil { + return nil, fmt.Errorf("failed to reserve tunnel keys %v for network %s: %w", tunnelKeys, networkName, err) + } + } + } + return tunnelKeysAllocator, nil +} diff --git a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index 74c2e0d1bf..f1bcb28a16 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -21,6 +21,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/generator/udn" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -60,6 +61,7 @@ var _ = ginkgo.Describe("Cluster Manager", func() { ginkgo.AfterEach(func() { if f != nil { f.Shutdown() + f = nil } wg.Wait() }) @@ -843,6 +845,50 @@ var _ = ginkgo.Describe("Cluster Manager", func() { }) }) + ginkgo.Context("tunnel keys allocations", func() { + ginkgo.It("check for tunnel keys allocations", func() { + app.Action = func(_ *cli.Context) error { + nad1 := testing.GenerateNAD("test1", "test1", "test", ovntypes.Layer2Topology, + "10.0.0.0/24", ovntypes.NetworkRolePrimary) + // start with test1 network that already has keys allocated + nad1.Annotations = map[string]string{ + ovntypes.OvnNetworkTunnelKeysAnnotation: "[16711685,16715780]", + } + // and test2 network without keys allocated + nad2 := testing.GenerateNAD("test2", "test2", "test", ovntypes.Layer2Topology, + "10.0.0.0/24", ovntypes.NetworkRolePrimary) + clientSet := util.GetOVNClientset(nad1, nad2) + + // init the allocator that should reserve already allocated keys for test1 + allocator, err := initTunnelKeysAllocator(clientSet.NetworkAttchDefClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // check that reserving different keys for test2 will fail + err = allocator.ReserveKeys("test1", []int{16711685, 16715779}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("can't reserve ids [16715779] for the resource test1. 
It is already allocated with different ids [16715780]")) + // now try to allocate correct number of keys for test1 and check that returned IDs are correct + ids, err := allocator.AllocateKeys("test1", 2, 2) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ids).To(gomega.Equal([]int{16711685, 16715780})) + // now allocate ids for networkID 1 + ids, err = allocator.AllocateKeys("test2", 1, 2) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ids).To(gomega.Equal([]int{16711684, 16715779})) + // now try networkID 3 to make sure IDs of nad test1 are not allocated again + ids, err = allocator.AllocateKeys("test3", 3, 2) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ids).To(gomega.Equal([]int{16711686, 16715781})) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + ginkgo.Context("Node gateway router port IP allocations", func() { ginkgo.It("verify the node annotations", func() { app.Action = func(ctx *cli.Context) error { @@ -919,8 +965,8 @@ var _ = ginkgo.Describe("Cluster Manager", func() { ginkgo.Context("Transit switch port IP allocations", func() { ginkgo.It("Interconnect enabled", func() { - config.ClusterManager.V4TransitSwitchSubnet = "100.89.0.0/16" - config.ClusterManager.V6TransitSwitchSubnet = "fd99::/64" + config.ClusterManager.V4TransitSubnet = "100.89.0.0/16" + config.ClusterManager.V6TransitSubnet = "fd99::/64" app.Action = func(ctx *cli.Context) error { nodes := []corev1.Node{ { @@ -984,12 +1030,12 @@ var _ = ginkgo.Describe("Cluster Manager", func() { return fmt.Errorf("transit switch ips for node %s not allocated", n.Name) } - _, transitSwitchV4Subnet, err := net.ParseCIDR(config.ClusterManager.V4TransitSwitchSubnet) + _, transitSwitchV4Subnet, err := net.ParseCIDR(config.ClusterManager.V4TransitSubnet) if err != nil { return fmt.Errorf("could not parse IPv4 transit switch subnet %v", err) } 
- _, transitSwitchV6Subnet, err := net.ParseCIDR(config.ClusterManager.V6TransitSwitchSubnet) + _, transitSwitchV6Subnet, err := net.ParseCIDR(config.ClusterManager.V6TransitSubnet) if err != nil { return fmt.Errorf("could not parse IPv6 transit switch subnet %v", err) } diff --git a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go index 508a9e5b55..58c1f41554 100644 --- a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go +++ b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go @@ -17,6 +17,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" @@ -41,7 +42,7 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( fakeClient = util.GetOVNClientset(objects...).GetClusterManagerClientset() wf, err := factory.NewClusterManagerWatchFactory(fakeClient) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - networkManager, err = networkmanager.NewForCluster(&testnm.FakeControllerManager{}, wf, fakeClient, nil) + networkManager, err = networkmanager.NewForCluster(&testnm.FakeControllerManager{}, wf, fakeClient, nil, id.NewTunnelKeyAllocator("TunnelKeys")) gomega.Expect(err).NotTo(gomega.HaveOccurred()) controller, err = NewController(fakeClient, wf, networkManager.Interface()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go index c03c851808..680ee18a37 100644 --- 
a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go @@ -24,6 +24,7 @@ import ( "k8s.io/client-go/util/workqueue" "k8s.io/utils/ptr" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" controllerutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" eiptypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" @@ -1005,7 +1006,7 @@ func TestController_reconcile(t *testing.T) { wf, err := factory.NewClusterManagerWatchFactory(fakeClientset) g.Expect(err).ToNot(gomega.HaveOccurred()) - nm, err := networkmanager.NewForCluster(&nmtest.FakeControllerManager{}, wf, fakeClientset, nil) + nm, err := networkmanager.NewForCluster(&nmtest.FakeControllerManager{}, wf, fakeClientset, nil, id.NewTunnelKeyAllocator("TunnelKeys")) g.Expect(err).ToNot(gomega.HaveOccurred()) c := NewController(nm.Interface(), wf, fakeClientset) diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go index 08791e9bf4..ae9b88f6f1 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go @@ -20,6 +20,7 @@ import ( "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" @@ -61,7 +62,7 @@ var _ = Describe("User Defined Network Controller", func() { Expect(err).NotTo(HaveOccurred()) Expect(f.Start()).To(Succeed()) - networkManager, err := 
networkmanager.NewForCluster(&nmtest.FakeControllerManager{}, f, cs, nil) + networkManager, err := networkmanager.NewForCluster(&nmtest.FakeControllerManager{}, f, cs, nil, id.NewTunnelKeyAllocator("TunnelKeys")) Expect(err).NotTo(HaveOccurred()) return New(cs.NetworkAttchDefClient, f.NADInformer(), cs.UserDefinedNetworkClient, f.UserDefinedNetworkInformer(), f.ClusterUserDefinedNetworkInformer(), diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go index c18cad708d..e451ed3923 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go @@ -174,6 +174,13 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s netConfSpec.DefaultGatewayIPs = ipString(cfg.DefaultGatewayIPs) } netConfSpec.JoinSubnet = cidrString(renderJoinSubnets(cfg.Role, cfg.JoinSubnets)) + // now generate transit subnet for layer2 topology + if cfg.Role == userdefinednetworkv1.NetworkRolePrimary { + err := util.SetTransitSubnets(netConfSpec) + if err != nil { + return nil, err + } + } case userdefinednetworkv1.NetworkTopologyLocalnet: cfg := spec.GetLocalnet() netConfSpec.Role = strings.ToLower(string(cfg.Role)) @@ -194,6 +201,7 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s if err := util.ValidateNetConf(nadName, netConfSpec); err != nil { return nil, err } + if _, err := util.NewNetInfo(netConfSpec); err != nil { return nil, err } @@ -218,6 +226,9 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s if len(netConfSpec.JoinSubnet) > 0 { cniNetConf["joinSubnet"] = netConfSpec.JoinSubnet } + if len(netConfSpec.TransitSubnet) > 0 { + cniNetConf["transitSubnet"] = netConfSpec.TransitSubnet + } if len(netConfSpec.Subnets) > 0 { 
cniNetConf["subnets"] = netConfSpec.Subnets } diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go index c24b56503e..e44cee4366 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go @@ -1,14 +1,18 @@ package template import ( + "strings" + netv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" . 
"github.com/onsi/ginkgo/v2" @@ -373,6 +377,7 @@ var _ = Describe("NetAttachDefTemplate", func() { "role": "primary", "topology": "layer2", "joinSubnet": "100.65.0.0/16,fd99::/64", + "transitSubnet": "100.88.0.0/16,fd97::/64", "subnets": "192.168.100.0/24,2001:dbb::/64", "mtu": 1500, "allowPersistentIPs": true @@ -399,6 +404,7 @@ var _ = Describe("NetAttachDefTemplate", func() { "role": "primary", "topology": "layer2", "joinSubnet": "100.62.0.0/24,fd92::/64", + "transitSubnet": "100.88.0.0/16,fd97::/64", "subnets": "192.168.100.0/24,2001:dbb::/64", "mtu": 1500, "allowPersistentIPs": true @@ -509,6 +515,7 @@ var _ = Describe("NetAttachDefTemplate", func() { "role": "primary", "topology": "layer2", "joinSubnet": "100.65.0.0/16,fd99::/64", + "transitSubnet": "100.88.0.0/16,fd97::/64", "subnets": "192.168.100.0/24,2001:dbb::/64", "mtu": 1500, "allowPersistentIPs": true @@ -535,6 +542,7 @@ var _ = Describe("NetAttachDefTemplate", func() { "role": "primary", "topology": "layer2", "joinSubnet": "100.62.0.0/24,fd92::/64", + "transitSubnet": "100.88.0.0/16,fd97::/64", "subnets": "192.168.100.0/24,2001:dbb::/64", "mtu": 1500, "allowPersistentIPs": true @@ -624,4 +632,45 @@ var _ = Describe("NetAttachDefTemplate", func() { }`, ), ) + + It("should correctly assign transit Subnets", func() { + // check no overlap, use default values + netConf := &ovncnitypes.NetConf{ + Role: strings.ToLower(types.NetworkRolePrimary), + Topology: strings.ToLower(types.Layer2Topology), + Subnets: "10.12.0.0/16,fd12:dbba::/64", + } + err := util.SetTransitSubnets(netConf) + Expect(err).NotTo(HaveOccurred()) + Expect(netConf.TransitSubnet).To(Equal("100.88.0.0/16,fd97::/64")) + // check Subnet with the default Transit subnet overlap + netConf = &ovncnitypes.NetConf{ + Role: strings.ToLower(types.NetworkRolePrimary), + Topology: strings.ToLower(types.Layer2Topology), + Subnets: "100.88.0.0/15,fd97::/63", + } + err = util.SetTransitSubnets(netConf) + Expect(err).NotTo(HaveOccurred()) + 
Expect(netConf.TransitSubnet).To(Equal("100.90.0.0/16,fd97:0:0:2::/64")) + // check joinSubnet with the default Transit subnet overlap + netConf = &ovncnitypes.NetConf{ + Role: strings.ToLower(types.NetworkRolePrimary), + Topology: strings.ToLower(types.Layer2Topology), + Subnets: "10.12.0.0/16,fd12:dbba::/64", + JoinSubnet: "100.88.0.0/17,fd97::/65", + } + err = util.SetTransitSubnets(netConf) + Expect(err).NotTo(HaveOccurred()) + Expect(netConf.TransitSubnet).To(Equal("100.89.0.0/16,fd97:0:0:1::/64")) + // check Subnet with the default Transit subnet overlap, then joinSubnet overlaps with the next selected transit subnet + netConf = &ovncnitypes.NetConf{ + Role: strings.ToLower(types.NetworkRolePrimary), + Topology: strings.ToLower(types.Layer2Topology), + Subnets: "100.88.0.0/15,fd97::/65", + JoinSubnet: "100.90.0.0/16,fd97:0:0:1::/64", + } + err = util.SetTransitSubnets(netConf) + Expect(err).NotTo(HaveOccurred()) + Expect(netConf.TransitSubnet).To(Equal("100.91.0.0/16,fd97:0:0:2::/64")) + }) }) diff --git a/go-controller/pkg/clustermanager/zone_cluster_controller.go b/go-controller/pkg/clustermanager/zone_cluster_controller.go index 41452e9c80..f56c61d9d8 100644 --- a/go-controller/pkg/clustermanager/zone_cluster_controller.go +++ b/go-controller/pkg/clustermanager/zone_cluster_controller.go @@ -66,16 +66,16 @@ func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *fa var err error if config.OVNKubernetesFeature.EnableInterconnect { if config.IPv4Mode { - transitSwitchIPv4Generator, err = ipgenerator.NewIPGenerator(config.ClusterManager.V4TransitSwitchSubnet) + transitSwitchIPv4Generator, err = ipgenerator.NewIPGenerator(config.ClusterManager.V4TransitSubnet) if err != nil { - return nil, fmt.Errorf("error creating IP Generator for v4 transit switch subnet %s: %w", config.ClusterManager.V4TransitSwitchSubnet, err) + return nil, fmt.Errorf("error creating IP Generator for v4 transit subnet %s: %w", config.ClusterManager.V4TransitSubnet, 
err) } } if config.IPv6Mode { - transitSwitchIPv6Generator, err = ipgenerator.NewIPGenerator(config.ClusterManager.V6TransitSwitchSubnet) + transitSwitchIPv6Generator, err = ipgenerator.NewIPGenerator(config.ClusterManager.V6TransitSubnet) if err != nil { - return nil, fmt.Errorf("error creating IP Generator for v6 transit switch subnet %s: %w", config.ClusterManager.V4TransitSwitchSubnet, err) + return nil, fmt.Errorf("error creating IP Generator for v6 transit subnet %s: %w", config.ClusterManager.V4TransitSubnet, err) } } } diff --git a/go-controller/pkg/cni/helper_linux_test.go b/go-controller/pkg/cni/helper_linux_test.go index 4efb711de0..fb744c3018 100644 --- a/go-controller/pkg/cni/helper_linux_test.go +++ b/go-controller/pkg/cni/helper_linux_test.go @@ -560,7 +560,7 @@ func TestSetupSriovInterface(t *testing.T) { t.Fatal("failed to get NameSpace for test") }*/ - netNsDoForward := &mocks.NetNS{} + netNsDoForward := &cni_ns_mocks.NetNS{} netNsDoForward.On("Fd", mock.Anything).Return(uintptr(0)) var netNsDoError error netNsDoForward.On("Do", mock.AnythingOfType("func(ns.NetNS) error")).Run(func(args mock.Arguments) { diff --git a/go-controller/pkg/cni/mocks/CNIPluginLibOps.go b/go-controller/pkg/cni/mocks/CNIPluginLibOps.go index 8ddbeb5fd0..43e437a01d 100644 --- a/go-controller/pkg/cni/mocks/CNIPluginLibOps.go +++ b/go-controller/pkg/cni/mocks/CNIPluginLibOps.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/cni/mocks/NetNS.go b/go-controller/pkg/cni/mocks/NetNS.go deleted file mode 100644 index 193359080b..0000000000 --- a/go-controller/pkg/cni/mocks/NetNS.go +++ /dev/null @@ -1,98 +0,0 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. 
- -package mocks - -import ( - ns "github.com/containernetworking/plugins/pkg/ns" - mock "github.com/stretchr/testify/mock" -) - -// NetNS is an autogenerated mock type for the NetNS type -type NetNS struct { - mock.Mock -} - -// Close provides a mock function with given fields: -func (_m *NetNS) Close() error { - ret := _m.Called() - - var r0 error - if rf, ok := ret.Get(0).(func() error); ok { - r0 = rf() - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Do provides a mock function with given fields: toRun -func (_m *NetNS) Do(toRun func(ns.NetNS) error) error { - ret := _m.Called(toRun) - - var r0 error - if rf, ok := ret.Get(0).(func(func(ns.NetNS) error) error); ok { - r0 = rf(toRun) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Fd provides a mock function with given fields: -func (_m *NetNS) Fd() uintptr { - ret := _m.Called() - - var r0 uintptr - if rf, ok := ret.Get(0).(func() uintptr); ok { - r0 = rf() - } else { - r0 = ret.Get(0).(uintptr) - } - - return r0 -} - -// Path provides a mock function with given fields: -func (_m *NetNS) Path() string { - ret := _m.Called() - - var r0 string - if rf, ok := ret.Get(0).(func() string); ok { - r0 = rf() - } else { - r0 = ret.Get(0).(string) - } - - return r0 -} - -// Set provides a mock function with given fields: -func (_m *NetNS) Set() error { - ret := _m.Called() - - var r0 error - if rf, ok := ret.Get(0).(func() error); ok { - r0 = rf() - } else { - r0 = ret.Error(0) - } - - return r0 -} - -type mockConstructorTestingTNewNetNS interface { - mock.TestingT - Cleanup(func()) -} - -// NewNetNS creates a new instance of NetNS. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
-func NewNetNS(t mockConstructorTestingTNewNetNS) *NetNS { - mock := &NetNS{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/go-controller/pkg/cni/types/types.go b/go-controller/pkg/cni/types/types.go index 1813dccf83..90fcb47ef0 100644 --- a/go-controller/pkg/cni/types/types.go +++ b/go-controller/pkg/cni/types/types.go @@ -50,6 +50,11 @@ type NetConf struct { // valid for UDN layer3/layer2 network topology // default value: 100.65.0.0/16,fd99::/64 if not provided JoinSubnet string `json:"joinSubnet,omitempty"` + // transit subnet cidr was previously internally set to the default value, + // but with the recent layer2 topology changes it may overlap with the network Subnet. + // To avoid that, transit subnet is now configurable. Only used by Primary Layer2 networks. + // in case of dualstack cluster, please do a comma-separated list + TransitSubnet string `json:"transitSubnet,omitempty"` // comma-separated list of default gateway IPs for layer2 primary networks // in case of dualstack cluster, please do a comma-separated list // expected format: diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 72a22defbd..1dd4dce6ef 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -203,9 +203,11 @@ var ( } ClusterManager = ClusterManagerConfig{ - V4TransitSwitchSubnet: "100.88.0.0/16", - V6TransitSwitchSubnet: "fd97::/64", + V4TransitSubnet: "100.88.0.0/16", + V6TransitSubnet: "fd97::/64", } + + Layer2UsesTransitRouter bool ) const ( @@ -561,10 +563,10 @@ type OvnKubeNodeConfig struct { // ClusterManagerConfig holds configuration for ovnkube-cluster-manager type ClusterManagerConfig struct { - // V4TransitSwitchSubnet to be used in the cluster for interconnecting multiple zones - V4TransitSwitchSubnet string `gcfg:"v4-transit-switch-subnet"` - // V6TransitSwitchSubnet to be used in the cluster for interconnecting multiple zones - 
V6TransitSwitchSubnet string `gcfg:"v6-transit-switch-subnet"` + // V4TransitSubnet to be used in the cluster for interconnecting multiple zones + V4TransitSubnet string `gcfg:"v4-transit-subnet"` + // V6TransitSubnet to be used in the cluster for interconnecting multiple zones + V6TransitSubnet string `gcfg:"v6-transit-subnet"` } // OvnDBScheme describes the OVN database connection transport method @@ -684,6 +686,7 @@ func PrepareTestConfig() error { if Gateway.Mode != GatewayModeDisabled { Gateway.EphemeralPortRange = DefaultEphemeralPortRange } + Layer2UsesTransitRouter = true if err := completeConfig(); err != nil { return err @@ -1659,16 +1662,16 @@ var OvnKubeNodeFlags = []cli.Flag{ // ClusterManagerFlags captures ovnkube-cluster-manager specific configurations var ClusterManagerFlags = []cli.Flag{ &cli.StringFlag{ - Name: "cluster-manager-v4-transit-switch-subnet", - Usage: "The v4 transit switch subnet used for assigning transit switch IPv4 addresses for interconnect", - Destination: &cliConfig.ClusterManager.V4TransitSwitchSubnet, - Value: ClusterManager.V4TransitSwitchSubnet, + Name: "cluster-manager-v4-transit-subnet", + Usage: "The v4 transit subnet used for assigning transit switch and transit router IPv4 addresses for interconnect", + Destination: &cliConfig.ClusterManager.V4TransitSubnet, + Value: ClusterManager.V4TransitSubnet, }, &cli.StringFlag{ - Name: "cluster-manager-v6-transit-switch-subnet", - Usage: "The v6 transit switch subnet used for assigning transit switch IPv6 addresses for interconnect", - Destination: &cliConfig.ClusterManager.V6TransitSwitchSubnet, - Value: ClusterManager.V6TransitSwitchSubnet, + Name: "cluster-manager-v6-transit-subnet", + Usage: "The v6 transit switch subnet used for assigning transit switch and transit router IPv6 addresses for interconnect", + Destination: &cliConfig.ClusterManager.V6TransitSubnet, + Value: ClusterManager.V6TransitSubnet, }, } @@ -2187,14 +2190,14 @@ func buildClusterManagerConfig(cli, file 
*config) error { // into their final form. func completeClusterManagerConfig(allSubnets *ConfigSubnets) error { // Validate v4 and v6 transit switch subnets - v4IP, v4TransitCIDR, err := net.ParseCIDR(ClusterManager.V4TransitSwitchSubnet) + v4IP, v4TransitCIDR, err := net.ParseCIDR(ClusterManager.V4TransitSubnet) if err != nil || utilnet.IsIPv6(v4IP) { - return fmt.Errorf("invalid transit switch v4 subnet specified, subnet: %s: error: %v", ClusterManager.V4TransitSwitchSubnet, err) + return fmt.Errorf("invalid transit switch v4 subnet specified, subnet: %s: error: %v", ClusterManager.V4TransitSubnet, err) } - v6IP, v6TransitCIDR, err := net.ParseCIDR(ClusterManager.V6TransitSwitchSubnet) + v6IP, v6TransitCIDR, err := net.ParseCIDR(ClusterManager.V6TransitSubnet) if err != nil || !utilnet.IsIPv6(v6IP) { - return fmt.Errorf("invalid transit switch v6 subnet specified, subnet: %s: error: %v", ClusterManager.V6TransitSwitchSubnet, err) + return fmt.Errorf("invalid transit switch v6 subnet specified, subnet: %s: error: %v", ClusterManager.V6TransitSubnet, err) } allSubnets.Append(ConfigSubnetTransit, v4TransitCIDR) allSubnets.Append(ConfigSubnetTransit, v6TransitCIDR) @@ -2507,7 +2510,7 @@ func completeConfig() error { return err } - if err := allSubnets.CheckForOverlaps(); err != nil { + if _, _, err := allSubnets.CheckForOverlaps(); err != nil { return err } diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index c5a032c92c..6eb6013ea0 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -236,8 +236,8 @@ enable-admin-network-policy=false enable-persistent-ips=false [clustermanager] -v4-transit-switch-subnet=100.89.0.0/16 -v6-transit-switch-subnet=fd98::/64 +v4-transit-subnet=100.89.0.0/16 +v6-transit-subnet=fd98::/64 ` var newData string @@ -703,8 +703,8 @@ var _ = Describe("Config Operations", func() { 
gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ {ovntest.MustParseIPNet("11.132.0.0/14"), 23}, })) - gomega.Expect(ClusterManager.V4TransitSwitchSubnet).To(gomega.Equal("100.89.0.0/16")) - gomega.Expect(ClusterManager.V6TransitSwitchSubnet).To(gomega.Equal("fd98::/64")) + gomega.Expect(ClusterManager.V4TransitSubnet).To(gomega.Equal("100.89.0.0/16")) + gomega.Expect(ClusterManager.V6TransitSubnet).To(gomega.Equal("fd98::/64")) return nil } @@ -815,8 +815,8 @@ var _ = Describe("Config Operations", func() { })) gomega.Expect(Default.MonitorAll).To(gomega.BeFalse()) gomega.Expect(Default.OfctrlWaitBeforeClear).To(gomega.Equal(5000)) - gomega.Expect(ClusterManager.V4TransitSwitchSubnet).To(gomega.Equal("100.90.0.0/16")) - gomega.Expect(ClusterManager.V6TransitSwitchSubnet).To(gomega.Equal("fd96::/64")) + gomega.Expect(ClusterManager.V4TransitSubnet).To(gomega.Equal("100.90.0.0/16")) + gomega.Expect(ClusterManager.V6TransitSubnet).To(gomega.Equal("fd96::/64")) return nil } @@ -891,8 +891,8 @@ var _ = Describe("Config Operations", func() { "-dns-service-namespace=kube-system-2", "-dns-service-name=kube-dns-2", "-disable-requestedchassis=true", - "-cluster-manager-v4-transit-switch-subnet=100.90.0.0/16", - "-cluster-manager-v6-transit-switch-subnet=fd96::/64", + "-cluster-manager-v4-transit-subnet=100.90.0.0/16", + "-cluster-manager-v6-transit-subnet=fd96::/64", } err = app.Run(cliArgs) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1224,7 +1224,7 @@ enable-pprof=true } cliArgs := []string{ app.Name, - "-cluster-manager-v4-transit-switch-subnet=foobar", + "-cluster-manager-v4-transit-subnet=foobar", } err := app.Run(cliArgs) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1237,7 +1237,7 @@ enable-pprof=true } cliArgs := []string{ app.Name, - "-cluster-manager-v6-transit-switch-subnet=100.89.0.0/16", + "-cluster-manager-v6-transit-subnet=100.89.0.0/16", } err := app.Run(cliArgs) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ 
-1250,13 +1250,13 @@ enable-pprof=true } cliArgs := []string{ app.Name, - "-cluster-manager-v4-transit-switch-subnet=100.89.0.0/16", - "-cluster-manager-v6-transit-switch-subnet=fd99::/64", + "-cluster-manager-v4-transit-subnet=100.89.0.0/16", + "-cluster-manager-v6-transit-subnet=fd99::/64", } err := app.Run(cliArgs) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(ClusterManager.V4TransitSwitchSubnet).To(gomega.Equal("100.89.0.0/16")) - gomega.Expect(ClusterManager.V6TransitSwitchSubnet).To(gomega.Equal("fd99::/64")) + gomega.Expect(ClusterManager.V4TransitSubnet).To(gomega.Equal("100.89.0.0/16")) + gomega.Expect(ClusterManager.V6TransitSubnet).To(gomega.Equal("fd99::/64")) }) It("overrides config file and defaults with CLI options (multi-master)", func() { kubeconfigFile, _, err := createTempFile("kubeconfig") diff --git a/go-controller/pkg/config/utils.go b/go-controller/pkg/config/utils.go index 20f4e0b35c..8aed084d57 100644 --- a/go-controller/pkg/config/utils.go +++ b/go-controller/pkg/config/utils.go @@ -209,17 +209,18 @@ func (cs *ConfigSubnets) Append(subnetType ConfigSubnetType, subnet *net.IPNet) } } -// CheckForOverlaps checks if any of the subnets in cs overlap -func (cs *ConfigSubnets) CheckForOverlaps() error { +// CheckForOverlaps checks if any of the subnets in cs overlap, and returns the first overlapping subnets +// together with an error. 
+func (cs *ConfigSubnets) CheckForOverlaps() (*net.IPNet, *net.IPNet, error) { for i, si := range cs.Subnets { for j := 0; j < i; j++ { sj := cs.Subnets[j] if si.Subnet.Contains(sj.Subnet.IP) || sj.Subnet.Contains(si.Subnet.IP) { - return NewSubnetOverlapError(si, sj) + return si.Subnet, sj.Subnet, NewSubnetOverlapError(si, sj) } } } - return nil + return nil, nil, nil } func (cs *ConfigSubnets) describeSubnetType(subnetType ConfigSubnetType) string { diff --git a/go-controller/pkg/config/utils_test.go b/go-controller/pkg/config/utils_test.go index 0092dc34dd..804b179a99 100644 --- a/go-controller/pkg/config/utils_test.go +++ b/go-controller/pkg/config/utils_test.go @@ -308,7 +308,7 @@ func Test_checkForOverlap(t *testing.T) { allSubnets.Append(ConfigSubnetCluster, subnet) } - err := allSubnets.CheckForOverlaps() + _, _, err := allSubnets.CheckForOverlaps() if err == nil && tc.shouldError { t.Errorf("testcase \"%s\" failed to find overlap", tc.name) } else if err != nil && !tc.shouldError { diff --git a/go-controller/pkg/controller/controller.go b/go-controller/pkg/controller/controller.go index f486981bdd..6d518c3e0c 100644 --- a/go-controller/pkg/controller/controller.go +++ b/go-controller/pkg/controller/controller.go @@ -255,7 +255,7 @@ func (c *controller[T]) processNextQueueItem() bool { if err != nil { retry := c.config.MaxAttempts == InfiniteAttempts || c.queue.NumRequeues(key) < c.config.MaxAttempts if retry { - klog.Infof("Controller %s: error found while processing %s: %v", c.name, key, err) + klog.Errorf("Controller %s: error found while processing %s: %v", c.name, key, err) c.queue.AddRateLimited(key) return true } diff --git a/go-controller/pkg/controllermanager/controller_manager.go b/go-controller/pkg/controllermanager/controller_manager.go index 6597e381ca..8b81cc3261 100644 --- a/go-controller/pkg/controllermanager/controller_manager.go +++ b/go-controller/pkg/controllermanager/controller_manager.go @@ -4,11 +4,13 @@ import ( "context" "errors" 
"fmt" + "strings" "sync" "time" "github.com/containernetworking/cni/pkg/types" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" @@ -252,8 +254,8 @@ func NewControllerManager(ovnClient *util.OVNClientset, wf *factory.WatchFactory wg: wg, multicastSupport: config.EnableMulticast, } - var err error + cm.networkManager = networkmanager.Default() if config.OVNKubernetesFeature.EnableMultiNetwork { cm.networkManager, err = networkmanager.NewForZone(config.Default.Zone, cm, wf) @@ -422,6 +424,10 @@ func (cm *ControllerManager) Start(ctx context.Context) error { } klog.Infof("Waiting for node in zone sync took: %s", time.Since(start)) + if err = cm.setTopologyType(); err != nil { + return fmt.Errorf("failed to set layer2 topology type: %w", err) + } + cm.configureMetrics(cm.stopChan) err = cm.configureSCTPSupport() @@ -536,3 +542,92 @@ func (cm *ControllerManager) configureAdvertisedNetworkIsolation() error { _, err := addressSetFactory.EnsureAddressSet(ovn.GetAdvertisedNetworkSubnetsAddressSetDBIDs()) return err } + +func (cm *ControllerManager) setTopologyType() error { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(cm.nbClient, func(lr *nbdb.LogicalRouter) bool { + return strings.Contains(lr.Name, ovntypes.TransitRouter) + }) + if err != nil { + return fmt.Errorf("failed to find transit routers: %w", err) + } + if len(routers) > 0 { + // Transit router is already used, no need to check further + config.Layer2UsesTransitRouter = true + return nil + } + // Transit router is not used yet, check if we can switch to the new topology now. + // Find all layer2 switches and check if they have any running pods. 
+ layer2Switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(cm.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return ls.ExternalIDs[ovntypes.TopologyExternalID] == ovntypes.Layer2Topology + }) + if err != nil { + return fmt.Errorf("failed to find layer2 switches: %w", err) + } + for _, sw := range layer2Switches { + hasRunningPods, err := cm.hasLocalPodsOnSwitch(sw) + if err != nil { + return fmt.Errorf("failed to check if there are running pods on switch %s: %w", sw.Name, err) + } + if hasRunningPods { + klog.Infof("Network %s has running pods, not switching to transit router topology yet", sw.Name) + return nil + } + } + klog.Infof("Switching to transit router for layer2 networks") + // we checked all layer2 switches and none of them has running pods, so we can switch to the new topology + config.Layer2UsesTransitRouter = true + return cm.setUDNLayer2NodeUsesTransitRouter() +} + +func hasPort(ports []string, port string) bool { + for _, p := range ports { + if p == port { + return true + } + } + return false +} + +func (cm *ControllerManager) hasLocalPodsOnSwitch(sw *nbdb.LogicalSwitch) (bool, error) { + if len(sw.Ports) == 0 { + return false, nil + } + + ports, err := libovsdbops.FindLogicalSwitchPortWithPredicate( + cm.nbClient, + func(lsp *nbdb.LogicalSwitchPort) bool { + return lsp.Type == "" && + lsp.ExternalIDs["pod"] == "true" && + hasPort(sw.Ports, lsp.UUID) + }) + if err != nil { + return false, err + } + if len(ports) > 0 { + return true, nil + } + return false, nil +} + +func (cm *ControllerManager) setUDNLayer2NodeUsesTransitRouter() error { + nodes, err := cm.kube.KClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("unable to get nodes from informer while waiting for node zone sync") + } + if len(nodes.Items) == 0 { + klog.Infof("No nodes in cluster: waiting for a node to have %q zone is not needed", config.Default.Zone) + return nil + } + for _, node := range nodes.Items { + if 
util.GetNodeZone(&node) == config.Default.Zone { + annotator := kube.NewNodeAnnotator(cm.kube, node.Name) + if err = annotator.Set(util.Layer2TopologyVersion, "2.0"); err != nil { + return fmt.Errorf("failed to set node %s annotation %s: %w", node.Name, util.Layer2TopologyVersion, err) + } + if err = annotator.Run(); err != nil { + return fmt.Errorf("failed to run node %s annotator: %w", node.Name, err) + } + } + } + return nil +} diff --git a/go-controller/pkg/factory/mocks/NodeWatchFactory.go b/go-controller/pkg/factory/mocks/NodeWatchFactory.go index 11c39843de..3eb31c87ae 100644 --- a/go-controller/pkg/factory/mocks/NodeWatchFactory.go +++ b/go-controller/pkg/factory/mocks/NodeWatchFactory.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks @@ -32,7 +32,7 @@ type NodeWatchFactory struct { mock.Mock } -// APBRouteInformer provides a mock function with given fields: +// APBRouteInformer provides a mock function with no fields func (_m *NodeWatchFactory) APBRouteInformer() v1.AdminPolicyBasedExternalRouteInformer { ret := _m.Called() @@ -202,7 +202,7 @@ func (_m *NodeWatchFactory) AddServiceHandler(handlerFuncs cache.ResourceEventHa return r0, r1 } -// ClusterUserDefinedNetworkInformer provides a mock function with given fields: +// ClusterUserDefinedNetworkInformer provides a mock function with no fields func (_m *NodeWatchFactory) ClusterUserDefinedNetworkInformer() userdefinednetworkv1.ClusterUserDefinedNetworkInformer { ret := _m.Called() @@ -222,7 +222,7 @@ func (_m *NodeWatchFactory) ClusterUserDefinedNetworkInformer() userdefinednetwo return r0 } -// EgressIPInformer provides a mock function with given fields: +// EgressIPInformer provides a mock function with no fields func (_m *NodeWatchFactory) EgressIPInformer() egressipv1.EgressIPInformer { ret := _m.Called() @@ -242,7 +242,7 @@ func (_m *NodeWatchFactory) EgressIPInformer() egressipv1.EgressIPInformer { return r0 } 
-// GetAllPods provides a mock function with given fields: +// GetAllPods provides a mock function with no fields func (_m *NodeWatchFactory) GetAllPods() ([]*corev1.Pod, error) { ret := _m.Called() @@ -332,7 +332,7 @@ func (_m *NodeWatchFactory) GetNamespace(name string) (*corev1.Namespace, error) return r0, r1 } -// GetNamespaces provides a mock function with given fields: +// GetNamespaces provides a mock function with no fields func (_m *NodeWatchFactory) GetNamespaces() ([]*corev1.Namespace, error) { ret := _m.Called() @@ -392,7 +392,7 @@ func (_m *NodeWatchFactory) GetNode(name string) (*corev1.Node, error) { return r0, r1 } -// GetNodes provides a mock function with given fields: +// GetNodes provides a mock function with no fields func (_m *NodeWatchFactory) GetNodes() ([]*corev1.Node, error) { ret := _m.Called() @@ -542,7 +542,7 @@ func (_m *NodeWatchFactory) GetServiceEndpointSlices(namespace string, svcName s return r0, r1 } -// GetServices provides a mock function with given fields: +// GetServices provides a mock function with no fields func (_m *NodeWatchFactory) GetServices() ([]*corev1.Service, error) { ret := _m.Called() @@ -602,7 +602,7 @@ func (_m *NodeWatchFactory) ListNodes(selector labels.Selector) ([]*corev1.Node, return r0, r1 } -// LocalPodInformer provides a mock function with given fields: +// LocalPodInformer provides a mock function with no fields func (_m *NodeWatchFactory) LocalPodInformer() cache.SharedIndexInformer { ret := _m.Called() @@ -622,7 +622,7 @@ func (_m *NodeWatchFactory) LocalPodInformer() cache.SharedIndexInformer { return r0 } -// NADInformer provides a mock function with given fields: +// NADInformer provides a mock function with no fields func (_m *NodeWatchFactory) NADInformer() k8s_cni_cncf_iov1.NetworkAttachmentDefinitionInformer { ret := _m.Called() @@ -642,7 +642,7 @@ func (_m *NodeWatchFactory) NADInformer() k8s_cni_cncf_iov1.NetworkAttachmentDef return r0 } -// NamespaceInformer provides a mock function with 
given fields: +// NamespaceInformer provides a mock function with no fields func (_m *NodeWatchFactory) NamespaceInformer() informerscorev1.NamespaceInformer { ret := _m.Called() @@ -662,7 +662,7 @@ func (_m *NodeWatchFactory) NamespaceInformer() informerscorev1.NamespaceInforme return r0 } -// NodeCoreInformer provides a mock function with given fields: +// NodeCoreInformer provides a mock function with no fields func (_m *NodeWatchFactory) NodeCoreInformer() informerscorev1.NodeInformer { ret := _m.Called() @@ -682,7 +682,7 @@ func (_m *NodeWatchFactory) NodeCoreInformer() informerscorev1.NodeInformer { return r0 } -// NodeInformer provides a mock function with given fields: +// NodeInformer provides a mock function with no fields func (_m *NodeWatchFactory) NodeInformer() cache.SharedIndexInformer { ret := _m.Called() @@ -702,7 +702,7 @@ func (_m *NodeWatchFactory) NodeInformer() cache.SharedIndexInformer { return r0 } -// PodCoreInformer provides a mock function with given fields: +// PodCoreInformer provides a mock function with no fields func (_m *NodeWatchFactory) PodCoreInformer() informerscorev1.PodInformer { ret := _m.Called() @@ -742,7 +742,7 @@ func (_m *NodeWatchFactory) RemoveServiceHandler(handler *factory.Handler) { _m.Called(handler) } -// RouteAdvertisementsInformer provides a mock function with given fields: +// RouteAdvertisementsInformer provides a mock function with no fields func (_m *NodeWatchFactory) RouteAdvertisementsInformer() routeadvertisementsv1.RouteAdvertisementsInformer { ret := _m.Called() @@ -762,12 +762,12 @@ func (_m *NodeWatchFactory) RouteAdvertisementsInformer() routeadvertisementsv1. 
return r0 } -// Shutdown provides a mock function with given fields: +// Shutdown provides a mock function with no fields func (_m *NodeWatchFactory) Shutdown() { _m.Called() } -// Start provides a mock function with given fields: +// Start provides a mock function with no fields func (_m *NodeWatchFactory) Start() error { ret := _m.Called() @@ -785,7 +785,7 @@ func (_m *NodeWatchFactory) Start() error { return r0 } -// UserDefinedNetworkInformer provides a mock function with given fields: +// UserDefinedNetworkInformer provides a mock function with no fields func (_m *NodeWatchFactory) UserDefinedNetworkInformer() userdefinednetworkv1.UserDefinedNetworkInformer { ret := _m.Called() diff --git a/go-controller/pkg/factory/mocks/ObjectCacheInterface.go b/go-controller/pkg/factory/mocks/ObjectCacheInterface.go index b63736d685..8b6dcb5b7a 100644 --- a/go-controller/pkg/factory/mocks/ObjectCacheInterface.go +++ b/go-controller/pkg/factory/mocks/ObjectCacheInterface.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -15,7 +15,7 @@ type ObjectCacheInterface struct { mock.Mock } -// GetAllPods provides a mock function with given fields: +// GetAllPods provides a mock function with no fields func (_m *ObjectCacheInterface) GetAllPods() ([]*v1.Pod, error) { ret := _m.Called() @@ -75,7 +75,7 @@ func (_m *ObjectCacheInterface) GetNamespace(name string) (*v1.Namespace, error) return r0, r1 } -// GetNamespaces provides a mock function with given fields: +// GetNamespaces provides a mock function with no fields func (_m *ObjectCacheInterface) GetNamespaces() ([]*v1.Namespace, error) { ret := _m.Called() @@ -135,7 +135,7 @@ func (_m *ObjectCacheInterface) GetNode(name string) (*v1.Node, error) { return r0, r1 } -// GetNodes provides a mock function with given fields: +// GetNodes provides a mock function with no fields func (_m *ObjectCacheInterface) GetNodes() ([]*v1.Node, error) { ret := _m.Called() diff --git a/go-controller/pkg/generator/ip/ip_generator.go b/go-controller/pkg/generator/ip/ip_generator.go index 1344fc3d03..61d0a1fe45 100644 --- a/go-controller/pkg/generator/ip/ip_generator.go +++ b/go-controller/pkg/generator/ip/ip_generator.go @@ -5,6 +5,8 @@ import ( "math/big" "net" + iputils "github.com/containernetworking/plugins/pkg/ip" + utilnet "k8s.io/utils/net" ) @@ -40,3 +42,24 @@ func (ipGenerator *IPGenerator) GenerateIP(idx int) (*net.IPNet, error) { } return nil, fmt.Errorf("generated ip %s from the idx %d is out of range in the network %s", ip.String(), idx, ipGenerator.netCidr.String()) } + +// GenerateIPPair generates a pair of IPs from the base ip and the provided 'idx' +// For example, if the subnet was - 100.88.0.0/16 and the specified +// index is 1, it will return 100.88.0.2/31 and 100.88.0.3/31 +func (ipGenerator *IPGenerator) GenerateIPPair(idx int) (*net.IPNet, *net.IPNet, error) { + netMask := net.CIDRMask(31, 32) + if utilnet.IsIPv6CIDR(ipGenerator.netCidr) { + netMask = net.CIDRMask(127, 128) + } + numberOfIPs := 2 + // nodeIDs start from 
1, netIP is the first IP of the subnet + firstIP := utilnet.AddIPOffset(ipGenerator.netBaseIP, idx*numberOfIPs) + if !ipGenerator.netCidr.Contains(firstIP) { + return nil, nil, fmt.Errorf("generated ip %s from the idx %d is out of range in the network %s", firstIP.String(), idx, ipGenerator.netCidr.String()) + } + secondIP := iputils.NextIP(firstIP) + if secondIP == nil || !ipGenerator.netCidr.Contains(secondIP) { + return nil, nil, fmt.Errorf("generated ip %s from the idx %d is out of range in the network %s", secondIP.String(), idx, ipGenerator.netCidr.String()) + } + return &net.IPNet{IP: firstIP, Mask: netMask}, &net.IPNet{IP: secondIP, Mask: netMask}, nil +} diff --git a/go-controller/pkg/generator/udn/join_ips.go b/go-controller/pkg/generator/udn/join_ips.go index 945799d245..f632365b37 100644 --- a/go-controller/pkg/generator/udn/join_ips.go +++ b/go-controller/pkg/generator/udn/join_ips.go @@ -88,3 +88,14 @@ func getGWRouterIP(subnet string, nodeID int) (*net.IPNet, error) { } return nodeGWRouterLRPIPGenerator.GenerateIP(nodeID) } + +func GetLastIPsFromJoinSubnet(netInfo util.NetInfo) []*net.IPNet { + var gwRouterAddrs []*net.IPNet + if config.IPv4Mode { + gwRouterAddrs = append(gwRouterAddrs, util.GetLastIPOfSubnet(netInfo.JoinSubnetV4(), 1)) + } + if config.IPv6Mode { + gwRouterAddrs = append(gwRouterAddrs, util.GetLastIPOfSubnet(netInfo.JoinSubnetV6(), 1)) + } + return gwRouterAddrs +} diff --git a/go-controller/pkg/generator/udn/masquerade_ips.go b/go-controller/pkg/generator/udn/masquerade_ips.go index 5882fb809e..3cec3c3833 100644 --- a/go-controller/pkg/generator/udn/masquerade_ips.go +++ b/go-controller/pkg/generator/udn/masquerade_ips.go @@ -86,3 +86,23 @@ func GetUDNGatewayMasqueradeIPs(networkID int) ([]*net.IPNet, error) { } return masqIPs, nil } + +// GetUDNMgmtPortMasqueradeIPs returns the list of management port masqueradeIPs for the given UDN's networkID +func GetUDNMgmtPortMasqueradeIPs(networkID int) ([]*net.IPNet, error) { + var masqIPs 
[]*net.IPNet + if config.IPv4Mode { + v4MasqIPs, err := AllocateV4MasqueradeIPs(networkID) + if err != nil { + return nil, fmt.Errorf("failed to get v4 masquerade IP, networkID %d: %v", networkID, err) + } + masqIPs = append(masqIPs, v4MasqIPs.ManagementPort) + } + if config.IPv6Mode { + v6MasqIPs, err := AllocateV6MasqueradeIPs(networkID) + if err != nil { + return nil, fmt.Errorf("failed to get v6 masquerade IP, networkID %d: %v", networkID, err) + } + masqIPs = append(masqIPs, v6MasqIPs.ManagementPort) + } + return masqIPs, nil +} diff --git a/go-controller/pkg/kube/mocks/Annotator.go b/go-controller/pkg/kube/mocks/Annotator.go index 082d7c43fc..3bc47c92c1 100644 --- a/go-controller/pkg/kube/mocks/Annotator.go +++ b/go-controller/pkg/kube/mocks/Annotator.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks @@ -14,7 +14,7 @@ func (_m *Annotator) Delete(key string) { _m.Called(key) } -// Run provides a mock function with given fields: +// Run provides a mock function with no fields func (_m *Annotator) Run() error { ret := _m.Called() diff --git a/go-controller/pkg/kube/mocks/HTTPServer.go b/go-controller/pkg/kube/mocks/HTTPServer.go index 56f2f9ff9f..1998cb8716 100644 --- a/go-controller/pkg/kube/mocks/HTTPServer.go +++ b/go-controller/pkg/kube/mocks/HTTPServer.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/kube/mocks/HTTPServerFactory.go b/go-controller/pkg/kube/mocks/HTTPServerFactory.go index dfe42ae0b7..be3dcb6df7 100644 --- a/go-controller/pkg/kube/mocks/HTTPServerFactory.go +++ b/go-controller/pkg/kube/mocks/HTTPServerFactory.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/kube/mocks/Interface.go b/go-controller/pkg/kube/mocks/Interface.go index 594d33d699..81dd42e2b3 100644 --- a/go-controller/pkg/kube/mocks/Interface.go +++ b/go-controller/pkg/kube/mocks/Interface.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks @@ -17,7 +17,7 @@ type Interface struct { mock.Mock } -// Events provides a mock function with given fields: +// Events provides a mock function with no fields func (_m *Interface) Events() v1.EventInterface { ret := _m.Called() @@ -37,7 +37,7 @@ func (_m *Interface) Events() v1.EventInterface { return r0 } -// GetNode provides a mock function with given fields: name +// GetNodeForWindows provides a mock function with given fields: name func (_m *Interface) GetNodeForWindows(name string) (*corev1.Node, error) { ret := _m.Called(name) @@ -67,7 +67,7 @@ func (_m *Interface) GetNodeForWindows(name string) (*corev1.Node, error) { return r0, r1 } -// GetNodesForWindows provides a mock function with given fields: +// GetNodesForWindows provides a mock function with no fields func (_m *Interface) GetNodesForWindows() ([]*corev1.Node, error) { ret := _m.Called() @@ -97,7 +97,7 @@ func (_m *Interface) GetNodesForWindows() ([]*corev1.Node, error) { return r0, r1 } -// GetPods provides a mock function with given fields: namespace, opts +// GetPodsForDBChecker provides a mock function with given fields: namespace, opts func (_m *Interface) GetPodsForDBChecker(namespace string, opts metav1.ListOptions) ([]*corev1.Pod, error) { ret := _m.Called(namespace, opts) diff --git a/go-controller/pkg/kube/mocks/InterfaceOVN.go b/go-controller/pkg/kube/mocks/InterfaceOVN.go index 18e93ed800..0243889b2f 100644 --- a/go-controller/pkg/kube/mocks/InterfaceOVN.go +++ b/go-controller/pkg/kube/mocks/InterfaceOVN.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. 
DO NOT EDIT. package mocks @@ -71,7 +71,7 @@ func (_m *InterfaceOVN) DeleteCloudPrivateIPConfig(name string) error { return r0 } -// Events provides a mock function with given fields: +// Events provides a mock function with no fields func (_m *InterfaceOVN) Events() corev1.EventInterface { ret := _m.Called() @@ -91,7 +91,7 @@ func (_m *InterfaceOVN) Events() corev1.EventInterface { return r0 } -// GetEgressFirewalls provides a mock function with given fields: +// GetEgressFirewalls provides a mock function with no fields func (_m *InterfaceOVN) GetEgressFirewalls() ([]*egressfirewallv1.EgressFirewall, error) { ret := _m.Called() @@ -151,7 +151,7 @@ func (_m *InterfaceOVN) GetEgressIP(name string) (*egressipv1.EgressIP, error) { return r0, r1 } -// GetEgressIPs provides a mock function with given fields: +// GetEgressIPs provides a mock function with no fields func (_m *InterfaceOVN) GetEgressIPs() ([]*egressipv1.EgressIP, error) { ret := _m.Called() @@ -181,7 +181,7 @@ func (_m *InterfaceOVN) GetEgressIPs() ([]*egressipv1.EgressIP, error) { return r0, r1 } -// GetNode provides a mock function with given fields: name +// GetNodeForWindows provides a mock function with given fields: name func (_m *InterfaceOVN) GetNodeForWindows(name string) (*apicorev1.Node, error) { ret := _m.Called(name) @@ -211,12 +211,12 @@ func (_m *InterfaceOVN) GetNodeForWindows(name string) (*apicorev1.Node, error) return r0, r1 } -// GetNodesForWindows provides a mock function with given fields: +// GetNodesForWindows provides a mock function with no fields func (_m *InterfaceOVN) GetNodesForWindows() ([]*apicorev1.Node, error) { ret := _m.Called() if len(ret) == 0 { - panic("no return value specified for GetNodes") + panic("no return value specified for GetNodesForWindows") } var r0 []*apicorev1.Node @@ -241,7 +241,7 @@ func (_m *InterfaceOVN) GetNodesForWindows() ([]*apicorev1.Node, error) { return r0, r1 } -// GetPods provides a mock function with given fields: namespace, opts +// 
GetPodsForDBChecker provides a mock function with given fields: namespace, opts func (_m *InterfaceOVN) GetPodsForDBChecker(namespace string, opts metav1.ListOptions) ([]*apicorev1.Pod, error) { ret := _m.Called(namespace, opts) diff --git a/go-controller/pkg/kube/mocks/Listener.go b/go-controller/pkg/kube/mocks/Listener.go index dc8fda0074..5c3802c71c 100644 --- a/go-controller/pkg/kube/mocks/Listener.go +++ b/go-controller/pkg/kube/mocks/Listener.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/kube/mocks/Server.go b/go-controller/pkg/kube/mocks/Server.go index a91ea7cd89..6d911fd71b 100644 --- a/go-controller/pkg/kube/mocks/Server.go +++ b/go-controller/pkg/kube/mocks/Server.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/kubevirt/pod.go b/go-controller/pkg/kubevirt/pod.go index 8cde9d713e..2396af9045 100644 --- a/go-controller/pkg/kubevirt/pod.go +++ b/go-controller/pkg/kubevirt/pod.go @@ -15,6 +15,7 @@ import ( libovsdbclient "github.com/ovn-kubernetes/libovsdb/client" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/generator/udn" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -515,24 +516,29 @@ func (r *DefaultGatewayReconciler) ReconcileIPv4AfterLiveMigration(liveMigration if liveMigrationStatus.State != LiveMigrationTargetDomainReady { return nil } + var gwMAC net.HardwareAddr + if !config.Layer2UsesTransitRouter { + targetNode, err := r.watchFactory.GetNode(liveMigrationStatus.TargetPod.Spec.NodeName) + if err != nil { + return err + } - targetNode, err := r.watchFactory.GetNode(liveMigrationStatus.TargetPod.Spec.NodeName) - if err != nil { - return err - } + lrpJoinAddress, err 
:= udn.GetGWRouterIPv4(targetNode, r.netInfo) + if err != nil { + return err + } - lrpJoinAddress, err := udn.GetGWRouterIPv4(targetNode, r.netInfo) - if err != nil { - return err + gwMAC = util.IPAddrToHWAddr(lrpJoinAddress) } - - lrpMAC := util.IPAddrToHWAddr(lrpJoinAddress) for _, subnet := range r.netInfo.Subnets() { gwIP := r.netInfo.GetNodeGatewayIP(subnet.CIDR).IP.To4() if gwIP == nil { continue } - garp := util.GARP{IP: gwIP, MAC: &lrpMAC} + if config.Layer2UsesTransitRouter { + gwMAC = util.IPAddrToHWAddr(gwIP) + } + garp := util.GARP{IP: gwIP, MAC: &gwMAC} if err := util.BroadcastGARP(r.interfaceName, garp); err != nil { return err } @@ -573,7 +579,7 @@ func (r *DefaultGatewayReconciler) ReconcileIPv6AfterLiveMigration(liveMigration ras := make([]ndp.RouterAdvertisement, 0, len(nodes)) for _, node := range nodes { - if node.Name == liveMigration.TargetPod.Spec.NodeName { + if !config.Layer2UsesTransitRouter && node.Name == liveMigration.TargetPod.Spec.NodeName { // skip the target node since this is the proper gateway continue } @@ -587,22 +593,47 @@ func (r *DefaultGatewayReconciler) ReconcileIPv6AfterLiveMigration(liveMigration // to signal the removal of the old default gateway. // NOTE: This is a workaround for the issue and may not be needed in the future, after // upgrading to a version that supports the new behavior.
- ras = append(ras, newRouterAdvertisementFromJoinIPAndLifetime(nodeJoinAddrs[0].IP, destinationMAC, destinationIP.IP, 0)) - } - targetNode, err := r.watchFactory.GetNode(liveMigration.TargetPod.Spec.NodeName) - if err != nil { - return fmt.Errorf("failed fetching node %q to reconcile ipv6 gateway: %w", liveMigration.TargetPod.Spec.NodeName, err) + ras = append(ras, newRouterAdvertisementFromIPAndLifetime(nodeJoinAddrs[0].IP, destinationMAC, destinationIP.IP, 0)) } - targetNodeJoinAddrs, err := udn.GetGWRouterIPs(targetNode, r.netInfo) - if err != nil { - return ovntypes.NewSuppressedError(fmt.Errorf("failed parsing join addresss from live migration target node %q and network %q to reconcile ipv6 gateway: %w", targetNode.Name, r.netInfo.GetNetworkName(), err)) + if !config.Layer2UsesTransitRouter { + targetNode, err := r.watchFactory.GetNode(liveMigration.TargetPod.Spec.NodeName) + if err != nil { + return fmt.Errorf("failed fetching node %q to reconcile ipv6 gateway: %w", liveMigration.TargetPod.Spec.NodeName, err) + } + targetNodeJoinAddrs, err := udn.GetGWRouterIPs(targetNode, r.netInfo) + if err != nil { + return ovntypes.NewSuppressedError(fmt.Errorf("failed parsing join addresss from live migration target node %q and network %q to reconcile ipv6 gateway: %w", targetNode.Name, r.netInfo.GetNetworkName(), err)) + } + ras = append(ras, newRouterAdvertisementFromIPAndLifetime(targetNodeJoinAddrs[0].IP, destinationMAC, destinationIP.IP, 65535)) + } else { + if len(targetPodAnnotation.Gateways) == 0 { + return fmt.Errorf("missing gateways to calculate ipv6 gateway reconciler RA") + } + // The LRP mac is calculated from the first address on the list.
+ gwIP := targetPodAnnotation.Gateways[0] + + // Create Prefix Information Option with IPv6 join subnet + prefixNet := r.netInfo.JoinSubnetV6() + if prefixNet == nil { + return fmt.Errorf("no IPv6 join subnet available for network %q", r.netInfo.GetNetworkName()) + } + + prefixInfo := ndp.PrefixInformation{ + Prefix: *prefixNet, + ValidLifetime: 0, + PreferredLifetime: 0, // IP lifetime 0 as requested + OnLink: true, + Autonomous: true, + } + + ras = append(ras, newRouterAdvertisementWithPrefixInfos(gwIP, destinationMAC, destinationIP.IP, 65535, []ndp.PrefixInformation{prefixInfo})) } - ras = append(ras, newRouterAdvertisementFromJoinIPAndLifetime(targetNodeJoinAddrs[0].IP, destinationMAC, destinationIP.IP, 65535)) + return ndp.SendRouterAdvertisements(r.interfaceName, ras...) } -// newRouterAdvertisementFromJoinIPAndLifetime creates a new Router Advertisement (RA) message -// using the provided join IP address, destination MAC, destination IP, and lifetime. +// newRouterAdvertisementFromIPAndLifetime creates a new Router Advertisement (RA) message +// using the provided IP address, destination MAC, destination IP, and lifetime. // // This function performs the following: // - Derives the source MAC address from the given IP using util.IPAddrToHWAddr. @@ -611,14 +642,25 @@ func (r *DefaultGatewayReconciler) ReconcileIPv6AfterLiveMigration(liveMigration // - Sets the RA message's lifetime to the specified value. // // Parameters: -// - ip: The join IP address used to derive the source MAC and LLA. +// - ip: The IP address used to derive the source MAC and LLA. // - destinationMAC: The MAC address to which the RA message will be sent.
// - destinationIP: The IP address to which the RA message will be sent. // - lifetime: The lifetime value for the RA message, in seconds. // // Returns: // - An ndp.RouterAdvertisement object configured with the calculated source MAC, LLA, and the provided destination MAC, IP, and lifetime. -func newRouterAdvertisementFromJoinIPAndLifetime(ip net.IP, destinationMAC net.HardwareAddr, destinationIP net.IP, lifetime uint16) ndp.RouterAdvertisement { +func newRouterAdvertisementFromIPAndLifetime(ip net.IP, destinationMAC net.HardwareAddr, destinationIP net.IP, lifetime uint16) ndp.RouterAdvertisement { + sourceMAC := util.IPAddrToHWAddr(ip) + return ndp.RouterAdvertisement{ + SourceMAC: sourceMAC, + SourceIP: util.HWAddrToIPv6LLA(sourceMAC), + DestinationMAC: destinationMAC, + DestinationIP: destinationIP, + Lifetime: lifetime, + } +} + +func newRouterAdvertisementWithPrefixInfos(ip net.IP, destinationMAC net.HardwareAddr, destinationIP net.IP, lifetime uint16, prefixInfos []ndp.PrefixInformation) ndp.RouterAdvertisement { sourceMAC := util.IPAddrToHWAddr(ip) return ndp.RouterAdvertisement{ SourceMAC: sourceMAC, @@ -626,5 +668,6 @@ func newRouterAdvertisementFromJoinIPAndLifetime(ip net.IP, destinationMAC net.H DestinationMAC: destinationMAC, DestinationIP: destinationIP, Lifetime: lifetime, + PrefixInfos: prefixInfos, } } diff --git a/go-controller/pkg/libovsdb/ops/router.go b/go-controller/pkg/libovsdb/ops/router.go index 5f0ce594d4..2db149b352 100644 --- a/go-controller/pkg/libovsdb/ops/router.go +++ b/go-controller/pkg/libovsdb/ops/router.go @@ -443,6 +443,42 @@ func CreateOrAddNextHopsToLogicalRouterPolicyWithPredicateOps(nbClient libovsdbc return m.CreateOrUpdateOps(ops, opModels...)
} +// ReplaceNextHopsForLogicalRouterPolicyWithPredicateOps appends to ops, for every logical router policy matching p, +// a DeleteOps mutation of Nexthops for oldNextHop followed by a CreateOrUpdateOps mutation for newNextHop. +func ReplaceNextHopsForLogicalRouterPolicyWithPredicateOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, p logicalRouterPolicyPredicate, + oldNextHop, newNextHop string) ([]ovsdb.Operation, error) { + lrps, err := FindLogicalRouterPoliciesWithPredicate(nbClient, p) + if err != nil { + return nil, err + } + for _, lrp := range lrps { + lrp.Nexthops = []string{oldNextHop} + opModel := operationModel{ + Model: lrp, + OnModelMutations: []interface{}{&lrp.Nexthops}, + ErrNotFound: false, + BulkOp: false, + } + m := newModelClient(nbClient) + ops, err = m.DeleteOps(ops, opModel) + if err != nil { + return nil, fmt.Errorf("failed to get delete old nexthop %s ops: %w", oldNextHop, err) + } + lrp.Nexthops = []string{newNextHop} + opModel = operationModel{ + Model: lrp, + OnModelMutations: []interface{}{&lrp.Nexthops}, + ErrNotFound: false, + BulkOp: true, + } + ops, err = m.CreateOrUpdateOps(ops, opModel) + if err != nil { + return nil, fmt.Errorf("failed to get add new nexthop %s ops: %w", newNextHop, err) + } + } + return ops, nil +} + // DeleteNextHopsFromLogicalRouterPolicyOps removes the Nexthops from the // provided logical router policies.
func DeleteNextHopsFromLogicalRouterPolicyOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, routerName string, lrps []*nbdb.LogicalRouterPolicy, nextHops ...string) ([]ovsdb.Operation, error) { diff --git a/go-controller/pkg/libovsdb/util/router.go b/go-controller/pkg/libovsdb/util/router.go index b316fea0e3..6d301b19f8 100644 --- a/go-controller/pkg/libovsdb/util/router.go +++ b/go-controller/pkg/libovsdb/util/router.go @@ -33,7 +33,8 @@ import ( // (TODO: FIXME): With this route, we are officially breaking support for IC with zones that have multiple-nodes // NOTE: This route is exactly the same as what is added by pod-live-migration feature and we keep the route exactly // same across the 3 features so that if the route already exists on the node, this is just a no-op -func CreateDefaultRouteToExternal(nbClient libovsdbclient.Client, clusterRouter, gwRouterName string, clusterSubnets []config.CIDRNetworkEntry, gatewayIPs []*net.IPNet) error { +func CreateDefaultRouteToExternal(nbClient libovsdbclient.Client, clusterRouter, gwRouterName string, + clusterSubnets []config.CIDRNetworkEntry, gatewayIPs []*net.IPNet) error { for _, clusterSubnet := range clusterSubnets { isClusterSubnetIPV6 := utilnet.IsIPv6String(clusterSubnet.CIDR.IP.String()) gatewayIP, err := util.MatchFirstIPNetFamily(isClusterSubnetIPV6, gatewayIPs) diff --git a/go-controller/pkg/networkmanager/api.go b/go-controller/pkg/networkmanager/api.go index 79f131da71..79a19415dd 100644 --- a/go-controller/pkg/networkmanager/api.go +++ b/go-controller/pkg/networkmanager/api.go @@ -6,6 +6,7 @@ import ( "k8s.io/client-go/tools/record" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -69,6 +70,7 @@ func NewForCluster( wf watchFactory, ovnClient *util.OVNClusterManagerClientset, recorder record.EventRecorder, + tunnelKeysAllocator *id.TunnelKeysAllocator, ) 
(Controller, error) { return new( "clustermanager-nad-controller", @@ -78,6 +80,7 @@ func NewForCluster( wf, ovnClient, recorder, + tunnelKeysAllocator, ) } @@ -95,6 +98,7 @@ func NewForZone( wf, nil, nil, + nil, ) } @@ -112,6 +116,7 @@ func NewForNode( wf, nil, nil, + nil, ) } @@ -126,8 +131,9 @@ func new( wf watchFactory, ovnClient *util.OVNClusterManagerClientset, recorder record.EventRecorder, + tunnelKeysAllocator *id.TunnelKeysAllocator, ) (Controller, error) { - return newController(name, zone, node, cm, wf, ovnClient, recorder) + return newController(name, zone, node, cm, wf, ovnClient, recorder, tunnelKeysAllocator) } // ControllerManager manages controllers. Needs to be provided in order to build diff --git a/go-controller/pkg/networkmanager/nad_controller.go b/go-controller/pkg/networkmanager/nad_controller.go index 78c0fea60e..62c3c3c7af 100644 --- a/go-controller/pkg/networkmanager/nad_controller.go +++ b/go-controller/pkg/networkmanager/nad_controller.go @@ -73,8 +73,9 @@ type nadController struct { // primaryNADs holds a mapping of namespace to NAD of primary UDNs primaryNADs map[string]string - networkIDAllocator id.Allocator - nadClient nadclientset.Interface + networkIDAllocator id.Allocator + tunnelKeysAllocator *id.TunnelKeysAllocator + nadClient nadclientset.Interface } func newController( @@ -85,6 +86,7 @@ func newController( wf watchFactory, ovnClient *util.OVNClusterManagerClientset, recorder record.EventRecorder, + tunnelKeysAllocator *id.TunnelKeysAllocator, ) (*nadController, error) { c := &nadController{ name: fmt.Sprintf("[%s NAD controller]", name), @@ -100,7 +102,7 @@ func newController( c.nadClient = ovnClient.NetworkAttchDefClient } - // this is cluster network manager, so we allocate network IDs + // this is cluster network manager, so we allocate network IDs and tunnel keys if zone == "" && node == "" { c.networkIDAllocator = id.NewIDAllocator("NetworkIDs", MaxNetworks) // Reserve the ID of the default network @@ -108,6 +110,8 @@ 
func newController( if err != nil { return nil, fmt.Errorf("failed to allocate default network ID: %w", err) } + // tunnelKeysAllocator must be passed for cluster manager + c.tunnelKeysAllocator = tunnelKeysAllocator } config := &controller.ControllerConfig[nettypes.NetworkAttachmentDefinition]{ @@ -348,7 +352,7 @@ func (c *nadController) syncNAD(key string, nad *nettypes.NetworkAttachmentDefin } } - if err := c.handleNetworkID(oldNetwork, ensureNetwork, nad); err != nil { + if err := c.handleNetworkAnnotations(oldNetwork, ensureNetwork, nad); err != nil { return err } @@ -565,22 +569,21 @@ func (c *nadController) DoWithLock(f func(network util.NetInfo) error) error { return errors.Join(errs...) } -// handleNetworkID finds out what the network ID should be for a new network and -// sets it on 'new'. The network ID is primarily found annotated in the NAD. If -// not annotated, it means it is still to be allocated. If this is not the NAD -// controller running in cluster manager, then we don't do anything as we are -// expected to wait until it happens. If this is the NAD controller running in -// cluster manager then a new ID is allocated and annotated on the NAD. The NAD -// controller running in cluster manager also releases here the network ID of a -// network that is being deleted. -func (c *nadController) handleNetworkID(old util.NetInfo, new util.MutableNetInfo, nad *nettypes.NetworkAttachmentDefinition) error { +// handleNetworkAnnotations assigns or reads info from the NAD annotations. +// We store network ID and tunnel keys in the NAD annotation. This function +// finds out what these values should be for a new network and +// sets it on 'new'. If not annotated, it means it is still to be allocated. +// If this is not the NAD controller running in cluster manager, then we don't +// do anything as we are expected to wait until it happens. +// If this is the NAD controller running in cluster manager then a new ID +// is allocated and annotated on the NAD. 
The NAD controller running in +// cluster manager also releases here the network ID of a network that is being deleted. +func (c *nadController) handleNetworkAnnotations(old util.NetInfo, new util.MutableNetInfo, nad *nettypes.NetworkAttachmentDefinition) (err error) { if new != nil && new.IsDefault() { return nil } - var err error id := types.InvalidID - // check what ID is currently annotated if nad != nil && nad.Annotations[types.OvnNetworkIDAnnotation] != "" { annotated := nad.Annotations[types.OvnNetworkIDAnnotation] @@ -590,11 +593,21 @@ func (c *nadController) handleNetworkID(old util.NetInfo, new util.MutableNetInf } } + tunnelKeys := []int{} + // check what tunnel keys are currently annotated + if nad != nil && nad.Annotations[types.OvnNetworkTunnelKeysAnnotation] != "" { + tunnelKeys, err = util.ParseTunnelKeysAnnotation(nad.Annotations[types.OvnNetworkTunnelKeysAnnotation]) + if err != nil { + return fmt.Errorf("failed to parse annotated tunnel keys: %w", err) + } + } + // this is not the cluster manager nad controller and we are not allocating // so just return what we got from the annotation if c.networkIDAllocator == nil { if new != nil { new.SetNetworkID(id) + new.SetTunnelKeys(tunnelKeys) } return nil } @@ -602,6 +615,7 @@ func (c *nadController) handleNetworkID(old util.NetInfo, new util.MutableNetInf // release old ID if the network is being deleted if old != nil && !old.IsDefault() && len(old.GetNADs()) == 0 { c.networkIDAllocator.ReleaseID(old.GetNetworkName()) + c.tunnelKeysAllocator.ReleaseKeys(old.GetNetworkName()) } // nothing to allocate @@ -610,7 +624,7 @@ func (c *nadController) handleNetworkID(old util.NetInfo, new util.MutableNetInf } name := new.GetNetworkName() - // an ID was annotated, check if it is free to use or stale + // a network ID was annotated, check if it is free to use or stale if id != types.InvalidID { err = c.networkIDAllocator.ReserveID(name, id) if err != nil { @@ -618,27 +632,56 @@ func (c *nadController) 
handleNetworkID(old util.NetInfo, new util.MutableNetInf id = types.InvalidID } } + // tunnel key annotation doesn't need the same check ^ because it is initialized outside the + // nad controller and has already assured that all annotated tunnel keys are reserved. + // we are about to allocate resources, so prepare a cleanup function + // in case of error to release them. + var allocatedNetworkID, allocatedTunnelKeys bool + defer func() { + if err != nil { + if allocatedNetworkID { + c.networkIDAllocator.ReleaseID(name) + } + if allocatedTunnelKeys { + c.tunnelKeysAllocator.ReleaseKeys(name) + } + } + }() // we don't have an ID, allocate a new one if id == types.InvalidID { id, err = c.networkIDAllocator.AllocateID(name) if err != nil { return fmt.Errorf("failed to allocate network ID: %w", err) } + allocatedNetworkID = true // check if there is still a network running with that ID in the process // of being stopped other := c.networkController.getRunningNetwork(id) if other != "" && c.networkController.getNetwork(other) == nil { - c.networkIDAllocator.ReleaseID(name) return fmt.Errorf("found other network %s being stopped with allocated ID %d, will retry", other, id) } } + // allocate tunnel keys + if len(tunnelKeys) != getNumberOfTunnelKeys(new) { + tunnelKeys, err = c.tunnelKeysAllocator.AllocateKeys(name, id, getNumberOfTunnelKeys(new)) + if err != nil { + return fmt.Errorf("failed to allocate tunnel keys: %w", err) + } + allocatedTunnelKeys = true + } + // set and annotate the network ID + tunnelKeyAnno, err := util.FormatTunnelKeysAnnotation(tunnelKeys) + if err != nil { + return fmt.Errorf("failed to format tunnel keys annotation: %w", err) + } annotations := map[string]string{ - types.OvnNetworkNameAnnotation: name, - types.OvnNetworkIDAnnotation: strconv.Itoa(id), + types.OvnNetworkNameAnnotation: name, + types.OvnNetworkIDAnnotation: strconv.Itoa(id), + types.OvnNetworkTunnelKeysAnnotation: tunnelKeyAnno, } if 
nad.Annotations[types.OvnNetworkNameAnnotation] == annotations[types.OvnNetworkNameAnnotation] { delete(annotations, types.OvnNetworkNameAnnotation) @@ -646,8 +689,12 @@ func (c *nadController) handleNetworkID(old util.NetInfo, new util.MutableNetInf if nad.Annotations[types.OvnNetworkIDAnnotation] == annotations[types.OvnNetworkIDAnnotation] { delete(annotations, types.OvnNetworkIDAnnotation) } + if nad.Annotations[types.OvnNetworkTunnelKeysAnnotation] == annotations[types.OvnNetworkTunnelKeysAnnotation] { + delete(annotations, types.OvnNetworkTunnelKeysAnnotation) + } if len(annotations) == 0 { new.SetNetworkID(id) + new.SetTunnelKeys(tunnelKeys) return nil } @@ -662,10 +709,10 @@ func (c *nadController) handleNetworkID(old util.NetInfo, new util.MutableNetInf c.name, ) if err != nil { - c.networkIDAllocator.ReleaseID(name) - return fmt.Errorf("failed to annotate network ID on NAD: %w", err) + return fmt.Errorf("failed to annotate network ID and/or tunnel keys on NAD: %w", err) } new.SetNetworkID(id) + new.SetTunnelKeys(tunnelKeys) return nil } @@ -679,3 +726,18 @@ func (c *nadController) GetActiveNetwork(network string) util.NetInfo { } return state.controller } + +func getNumberOfTunnelKeys(netInfo util.NetInfo) int { + if netInfo.IsDefault() { + // default network does not need tunnel keys allocation because it always uses network ID 0. + return 0 + } + // Layer3, Secondary Layer2 and Localnet topologies need only 1 tunnel key for now that is derived from the network ID + // and is limited by the MaxNetworks. Don't annotate any tunnel keys in that case until we decide to + // increase the MaxNetworks. 
+ if netInfo.TopologyType() != types.Layer2Topology || !netInfo.IsPrimaryNetwork() { + return 0 + } + // Primary Layer2 UDNs need 2 tunnel keys: one for the layer2 switch and one for the transit router + return 2 +} diff --git a/go-controller/pkg/networkmanager/nad_controller_test.go b/go-controller/pkg/networkmanager/nad_controller_test.go index 1ce5ad9168..6e083b785f 100644 --- a/go-controller/pkg/networkmanager/nad_controller_test.go +++ b/go-controller/pkg/networkmanager/nad_controller_test.go @@ -139,9 +139,10 @@ func TestNADController(t *testing.T) { Name: "networkAPrimary", Type: "ovn-k8s-cni-overlay", }, - Subnets: "10.1.130.0/24", - Role: types.NetworkRolePrimary, - MTU: 1400, + Subnets: "10.1.130.0/24", + TransitSubnet: config.ClusterManager.V4TransitSubnet, + Role: types.NetworkRolePrimary, + MTU: 1400, } networkAIncompatible := &ovncnitypes.NetConf{ Topology: types.LocalnetTopology, @@ -500,12 +501,13 @@ func TestNADController(t *testing.T) { } fakeClient := util.GetOVNClientset().GetClusterManagerClientset() nadController := &nadController{ - nads: map[string]string{}, - primaryNADs: map[string]string{}, - networkController: newNetworkController("", "", "", tcm, nil), - networkIDAllocator: id.NewIDAllocator("NetworkIDs", MaxNetworks), - nadClient: fakeClient.NetworkAttchDefClient, - namespaceLister: &fakeNamespaceLister{}, + nads: map[string]string{}, + primaryNADs: map[string]string{}, + networkController: newNetworkController("", "", "", tcm, nil), + networkIDAllocator: id.NewIDAllocator("NetworkIDs", MaxNetworks), + tunnelKeysAllocator: id.NewTunnelKeyAllocator("TunnelKeys"), + nadClient: fakeClient.NetworkAttchDefClient, + namespaceLister: &fakeNamespaceLister{}, } err = nadController.networkIDAllocator.ReserveID(types.DefaultNetworkName, types.DefaultNetworkID) g.Expect(err).ToNot(gomega.HaveOccurred()) @@ -563,7 +565,11 @@ func TestNADController(t *testing.T) { id, err := nadController.networkIDAllocator.AllocateID(name) 
g.Expect(err).ToNot(gomega.HaveOccurred()) g.Expect(netController.networks[name].GetNetworkID()).To(gomega.Equal(id)) - + if netInfo.TopologyType() == types.Layer2Topology && netInfo.IsPrimaryNetwork() { + tunnelKeys, err := nadController.tunnelKeysAllocator.AllocateKeys(name, id, 2) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(netController.networks[name].GetTunnelKeys()).To(gomega.Equal(tunnelKeys)) + } // test that the actual controllers have the expected config and NADs if !netInfo.IsDefault() { g.Expect(tcm.controllers).To(gomega.HaveKey(testNetworkKey)) @@ -696,6 +702,7 @@ func TestSyncAll(t *testing.T) { wf, fakeClient, nil, + id.NewTunnelKeyAllocator("TunnelKeys"), ) g.Expect(err).ToNot(gomega.HaveOccurred()) @@ -778,6 +785,62 @@ func TestSyncAll(t *testing.T) { } } +func TestResourceCleanup(t *testing.T) { + g := gomega.NewWithT(t) + err := config.PrepareTestConfig() + g.Expect(err).ToNot(gomega.HaveOccurred()) + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableMultiNetwork = true + tcm := &testControllerManager{ + controllers: map[string]NetworkController{}, + defaultNetwork: &testNetworkController{ + ReconcilableNetInfo: &util.DefaultNetInfo{}, + }, + } + fakeClient := util.GetOVNClientset().GetClusterManagerClientset() + nadController := &nadController{ + nads: map[string]string{}, + primaryNADs: map[string]string{}, + networkController: newNetworkController("", "", "", tcm, nil), + networkIDAllocator: id.NewIDAllocator("NetworkIDs", MaxNetworks), + tunnelKeysAllocator: id.NewTunnelKeyAllocator("TunnelKeys"), + nadClient: fakeClient.NetworkAttchDefClient, + namespaceLister: &fakeNamespaceLister{}, + } + err = nadController.networkIDAllocator.ReserveID(types.DefaultNetworkName, types.DefaultNetworkID) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(nadController.networkController.Start()).To(gomega.Succeed()) + defer nadController.networkController.Stop() + + nadNs := "test" + nadName := 
"nad_1" + nadKey := nadNs + "/" + nadName + networkAPrimary := &ovncnitypes.NetConf{ + Topology: types.Layer2Topology, + NetConf: cnitypes.NetConf{ + Name: "networkAPrimary", + Type: "ovn-k8s-cni-overlay", + }, + Subnets: "10.1.130.0/24", + Role: types.NetworkRolePrimary, + MTU: 1400, + NADName: nadKey, + } + nad, err := buildNAD(nadName, nadNs, networkAPrimary) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // make annotation update fail (nad doesn't exist), make sure networkID and tunnel keys are released + err = nadController.syncNAD(nadKey, nad) + g.Expect(err).To(gomega.HaveOccurred()) + g.Expect(err.Error()).To(gomega.ContainSubstring("failed to annotate network ID and/or tunnel keys")) + // we know the allocated network ID was 1 and tunnelKeys were [16711684, 16715779] (first available IDs after Default network) + // try to reserve these exact IDs for a different network to make sure they were released + err = nadController.networkIDAllocator.ReserveID("networkB", 1) + g.Expect(err).ToNot(gomega.HaveOccurred()) + err = nadController.tunnelKeysAllocator.ReserveKeys("networkB", []int{16711684, 16715779}) + g.Expect(err).ToNot(gomega.HaveOccurred()) +} + func buildNAD(name, namespace string, network *ovncnitypes.NetConf) (*nettypes.NetworkAttachmentDefinition, error) { config, err := json.Marshal(network) if err != nil { diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 7e1f330937..a7c5f20dff 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -39,7 +39,7 @@ import ( nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" - nodemocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node" + 
mgmtportmock "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport" linkMock "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -777,7 +777,7 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, // FIXME(mk): starting the gateway causing go routines to be spawned within sub functions and therefore they escape the // netns we wanted to set it to originally here. Refactor test cases to not spawn a go routine or just fake out everything // and remove need to create netns - mpmock := &nodemocks.ManagementPort{} + mpmock := &mgmtportmock.Interface{} err = testNS.Do(func(ns.NetNS) error { defer GinkgoRecover() diff --git a/go-controller/pkg/node/node_ip_handler_linux_test.go b/go-controller/pkg/node/node_ip_handler_linux_test.go index aa819cdb8a..c78307cca1 100644 --- a/go-controller/pkg/node/node_ip_handler_linux_test.go +++ b/go-controller/pkg/node/node_ip_handler_linux_test.go @@ -24,7 +24,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/bridgeconfig" nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" - nodemocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node" + mgmtportmock "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -399,7 +399,7 @@ func configureKubeOVNContext(nodeName string, useNetlink bool) *testCtx { _ = nodenft.SetFakeNFTablesHelper() - mpmock := &nodemocks.ManagementPort{} + 
mpmock := &mgmtportmock.Interface{} mpmock.On("GetAddresses").Return([]*net.IPNet{tc.mgmtPortIP4, tc.mgmtPortIP6}) fakeBridgeConfiguration := bridgeconfig.TestBridgeConfig("breth0") diff --git a/go-controller/pkg/ovn/address_set/mocks/AddressSet.go b/go-controller/pkg/ovn/address_set/mocks/AddressSet.go index f5dd89448f..abaaf6f47f 100644 --- a/go-controller/pkg/ovn/address_set/mocks/AddressSet.go +++ b/go-controller/pkg/ovn/address_set/mocks/AddressSet.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks @@ -108,7 +108,7 @@ func (_m *AddressSet) DeleteAddressesReturnOps(addresses []string) ([]ovsdb.Oper return r0, r1 } -// Destroy provides a mock function with given fields: +// Destroy provides a mock function with no fields func (_m *AddressSet) Destroy() error { ret := _m.Called() @@ -126,7 +126,7 @@ func (_m *AddressSet) Destroy() error { return r0 } -// GetASHashNames provides a mock function with given fields: +// GetASHashNames provides a mock function with no fields func (_m *AddressSet) GetASHashNames() (string, string) { ret := _m.Called() @@ -154,7 +154,7 @@ func (_m *AddressSet) GetASHashNames() (string, string) { return r0, r1 } -// GetAddresses provides a mock function with given fields: +// GetAddresses provides a mock function with no fields func (_m *AddressSet) GetAddresses() ([]string, []string) { ret := _m.Called() @@ -186,7 +186,7 @@ func (_m *AddressSet) GetAddresses() ([]string, []string) { return r0, r1 } -// GetName provides a mock function with given fields: +// GetName provides a mock function with no fields func (_m *AddressSet) GetName() string { ret := _m.Called() diff --git a/go-controller/pkg/ovn/address_set/mocks/AddressSetDoFunc.go b/go-controller/pkg/ovn/address_set/mocks/AddressSetDoFunc.go deleted file mode 100644 index 88385d570c..0000000000 --- a/go-controller/pkg/ovn/address_set/mocks/AddressSetDoFunc.go +++ /dev/null @@ -1,42 +0,0 @@ -// Code 
generated by mockery v2.16.0. DO NOT EDIT. - -package mocks - -import ( - addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" - mock "github.com/stretchr/testify/mock" -) - -// AddressSetDoFunc is an autogenerated mock type for the AddressSetDoFunc type -type AddressSetDoFunc struct { - mock.Mock -} - -// Execute provides a mock function with given fields: as -func (_m *AddressSetDoFunc) Execute(as addressset.AddressSet) error { - ret := _m.Called(as) - - var r0 error - if rf, ok := ret.Get(0).(func(addressset.AddressSet) error); ok { - r0 = rf(as) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -type mockConstructorTestingTNewAddressSetDoFunc interface { - mock.TestingT - Cleanup(func()) -} - -// NewAddressSetDoFunc creates a new instance of AddressSetDoFunc. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewAddressSetDoFunc(t mockConstructorTestingTNewAddressSetDoFunc) *AddressSetDoFunc { - mock := &AddressSetDoFunc{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/go-controller/pkg/ovn/address_set/mocks/AddressSetFactory.go b/go-controller/pkg/ovn/address_set/mocks/AddressSetFactory.go index 0d18215185..b226488211 100644 --- a/go-controller/pkg/ovn/address_set/mocks/AddressSetFactory.go +++ b/go-controller/pkg/ovn/address_set/mocks/AddressSetFactory.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/ovn/address_set/mocks/AddressSetIterFunc.go b/go-controller/pkg/ovn/address_set/mocks/AddressSetIterFunc.go index 1c9ff3de62..6657de1d2e 100644 --- a/go-controller/pkg/ovn/address_set/mocks/AddressSetIterFunc.go +++ b/go-controller/pkg/ovn/address_set/mocks/AddressSetIterFunc.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/ovn/address_set/mocks/removeFunc.go b/go-controller/pkg/ovn/address_set/mocks/removeFunc.go index 044f4b440a..d408953a3a 100644 --- a/go-controller/pkg/ovn/address_set/mocks/removeFunc.go +++ b/go-controller/pkg/ovn/address_set/mocks/removeFunc.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index ea72526b10..2ed5a3a959 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -383,6 +383,13 @@ func (bnc *BaseNetworkController) getOVNClusterRouterPortToJoinSwitchIfAddrs() ( return gwLRPIPs, nil } +func (bnc *BaseNetworkController) getCRToSwitchPortName(switchName string) string { + if bnc.TopologyType() == types.Layer2Topology { + return types.TransitRouterToSwitchPrefix + switchName + } + return types.RouterToSwitchPrefix + switchName +} + // syncNodeClusterRouterPort ensures a node's LS to the cluster router's LRP is created. // NOTE: We could have created the router port in createNodeLogicalSwitch() instead of here, // but chassis ID is not available at that moment. 
We need the chassis ID to set the @@ -412,9 +419,9 @@ func (bnc *BaseNetworkController) syncNodeClusterRouterPort(node *corev1.Node, h } } - switchName := bnc.GetNetworkScopedName(node.Name) + switchName := bnc.GetNetworkScopedSwitchName(node.Name) logicalRouterName := bnc.GetNetworkScopedClusterRouterName() - lrpName := types.RouterToSwitchPrefix + switchName + lrpName := bnc.getCRToSwitchPortName(switchName) lrpNetworks := []string{} for _, hostSubnet := range hostSubnets { gwIfAddr := bnc.GetNodeGatewayIP(hostSubnet) @@ -440,6 +447,22 @@ func (bnc *BaseNetworkController) syncNodeClusterRouterPort(node *corev1.Node, h ChassisName: chassisID, Priority: 1, } + _, isNetIPv6 := bnc.IPMode() + if bnc.TopologyType() == types.Layer2Topology && + isNetIPv6 && + util.IsNetworkSegmentationSupportEnabled() && + bnc.IsPrimaryNetwork() { + logicalRouterPort.Ipv6RaConfigs = map[string]string{ + "address_mode": "dhcpv6_stateful", + "send_periodic": "true", + "max_interval": "900", // 15 minutes + "min_interval": "300", // 5 minutes + "router_preference": "LOW", // The static gateway configured by CNI is MEDIUM, so make this LOW so it has less effect for pods + } + if bnc.MTU() > 0 { + logicalRouterPort.Ipv6RaConfigs["mtu"] = fmt.Sprintf("%d", bnc.MTU()) + } + } err = libovsdbops.CreateOrUpdateLogicalRouterPort(bnc.nbClient, &logicalRouter, &logicalRouterPort, &gatewayChassis, &logicalRouterPort.MAC, &logicalRouterPort.Networks, &logicalRouterPort.Options) @@ -450,7 +473,8 @@ func (bnc *BaseNetworkController) syncNodeClusterRouterPort(node *corev1.Node, h if util.IsNetworkSegmentationSupportEnabled() && bnc.IsPrimaryNetwork() && !config.OVNKubernetesFeature.EnableInterconnect && - bnc.TopologyType() == types.Layer3Topology { + (bnc.TopologyType() == types.Layer3Topology || + bnc.TopologyType() == types.Layer2Topology) { // since in nonIC the ovn_cluster_router is distributed, we must specify the gatewayPort for the // conditional SNATs to signal OVN which gatewayport should be 
chosen if there are mutiple distributed // gateway ports. Now that the LRP is created, let's update the NATs to reflect that. diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index 421314586b..fa9931dbba 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -11,6 +11,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + zoneinterconnect "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/zone_interconnect" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors" @@ -179,7 +180,14 @@ func (oc *BaseLayer2UserDefinedNetworkController) initializeLogicalSwitch(switch } if oc.isLayer2Interconnect() { - err := oc.zoneICHandler.AddTransitSwitchConfig(&logicalSwitch) + tunnelKey := zoneinterconnect.BaseTransitSwitchTunnelKey + oc.GetNetworkID() + if config.Layer2UsesTransitRouter && oc.IsPrimaryNetwork() { + if len(oc.GetTunnelKeys()) != 2 { + return nil, fmt.Errorf("layer2 network %s with transit router enabled requires exactly 2 tunnel keys, got: %v", oc.GetNetworkName(), oc.GetTunnelKeys()) + } + tunnelKey = oc.GetTunnelKeys()[0] + } + err := oc.zoneICHandler.AddTransitSwitchConfig(&logicalSwitch, tunnelKey) if err != nil { return nil, err } diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index 3f4275e028..8d8cdbcd80 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -779,7 +779,7 @@ 
func (c *Controller) cleanupUDNEnabledServiceRoute(key string) error { var ops []ovsdb.Operation var err error - if c.netInfo.TopologyType() == types.Layer2Topology { + if c.netInfo.TopologyType() == types.Layer2Topology && !globalconfig.Layer2UsesTransitRouter { for _, node := range c.nodeInfos { if ops, err = libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicateOps(c.nbClient, ops, c.netInfo.GetNetworkScopedGWRouterName(node.name), delPredicate); err != nil { return err @@ -824,7 +824,7 @@ func (c *Controller) configureUDNEnabledServiceRoute(service *corev1.Service) er ExternalIDs: extIDs, } routerName := c.netInfo.GetNetworkScopedClusterRouterName() - if c.netInfo.TopologyType() == types.Layer2Topology { + if c.netInfo.TopologyType() == types.Layer2Topology && !globalconfig.Layer2UsesTransitRouter { routerName = nodeInfo.gatewayRouterName } ops, err = libovsdbops.CreateOrUpdateLogicalRouterStaticRoutesWithPredicateOps(c.nbClient, nil, routerName, &staticRoute, func(item *nbdb.LogicalRouterStaticRoute) bool { diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 31a48d0c99..4dc5a42f19 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -174,13 +174,14 @@ type EgressIPController struct { // used as a locking mechanism to serialize egress IP processing on a per egress IP basis // the order of locking should always be egressIPCache, then podAssignment, then nodeZoneState egressIPCache *syncmap.SyncMap[bool] - // nodeUpdateMutex is used for two reasons: + // nodeUpdateMutex is used for three reasons: // (1) to ensure safe handling of node ip address updates. VIP addresses are // dynamic and might move across nodes. 
// (2) used in ensureDefaultNoRerouteQoSRules function to ensure // creating QoS rules is thread safe since otherwise when two nodes are added // at the same time by two different threads we end up creating duplicate // QoS rules in database due to libovsdb cache race + // (3) to update nextHop during layer2 topology upgrade nodeUpdateMutex *sync.Mutex // podAssignment is a cache used for keeping track of which egressIP status // has been set up for each pod. The key is defined by getPodKey @@ -1515,14 +1516,9 @@ func (e *EgressIPController) syncPodAssignmentCache(egressIPCache egressIPCache) if ni == nil { return fmt.Errorf("failed to get active network for network name %q", networkName) } - routerName := ni.GetNetworkScopedClusterRouterName() - if ni.TopologyType() == types.Layer2Topology { - // no support for multiple Nodes per OVN zone, therefore pick the first local zone node - localNodeName, err := e.getALocalZoneNodeName() - if err != nil { - return err - } - routerName = ni.GetNetworkScopedGWRouterName(localNodeName) + routerName, err := e.getTopologyScopedLocalZoneRouterName(ni) + if err != nil { + return err } reRoutePolicies, err := libovsdbops.FindALogicalRouterPoliciesWithPredicate(e.nbClient, routerName, p1) if err != nil { @@ -1864,7 +1860,7 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { if localZoneNodes.Len() > 0 { localNodeName = localZoneNodes.UnsortedList()[0] } - routerName, err := getTopologyScopedRouterName(ni, localNodeName) + routerName, err := e.getTopologyScopedRouterName(ni, localNodeName) if err != nil { klog.Errorf("Failed to get network topology scoped router name for network %s attached to namespace %s, stale objects may remain: %v", ni.GetNetworkName(), namespace.Name, err) @@ -1903,14 +1899,14 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { if localZoneNodes.Has(node.Name) { if e.v4 { - if gatewayRouterIP, err := e.getGatewayNextHop(ni, node.Name, false); err 
!= nil { + if gatewayRouterIP, err := e.getGatewayNextHop(ni, node, false); err != nil { klog.V(5).Infof("Unable to retrieve gateway IP for node: %s, protocol is IPv4: err: %v", node.Name, err) } else { r.v4Gateway = gatewayRouterIP.String() } } if e.v6 { - if gatewayRouterIP, err := e.getGatewayNextHop(ni, node.Name, true); err != nil { + if gatewayRouterIP, err := e.getGatewayNextHop(ni, node, true); err != nil { klog.V(5).Infof("Unable to retrieve gateway IP for node: %s, protocol is IPv6: err: %v", node.Name, err) } else { r.v6Gateway = gatewayRouterIP.String() @@ -2141,7 +2137,7 @@ func (e *EgressIPController) initClusterEgressPolicies(_ []interface{}) error { if len(subnets) == 0 { return nil } - routerName, err := getTopologyScopedRouterName(network, localNodeName) + routerName, err := e.getTopologyScopedRouterName(network, localNodeName) if err != nil { return err } @@ -2400,8 +2396,8 @@ func (e *EgressIPController) addPodEgressIPAssignment(ni util.NetInfo, egressIPN return fmt.Errorf("unable to create NAT rule ops for status: %v, err: %v", status, err) } - } else if ni.IsUserDefinedNetwork() && ni.TopologyType() == types.Layer3Topology { - // not required for L2 because we always have LRPs using reroute action to pkt mark + } else if ni.IsUserDefinedNetwork() && (ni.TopologyType() == types.Layer3Topology || + ni.TopologyType() == types.Layer2Topology && config.Layer2UsesTransitRouter) { ops, err = e.createGWMarkPolicyOps(ni, ops, podIPs, status, mark, pod.Namespace, pod.Name, egressIPName) if err != nil { return fmt.Errorf("unable to create GW router LRP ops to packet mark pod %s/%s: %v", pod.Namespace, pod.Name, err) @@ -2411,7 +2407,7 @@ func (e *EgressIPController) addPodEgressIPAssignment(ni util.NetInfo, egressIPN if config.OVNKubernetesFeature.EnableInterconnect && ni.IsDefault() && !isOVNNetwork && (loadedPodNode && !isLocalZonePod) { // For CDNs, configure LRP with reroute action for non-local-zone pods on egress nodes to support redirect to local 
management port // when the egress IP is assigned to a host secondary interface - routerName, err := getTopologyScopedRouterName(ni, pod.Spec.NodeName) + routerName, err := e.getTopologyScopedRouterName(ni, pod.Spec.NodeName) if err != nil { return err } @@ -2424,10 +2420,11 @@ func (e *EgressIPController) addPodEgressIPAssignment(ni util.NetInfo, egressIPN // For L2, we always attach an LRP with reroute action to the Nodes gateway router. If the pod is remote, use the local zone Node name to generate the GW router name. nodeName := pod.Spec.NodeName - if loadedEgressNode && loadedPodNode && !isLocalZonePod && isLocalZoneEgressNode && ni.IsUserDefinedNetwork() && ni.TopologyType() == types.Layer2Topology { + if loadedEgressNode && loadedPodNode && !isLocalZonePod && isLocalZoneEgressNode && ni.IsUserDefinedNetwork() && + ni.TopologyType() == types.Layer2Topology && !config.Layer2UsesTransitRouter { nodeName = status.Node } - routerName, err := getTopologyScopedRouterName(ni, nodeName) + routerName, err := e.getTopologyScopedRouterName(ni, nodeName) if err != nil { return err } @@ -2492,10 +2489,11 @@ func (e *EgressIPController) deletePodEgressIPAssignment(ni util.NetInfo, egress } // For L2, we always attach an LRP with reroute action to the Nodes gateway router. If the pod is remote, use the local zone Node name to generate the GW router name. 
nodeName := pod.Spec.NodeName - if !isLocalZonePod && isLocalZoneEgressNode && ni.IsUserDefinedNetwork() && ni.TopologyType() == types.Layer2Topology { + if !isLocalZonePod && isLocalZoneEgressNode && ni.IsUserDefinedNetwork() && + ni.TopologyType() == types.Layer2Topology && !config.Layer2UsesTransitRouter { nodeName = status.Node } - routerName, err := getTopologyScopedRouterName(ni, nodeName) + routerName, err := e.getTopologyScopedRouterName(ni, nodeName) if err != nil { return err } @@ -2511,7 +2509,8 @@ func (e *EgressIPController) deletePodEgressIPAssignment(ni util.NetInfo, egress // Case 1 - node where pod is hosted is not known // Case 2 - pod is within the local zone // case 3 - a local zone node is egress node and pod is attached to layer 2. For layer2, there is always an LRP attached to the egress Node GW router - if !loadedPodNode || isLocalZonePod || (isLocalZoneEgressNode && ni.IsUserDefinedNetwork() && ni.TopologyType() == types.Layer2Topology) { + if !loadedPodNode || isLocalZonePod || (isLocalZoneEgressNode && ni.IsUserDefinedNetwork() && + ni.TopologyType() == types.Layer2Topology) { ops, err = e.deleteReroutePolicyOps(ni, ops, status, egressIPName, nextHopIP, routerName, pod.Namespace, pod.Name) if errors.Is(err, libovsdbclient.ErrNotFound) { // if the gateway router join IP setup is already gone, then don't count it as error. 
@@ -2534,7 +2533,8 @@ func (e *EgressIPController) deletePodEgressIPAssignment(ni util.NetInfo, egress if err != nil { return fmt.Errorf("unable to delete NAT rule for status: %v, err: %v", status, err) } - } else if ni.IsUserDefinedNetwork() && ni.TopologyType() == types.Layer3Topology { + } else if ni.IsUserDefinedNetwork() && ni.TopologyType() == types.Layer3Topology || + ni.TopologyType() == types.Layer2Topology && config.Layer2UsesTransitRouter { ops, err = e.deleteGWMarkPolicyOps(ni, ops, status, pod.Namespace, pod.Name, egressIPName) if err != nil { return fmt.Errorf("unable to create GW router packet mark LRPs delete ops for pod %s/%s: %v", pod.Namespace, pod.Name, err) @@ -2653,29 +2653,51 @@ func (e *EgressIPController) deleteExternalGWPodSNATOps(ni util.NetInfo, ops []o // getGatewayNextHop determines the next hop for a given Node considering the network topology type // For layer 3, next hop is gateway routers 'router to join' port IP // For layer 2, it's the callers responsibility to ensure that the egress node is remote because a LRP should not be created -func (e *EgressIPController) getGatewayNextHop(ni util.NetInfo, nodeName string, isIPv6 bool) (net.IP, error) { - // fetch gateway router 'router to join' port IP +func (e *EgressIPController) getGatewayNextHop(ni util.NetInfo, node *corev1.Node, isIPv6 bool) (net.IP, error) { if ni.TopologyType() == types.Layer3Topology { - return e.getRouterPortIP(types.GWRouterToJoinSwitchPrefix+ni.GetNetworkScopedGWRouterName(nodeName), isIPv6) - } - - // If egress node is local, retrieve the external default gateway next hops from the Node L3 gateway annotation. - // We must pick one of the next hops to add to the LRP reroute next hops to not break ECMP. - // If an egress node is remote, retrieve the remote Nodes gateway router 'router to switch' port IP - // from the Node annotation. 
- // FIXME: remove gathering the required information from a Node annotations as this approach does not scale - // FIXME: we do not respect multiple default gateway next hops and instead pick the first IP that matches the IP family of the EIP - if ni.TopologyType() == types.Layer2Topology { - node, err := e.watchFactory.GetNode(nodeName) + return e.getRouterPortIP(types.GWRouterToJoinSwitchPrefix+ni.GetNetworkScopedGWRouterName(node.Name), isIPv6) + } else if ni.TopologyType() == types.Layer2Topology { + if config.Layer2UsesTransitRouter { + upgradedNode := util.UDNLayer2NodeUsesTransitRouter(node) + if upgradedNode { + transitRouterInfo, err := getTransitRouterInfo(ni, node) + if err != nil { + return nil, err + } + nodeTransitIP, err := util.MatchFirstIPNetFamily(isIPv6, transitRouterInfo.gatewayRouterNets) + if err != nil { + return nil, fmt.Errorf("could not find transit router IP of node %v for this family %v: %v", node, isIPv6, err) + } + return nodeTransitIP.IP, nil + } else { + gwIPs, err := udn.GetGWRouterIPs(node, ni) + if err != nil { + return nil, fmt.Errorf("failed to get gateway router IPs for node %s: %w", node.Name, err) + } + gwIP, err := util.MatchFirstIPNetFamily(isIPv6, gwIPs) + if err != nil { + return nil, fmt.Errorf("failed to find a gateway router IP for node %s that matches the EgressIP IP family (is IPv6: %v): %w", + node.Name, isIPv6, err) + } + return gwIP.IP, nil + } + } + // If egress node is local, retrieve the external default gateway next hops from the Node L3 gateway annotation. + // We must pick one of the next hops to add to the LRP reroute next hops to not break ECMP. + // If an egress node is remote, retrieve the remote Nodes gateway router 'router to switch' port IP + // from the Node annotation. 
+ // FIXME: remove gathering the required information from a Node annotations as this approach does not scale + // FIXME: we do not respect multiple default gateway next hops and instead pick the first IP that matches the IP family of the EIP + node, err := e.watchFactory.GetNode(node.Name) if err != nil { - return nil, fmt.Errorf("failed to retrive node %s: %w", nodeName, err) + return nil, fmt.Errorf("failed to retrive node %s: %w", node.Name, err) } localNode, err := e.getALocalZoneNodeName() if err != nil { return nil, err } // Node is local - if localNode == nodeName { + if localNode == node.Name { nextHopIPs, err := util.ParseNodeL3GatewayAnnotation(node) if err != nil { if util.IsAnnotationNotSetError(err) { @@ -2780,11 +2802,15 @@ func (e *EgressIPController) getTransitIP(nodeName string, wantsIPv6 bool) (stri // and no error returned. This means we searched successfully but could not find the information required to generate the next hop IP. func (e *EgressIPController) getNextHop(ni util.NetInfo, egressNodeName, egressIP, egressIPName string, isLocalZoneEgressNode, isOVNNetwork bool) (string, error) { isEgressIPv6 := utilnet.IsIPv6String(egressIP) + egressNode, err := e.watchFactory.GetNode(egressNodeName) + if err != nil { + return "", err + } if isLocalZoneEgressNode || ni.TopologyType() == types.Layer2Topology { // isOVNNetwork is true when an EgressIP is "assigned" to the Nodes primary interface (breth0). Ext traffic will egress breth0. // is OVNNetwork is false when the EgressIP is assigned to a host secondary interface (not breth0). Ext traffic will egress this interface. if isOVNNetwork { - gatewayRouterIP, err := e.getGatewayNextHop(ni, egressNodeName, isEgressIPv6) + gatewayRouterIP, err := e.getGatewayNextHop(ni, egressNode, isEgressIPv6) // return error only when we failed to retrieve the gateway IP. 
Do not return error when we can never get this IP (gw deleted) if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) { return "", fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", @@ -3003,13 +3029,9 @@ func (e *EgressIPController) deleteEgressIPStatusSetup(ni util.NetInfo, name str } if nextHopIP != "" { - router := ni.GetNetworkScopedClusterRouterName() - if ni.TopologyType() == types.Layer2Topology { - nodeName, err := e.getALocalZoneNodeName() - if err != nil { - return err - } - router = ni.GetNetworkScopedGWRouterName(nodeName) + router, err := e.getTopologyScopedLocalZoneRouterName(ni) + if err != nil { + return err } ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, ops, router, policyPredNextHop, nextHopIP) if err != nil { @@ -3058,7 +3080,7 @@ func (e *EgressIPController) ensureOnlyValidNextHops(ni util.NetInfo, name, node strings.HasPrefix(item.ExternalIDs[libovsdbops.ObjectNameKey.String()], name+dbIDEIPNamePodDivider) && item.ExternalIDs[libovsdbops.NetworkKey.String()] == ni.GetNetworkName() } - routerName, err := getTopologyScopedRouterName(ni, nodeName) + routerName, err := e.getTopologyScopedRouterName(ni, nodeName) if err != nil { return ops, err } @@ -3205,7 +3227,7 @@ func (e *EgressIPController) ensureRouterPoliciesForNetwork(ni util.NetInfo, nod if err != nil { return err } - routerName, err := getTopologyScopedRouterName(ni, localNode) + routerName, err := e.getTopologyScopedRouterName(ni, localNode) if err != nil { return err } @@ -3230,6 +3252,56 @@ func (e *EgressIPController) ensureRouterPoliciesForNetwork(ni util.NetInfo, nod return nil } +// updateNodeNextHop updates the next hop IP for reroute policies on the node's logical router. 
+// Only used during layer2 topology upgrade to change gwIP to the transit routerIP +func (e *EgressIPController) updateNodeNextHop(ni util.NetInfo, node *corev1.Node) error { + e.nodeUpdateMutex.Lock() + defer e.nodeUpdateMutex.Unlock() + transitRouterInfo, err := getTransitRouterInfo(ni, node) + if err != nil { + return err + } + gwIPs, err := udn.GetGWRouterIPs(node, ni) + if err != nil { + return fmt.Errorf("failed to get gateway router IPs for node %s: %w", node.Name, err) + } + for _, transitIP := range transitRouterInfo.gatewayRouterNets { + gwIP, err := util.MatchFirstIPNetFamily(utilnet.IsIPv6(transitIP.IP), gwIPs) + if err != nil { + return fmt.Errorf("failed to find a gateway router IP for node %s that matches the transit IP %v family: %w", + node.Name, transitIP, err) + } + // replace reroute policies with the new next hop IP + ops, err := libovsdbops.ReplaceNextHopsForLogicalRouterPolicyWithPredicateOps( + e.nbClient, nil, func(policy *nbdb.LogicalRouterPolicy) bool { + if policy.Priority != types.EgressIPReroutePriority { + return false + } + // Restrict to this network and controller + if policy.ExternalIDs[libovsdbops.NetworkKey.String()] != ni.GetNetworkName() || + policy.ExternalIDs[libovsdbops.OwnerControllerKey.String()] != e.controllerName || + policy.ExternalIDs[libovsdbops.OwnerTypeKey.String()] != libovsdbops.EgressIPOwnerType { + return false + } + for _, nextHop := range policy.Nexthops { + if nextHop == gwIP.IP.String() { + return true + } + } + return false + }, gwIP.IP.String(), transitIP.IP.String()) + if err != nil { + return fmt.Errorf("failed to build update reroute policies ops for node %s with transit IP %s: %v", + node.Name, transitIP.IP.String(), err) + } + if _, err = libovsdbops.TransactAndCheck(e.nbClient, ops); err != nil { + return fmt.Errorf("failed to update reroute policies for node %s with transit IP %s: %v", + node.Name, transitIP.IP.String(), err) + } + } + return nil +} + func (e *EgressIPController) 
ensureSwitchPoliciesForNode(ni util.NetInfo, nodeName string) error { e.nodeUpdateMutex.Lock() defer e.nodeUpdateMutex.Unlock() @@ -3445,14 +3517,9 @@ func (e *EgressIPController) ensureDefaultNoRerouteNodePolicies() error { if network.GetNetworkName() == types.DefaultNetworkName { return nil } - routerName := network.GetNetworkScopedClusterRouterName() - if network.TopologyType() == types.Layer2Topology { - // assume one node per zone only. Multi nodes per zone not supported. - nodeName, err := e.getALocalZoneNodeName() - if err != nil { - return err - } - routerName = network.GetNetworkScopedGWRouterName(nodeName) + routerName, err := e.getTopologyScopedLocalZoneRouterName(network) + if err != nil { + return err } err = ensureDefaultNoRerouteNodePolicies(e.nbClient, e.addressSetFactory, network.GetNetworkName(), routerName, e.controllerName, nodeLister, e.v4, e.v6) @@ -3822,8 +3889,8 @@ func addPktMarkToLRPOptions(options map[string]string, mark string) { // getTopologyScopedRouterName returns the router name that we attach polices to support EgressIP depending on network topology // For Layer 3, we return the network scoped OVN "cluster router" name. For layer 2, we return a Nodes network scoped OVN gateway router name. 
-func getTopologyScopedRouterName(ni util.NetInfo, nodeName string) (string, error) { - if ni.TopologyType() == types.Layer2Topology { +func (e *EgressIPController) getTopologyScopedRouterName(ni util.NetInfo, nodeName string) (string, error) { + if ni.TopologyType() == types.Layer2Topology && !config.Layer2UsesTransitRouter { if nodeName == "" { return "", fmt.Errorf("node name is required to determine the Nodes gateway router name") } @@ -3832,6 +3899,19 @@ func getTopologyScopedRouterName(ni util.NetInfo, nodeName string) (string, erro return ni.GetNetworkScopedClusterRouterName(), nil } +func (e *EgressIPController) getTopologyScopedLocalZoneRouterName(ni util.NetInfo) (string, error) { + routerName := ni.GetNetworkScopedClusterRouterName() + if ni.TopologyType() == types.Layer2Topology && !config.Layer2UsesTransitRouter { + // no support for multiple Nodes per OVN zone, therefore pick the first local zone node + localNodeName, err := e.getALocalZoneNodeName() + if err != nil { + return "", err + } + routerName = ni.GetNetworkScopedGWRouterName(localNodeName) + } + return routerName, nil +} + func isEgressIPForUDNSupported() bool { return config.OVNKubernetesFeature.EnableInterconnect && config.OVNKubernetesFeature.EnableNetworkSegmentation diff --git a/go-controller/pkg/ovn/egressip_udn_l2_test.go b/go-controller/pkg/ovn/egressip_udn_l2_test.go index 581992ab6b..6bf9faf09e 100644 --- a/go-controller/pkg/ovn/egressip_udn_l2_test.go +++ b/go-controller/pkg/ovn/egressip_udn_l2_test.go @@ -32,36 +32,32 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ) const ( - nadName1 = "nad1" - networkName1 = "network1" - networkName1_ = networkName1 + "_" - node1Name = "node1" - v4Net1 = "20.128.0.0/14" - v4Node1Net1 = "20.128.0.0/16" - v4Pod1IPNode1Net1 = "20.128.0.5" - node1DefaultRtoJIP = "100.64.0.1" - node1DefaultRtoJIPCIDR = node1DefaultRtoJIP + "/16" - node1Network1RtoSIP = "100.65.0.1" - node1Network1RtoSIPCIDR = node1Network1RtoSIP 
+ "/16" - podName3 = "egress-pod3" - v4Pod2IPNode1Net1 = "20.128.0.6" - v4Node1Tsp = "100.88.0.2" - node2Name = "node2" - v4Node2Net1 = "20.129.0.0/16" - v4Node2Tsp = "100.88.0.3" - podName4 = "egress-pod4" - v4Pod1IPNode2Net1 = "20.129.0.2" - v4Pod2IPNode2Net1 = "20.129.0.3" - node2DefaultRtoJIP = "100.64.0.2" - node2DefaultRtoJIPCIDR = node2DefaultRtoJIP + "/16" - node2Network1RtoSIP = "100.65.0.2" - node2Network1RtoSIPCIDR = node2Network1RtoSIP + "/16" - eIP1Mark = 50000 - eIP2Mark = 50001 - layer2SwitchName = "ovn_layer2_switch" - gwIP = "192.168.126.1" - gwIP2 = "192.168.127.1" - userDefinedNetworkID = "2" + nadName1 = "nad1" + networkName1 = "network1" + networkName1_ = networkName1 + "_" + node1Name = "node1" + v4Net1 = "20.128.0.0/14" + v4Node1Net1 = "20.128.0.0/16" + v4Pod1IPNode1Net1 = "20.128.0.5" + node1DefaultRtoJIP = "100.64.0.1" + node1DefaultRtoJIPCIDR = node1DefaultRtoJIP + "/16" + node1Network1JoinIP = "100.65.0.1" + node1Network1JoinCIDR = node1Network1JoinIP + "/16" + node1Network1TransitIP = "100.88.0.3" + node1Network1TransitCIDR = node1Network1TransitIP + "/31" + podName3 = "egress-pod3" + v4Node1Tsp = "100.88.0.2" + node2Name = "node2" + v4Node2Net1 = "20.129.0.0/16" + v4Node2Tsp = "100.88.0.3" + podName4 = "egress-pod4" + v4Pod2IPNode2Net1 = "20.129.0.3" + node2Network1JoinIP = "100.65.0.2" + node2Network1TransitIP = "100.88.0.5" + eIP1Mark = 50000 + layer2SwitchName = "ovn_layer2_switch" + gwIP = "192.168.126.1" + userDefinedNetworkID = "2" ) getEgressIPStatusLen := func(egressIPName string) func() int { @@ -145,10 +141,11 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol Name: networkName1, Type: "ovn-k8s-cni-overlay", }, - Role: ovntypes.NetworkRolePrimary, - Topology: ovntypes.Layer2Topology, - NADName: nadName, - Subnets: v4Net1, + Role: ovntypes.NetworkRolePrimary, + Topology: ovntypes.Layer2Topology, + NADName: nadName, + Subnets: v4Net1, + TransitSubnet: config.ClusterManager.V4TransitSubnet, } nad, 
err := newNetworkAttachmentDefinition( eipNamespace2, @@ -156,7 +153,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -170,6 +166,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", @@ -185,6 +182,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1", "next-hops": ["192.168.126.1"]}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } node2 := getNodeObj(node2Name, node2Annotations, labels) eIP := egressipv1.EgressIP{ @@ -238,21 +236,26 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol Ports: []string{"k8s-" + node1Name + "-UUID"}, }, // UDN start - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP2, node2Network1RtoSIP}, 
netInfo.GetNetworkName(), DefaultNetworkControllerName), // stale gateway - getReRoutePolicyForController(egressIPName, eipNamespace2, podName, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP2, node2Network1RtoSIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), // stale pod + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1JoinCIDR, node2Network1JoinIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), // stale gateway + getReRoutePolicyForController(egressIPName, eipNamespace2, podName, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1JoinCIDR, node2Network1JoinIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), // stale pod &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, &nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, Policies: []string{getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), getReRoutePolicyUUID(eipNamespace2, podName, IPFamilyValueV4, netInfo.GetNetworkName())}, // stale 
policies }, + &nbdb.LogicalRouter{ + UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -413,8 +416,12 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressNodeIPsASv4, // UDN - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP, node2Network1RtoSIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), - getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP, node2Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), DefaultNetworkControllerName, egressIPServedPodsASUDNv4.Name, 
egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), &nbdb.LogicalRouterPolicy{ Priority: ovntypes.DefaultNoRereoutePriority, @@ -440,20 +447,27 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, + }, + &nbdb.LogicalRouter{ + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{ + "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", + fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), + getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), + getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: 
map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, - Policies: []string{getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), - getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName()), - "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", - fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), "udn-enabled-svc-no-reroute-UUID", - }, + Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), + getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", @@ -513,10 +527,11 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol Name: networkName1, Type: "ovn-k8s-cni-overlay", }, - Role: ovntypes.NetworkRolePrimary, - Topology: ovntypes.Layer2Topology, - NADName: nadName, - Subnets: v4Net1, + Role: ovntypes.NetworkRolePrimary, + Topology: ovntypes.Layer2Topology, + NADName: nadName, + Subnets: v4Net1, + TransitSubnet: config.ClusterManager.V4TransitSubnet, } nad, err := newNetworkAttachmentDefinition( eipNamespace2, @@ -524,7 +539,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -539,6 +553,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, 
"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", @@ -555,6 +570,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ @@ -611,14 +627,19 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol }, // UDN start &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, + }, + &nbdb.LogicalRouter{ + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: 
netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalSwitchPort{ @@ -783,9 +804,14 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressNodeIPsASv4, // UDN - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP, node2Network1RtoSIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), - getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), - getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), DefaultNetworkControllerName, egressIPServedPodsASUDNv4.Name, egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP, node2Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), + getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), 
DefaultNetworkControllerName, + egressIPServedPodsASUDNv4.Name, egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), &nbdb.LogicalRouterPolicy{ Priority: ovntypes.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == %s && ip4.dst == %s", v4Net1, v4Net1), @@ -810,14 +836,13 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, &nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, Policies: []string{ "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", @@ -825,6 +850,14 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, + &nbdb.LogicalRouter{ + UUID: 
netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), + getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, + }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -917,7 +950,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressNodeIPsASv4, // UDN - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node2Network1RtoSIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node2Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), DefaultNetworkControllerName, egressIPServedPodsASUDNv4.Name, egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), &nbdb.LogicalRouterPolicy{ Priority: ovntypes.DefaultNoRereoutePriority, @@ -943,17 +976,23 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: 
ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, &nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, - Policies: []string{"udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{ + "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName())}, + }, + &nbdb.LogicalRouter{ + UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalSwitchPort{ @@ -1019,10 +1058,11 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol Name: networkName1, Type: "ovn-k8s-cni-overlay", }, - Role: ovntypes.NetworkRolePrimary, - Topology: ovntypes.Layer2Topology, - NADName: nadName, - Subnets: v4Net1, + Role: 
ovntypes.NetworkRolePrimary, + Topology: ovntypes.Layer2Topology, + NADName: nadName, + Subnets: v4Net1, + TransitSubnet: config.ClusterManager.V4TransitSubnet, } nad, err := newNetworkAttachmentDefinition( eipNamespace2, @@ -1030,7 +1070,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1045,6 +1084,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", @@ -1061,6 +1101,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ @@ -1117,14 +1158,19 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol }, // UDN start &nbdb.LogicalRouterPort{ - 
UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, + }, + &nbdb.LogicalRouter{ + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalSwitchPort{ @@ -1283,8 +1329,12 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressNodeIPsASv4, // UDN - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP, node2Network1RtoSIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), - getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP, node2Network1TransitIP}, 
netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), DefaultNetworkControllerName, egressIPServedPodsASUDNv4.Name, egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), &nbdb.LogicalRouterPolicy{ Priority: ovntypes.DefaultNoRereoutePriority, @@ -1310,21 +1360,27 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, &nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", 
ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, Policies: []string{ "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), - getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName()), - }, + getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, + }, + &nbdb.LogicalRouter{ + UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), + getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", @@ -1436,17 +1492,23 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + 
ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, &nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, - Policies: []string{"udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{ + "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName())}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: secConInfo.bnc.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + }, + &nbdb.LogicalRouter{ + UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", @@ -1501,10 +1563,11 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol Name: networkName1, Type: "ovn-k8s-cni-overlay", }, - Role: ovntypes.NetworkRolePrimary, - Topology: ovntypes.Layer2Topology, - NADName: nadNsName, - Subnets: v4Net1, + Role: ovntypes.NetworkRolePrimary, + Topology: 
ovntypes.Layer2Topology, + NADName: nadNsName, + Subnets: v4Net1, + TransitSubnet: config.ClusterManager.V4TransitSubnet, } nad, err := newNetworkAttachmentDefinition( eipNamespace2, @@ -1512,7 +1575,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1527,6 +1589,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", @@ -1543,6 +1606,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ @@ -1599,14 +1663,19 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol }, // UDN start &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + 
networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, + }, + &nbdb.LogicalRouter{ + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalSwitchPort{ @@ -1772,7 +1841,9 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressNodeIPsASv4, // UDN - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP, node2Network1RtoSIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP, node2Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), 
getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), DefaultNetworkControllerName, egressIPServedPodsASUDNv4.Name, egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), &nbdb.LogicalRouterPolicy{ Priority: ovntypes.DefaultNoRereoutePriority, @@ -1798,20 +1869,26 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, &nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, - Policies: []string{"udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", - "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", + Policies: []string{ + "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName())}, }, + 
&nbdb.LogicalRouter{ + UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName())}, + }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -1865,10 +1942,11 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol Name: networkName1, Type: "ovn-k8s-cni-overlay", }, - Role: ovntypes.NetworkRolePrimary, - Topology: ovntypes.Layer2Topology, - NADName: nadNsName, - Subnets: v4Net1, + Role: ovntypes.NetworkRolePrimary, + Topology: ovntypes.Layer2Topology, + NADName: nadNsName, + Subnets: v4Net1, + TransitSubnet: config.ClusterManager.V4TransitSubnet, } nad, err := newNetworkAttachmentDefinition( eipNamespace2, @@ -1876,7 +1954,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1891,6 +1968,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, 
gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", @@ -1907,6 +1985,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ @@ -1963,14 +2042,19 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol }, // UDN start &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, + }, + &nbdb.LogicalRouter{ + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Ports: []string{ovntypes.RouterToTransitRouterPrefix + 
ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalSwitchPort{ @@ -2122,7 +2206,9 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressNodeIPsASv4, // UDN - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP, node2Network1RtoSIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP, node2Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), DefaultNetworkControllerName, egressIPServedPodsASUDNv4.Name, egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), &nbdb.LogicalRouterPolicy{ Priority: ovntypes.DefaultNoRereoutePriority, @@ -2148,20 +2234,26 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, 
&nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, - Policies: []string{"udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", - "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", + Policies: []string{ + "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName())}, }, + &nbdb.LogicalRouter{ + UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName())}, + }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -2218,10 +2310,11 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol Name: networkName1, Type: "ovn-k8s-cni-overlay", }, - Role: ovntypes.NetworkRolePrimary, - Topology: ovntypes.Layer2Topology, - NADName: nadName, - Subnets: v4Net1, + Role: ovntypes.NetworkRolePrimary, + Topology: ovntypes.Layer2Topology, + NADName: nadName, + Subnets: 
v4Net1, + TransitSubnet: config.ClusterManager.V4TransitSubnet, } nad, err := newNetworkAttachmentDefinition( eipNamespace2, @@ -2229,7 +2322,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2244,6 +2336,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", @@ -2260,6 +2353,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ @@ -2316,14 +2410,19 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol }, // UDN start &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: 
ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, + }, + &nbdb.LogicalRouter{ + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalSwitchPort{ @@ -2490,8 +2589,12 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressNodeIPsASv4, // UDN - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP, node2Network1RtoSIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), - getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP, node2Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, 
eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), DefaultNetworkControllerName, egressIPServedPodsASUDNv4.Name, egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), &nbdb.LogicalRouterPolicy{ Priority: ovntypes.DefaultNoRereoutePriority, @@ -2517,21 +2620,27 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, &nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), 
ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, - Policies: []string{"udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", - "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", + Policies: []string{ + "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), - getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName()), - }, + getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, + }, + &nbdb.LogicalRouter{ + UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), + getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", @@ -2586,10 +2695,11 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol Name: networkName1, Type: "ovn-k8s-cni-overlay", }, - Role: ovntypes.NetworkRolePrimary, - Topology: ovntypes.Layer2Topology, - NADName: nadName, - Subnets: v4Net1, + Role: ovntypes.NetworkRolePrimary, + Topology: ovntypes.Layer2Topology, + NADName: nadName, + Subnets: v4Net1, + TransitSubnet: config.ClusterManager.V4TransitSubnet, } nad, err := newNetworkAttachmentDefinition( eipNamespace2, @@ -2597,7 +2707,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user 
defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2612,6 +2721,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", @@ -2628,6 +2738,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, "default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop": "192.168.126.1", "next-hops": ["192.168.126.1"]}}`, networkName1, v4Net1, gwIP, gwIP), + util.Layer2TopologyVersion: "2.0", } node2 := getNodeObj(node2Name, node2Annotations, nil) oneNodeStatus := []egressipv1.EgressIPStatusItem{ @@ -2680,14 +2791,19 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol }, // UDN start &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + 
node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, + }, + &nbdb.LogicalRouter{ + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, }, &nbdb.LogicalSwitchPort{ @@ -2849,8 +2965,12 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressNodeIPsASv4, // UDN - getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), - getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{gwIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName2, v4Pod1IPNode1Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getReRoutePolicyForController(egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, eIP1Mark, IPFamilyValueV4, []string{node1Network1TransitIP}, netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName2, 
v4Pod1IPNode1Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), + getGWPktMarkLRPForController(eIP1Mark, egressIPName, eipNamespace2, podName4, v4Pod2IPNode2Net1, IPFamilyValueV4, + netInfo.GetNetworkName(), DefaultNetworkControllerName), getNoReRoutePolicyForUDNEnabledSvc(false, netInfo.GetNetworkName(), DefaultNetworkControllerName, egressIPServedPodsASUDNv4.Name, egressSVCServedPodsASv4.Name, udnEnabledSvcV4.Name), &nbdb.LogicalRouterPolicy{ Priority: ovntypes.DefaultNoRereoutePriority, @@ -2876,21 +2996,27 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, }, &nbdb.LogicalRouter{ - UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", - Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Name: netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, - Policies: []string{"udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", - "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", + Policies: []string{ + 
"udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), getReRoutePolicyUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), - getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName()), - }, + getReRoutePolicyUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, + }, + &nbdb.LogicalRouter{ + UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", + Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), + getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", @@ -3006,19 +3132,23 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, netInfo.GetNetworkName(), DefaultNetworkControllerName).GetExternalIDs(), }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID", - Name: ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName, - Networks: []string{node1Network1RtoSIPCIDR}, + UUID: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID", + Name: ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name, + Networks: []string{node1Network1JoinCIDR, node1Network1TransitCIDR}, + }, + &nbdb.LogicalRouter{ + Name: 
netInfo.GetNetworkScopedClusterRouterName(), + UUID: netInfo.GetNetworkScopedClusterRouterName() + "-UUID", + ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, + Policies: []string{ + "udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", + fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName())}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), - Ports: []string{ovntypes.RouterToSwitchPrefix + networkName1_ + layer2SwitchName + "-UUID"}, + Ports: []string{ovntypes.RouterToTransitRouterPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer2Topology}, - Policies: []string{"udn-default-no-reroute-node-UUID", "udn-default-no-reroute-UUID", - "udn-no-reroute-service-UUID", "udn-enabled-svc-no-reroute-UUID", - fmt.Sprintf("%s-no-reroute-reply-traffic", netInfo.GetNetworkName()), - }, }, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", diff --git a/go-controller/pkg/ovn/egressip_udn_l3_test.go b/go-controller/pkg/ovn/egressip_udn_l3_test.go index 28035e8374..cf6875f446 100644 --- a/go-controller/pkg/ovn/egressip_udn_l3_test.go +++ b/go-controller/pkg/ovn/egressip_udn_l3_test.go @@ -157,7 +157,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -534,7 +533,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1055,7 +1053,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1791,7 +1788,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2161,7 +2157,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2522,7 +2517,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol netconf, ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} netInfo, err := util.NewNetInfo(&netconf) gomega.Expect(err).NotTo(gomega.HaveOccurred()) diff --git a/go-controller/pkg/ovn/gateway.go b/go-controller/pkg/ovn/gateway.go index 4c4f63d3fa..ee7bd61c0c 100644 --- a/go-controller/pkg/ovn/gateway.go +++ b/go-controller/pkg/ovn/gateway.go @@ -14,6 +14,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" + "k8s.io/utils/ptr" libovsdbclient "github.com/ovn-kubernetes/libovsdb/client" @@ 
-54,6 +55,8 @@ type GatewayManager struct { // Cluster wide router Load_Balancer_Group UUID. // Includes all node gateway routers. routerLoadBalancerGroupUUID string + + transitRouterInfo *transitRouterInfo } type GatewayOption func(*GatewayManager) @@ -65,14 +68,18 @@ func NewGatewayManagerForLayer2Topology( nbClient libovsdbclient.Client, netInfo util.NetInfo, watchFactory *factory.WatchFactory, + useTransitRouter bool, opts ...GatewayOption, ) *GatewayManager { + routerName := "" + if useTransitRouter { + routerName = netInfo.GetNetworkScopedClusterRouterName() + } return newGWManager( nodeName, - "", - netInfo.GetNetworkScopedGWRouterName(nodeName), + routerName, netInfo.GetNetworkScopedExtSwitchName(nodeName), - netInfo.GetNetworkScopedName(types.OVNLayer2Switch), + netInfo.GetNetworkScopedSwitchName(""), coopUUID, kube, nbClient, @@ -94,7 +101,6 @@ func NewGatewayManager( return newGWManager( nodeName, netInfo.GetNetworkScopedClusterRouterName(), - netInfo.GetNetworkScopedGWRouterName(nodeName), netInfo.GetNetworkScopedExtSwitchName(nodeName), netInfo.GetNetworkScopedJoinSwitchName(), coopUUID, @@ -107,7 +113,7 @@ func NewGatewayManager( } func newGWManager( - nodeName, clusterRouterName, gwRouterName, extSwitchName, joinSwitchName string, + nodeName, clusterRouterName, extSwitchName, joinSwitchName string, coopUUID string, kube kube.InterfaceOVN, nbClient libovsdbclient.Client, @@ -117,7 +123,7 @@ func newGWManager( gwManager := &GatewayManager{ nodeName: nodeName, clusterRouterName: clusterRouterName, - gwRouterName: gwRouterName, + gwRouterName: netInfo.GetNetworkScopedGWRouterName(nodeName), extSwitchName: extSwitchName, joinSwitchName: joinSwitchName, coppUUID: coopUUID, @@ -241,7 +247,7 @@ func (gw *GatewayManager) cleanupStalePodSNATs(nodeName string, nodeIPs []*net.I return nil } -func (gw *GatewayManager) createGWRouter(l3GatewayConfig *util.L3GatewayConfig, gwLRPJoinIPs []*net.IPNet) (*nbdb.LogicalRouter, error) { +func (gw *GatewayManager) 
createGWRouter(gwConfig *GatewayConfig) (*nbdb.LogicalRouter, error) { // Create a gateway router. dynamicNeighRouters := "true" if config.OVNKubernetesFeature.EnableInterconnect { @@ -251,7 +257,7 @@ func (gw *GatewayManager) createGWRouter(l3GatewayConfig *util.L3GatewayConfig, logicalRouterOptions := map[string]string{ "always_learn_from_arp_request": "false", "dynamic_neigh_routers": dynamicNeighRouters, - "chassis": l3GatewayConfig.ChassisID, + "chassis": gwConfig.annoConfig.ChassisID, "lb_force_snat_ip": "router_ip", "mac_binding_age_threshold": types.GRMACBindingAgeThreshold, } @@ -267,14 +273,10 @@ func (gw *GatewayManager) createGWRouter(l3GatewayConfig *util.L3GatewayConfig, // when it comes to SNATing traffic after load balancing. // Hence for Layer2 UDPNs let's set the snat-ip explicitly to the // joinsubnetIP - joinIPDualStack := make([]string, len(gwLRPJoinIPs)) - for i, gwLRPJoinIP := range gwLRPJoinIPs { - joinIPDualStack[i] = gwLRPJoinIP.IP.String() - } - logicalRouterOptions["lb_force_snat_ip"] = strings.Join(joinIPDualStack, " ") + logicalRouterOptions["lb_force_snat_ip"] = strings.Join(util.IPNetsIPToStringSlice(gwConfig.gwRouterJoinNets), " ") } - physicalIPs := make([]string, len(l3GatewayConfig.IPAddresses)) - for i, ip := range l3GatewayConfig.IPAddresses { + physicalIPs := make([]string, len(gwConfig.annoConfig.IPAddresses)) + for i, ip := range gwConfig.annoConfig.IPAddresses { physicalIPs[i] = ip.IP.String() } logicalRouterExternalIDs := map[string]string{ @@ -295,7 +297,7 @@ func (gw *GatewayManager) createGWRouter(l3GatewayConfig *util.L3GatewayConfig, if gw.clusterLoadBalancerGroupUUID != "" { gwRouter.LoadBalancerGroup = []string{gw.clusterLoadBalancerGroupUUID} - if l3GatewayConfig.NodePortEnable && gw.routerLoadBalancerGroupUUID != "" { + if gwConfig.annoConfig.NodePortEnable && gw.routerLoadBalancerGroupUUID != "" { // add routerLoadBalancerGroupUUID to the gateway router only if nodePort is enabled gwRouter.LoadBalancerGroup = 
append(gwRouter.LoadBalancerGroup, gw.routerLoadBalancerGroupUUID) } @@ -309,31 +311,29 @@ func (gw *GatewayManager) createGWRouter(l3GatewayConfig *util.L3GatewayConfig, return &gwRouter, nil } -func (gw *GatewayManager) getGWRouterPeerPortName() string { - // In Layer2 networks there is no join switch and the gw.joinSwitchName points to the cluster switch. - // Ensure that the ports are named appropriately, this is important for the logical router policies - // created for local node access. - // TODO(kyrtapz): Clean this up for clarity as part of https://github.com/ovn-org/ovn-kubernetes/issues/4689 +func (gw *GatewayManager) getGWRouterPeerRouterPortName() string { + return types.TransitRouterToRouterPrefix + gw.gwRouterName +} + +func (gw *GatewayManager) getGWRouterPeerSwitchPortName() string { if gw.netInfo.TopologyType() == types.Layer2Topology { return types.SwitchToRouterPrefix + gw.joinSwitchName } - return types.JoinSwitchToGWRouterPrefix + gw.gwRouterName } func (gw *GatewayManager) getGWRouterPortName() string { - // In Layer2 networks there is no join switch and the gw.joinSwitchName points to the cluster switch. - // Ensure that the ports are named appropriately, this is important for the logical router policies - // created for local node access. 
- // TODO(kyrtapz): Clean this up for clarity as part of https://github.com/ovn-org/ovn-kubernetes/issues/4689 if gw.netInfo.TopologyType() == types.Layer2Topology { + if gw.transitRouterInfo != nil { + return types.RouterToTransitRouterPrefix + gw.gwRouterName + } return types.RouterToSwitchPrefix + gw.joinSwitchName } return types.GWRouterToJoinSwitchPrefix + gw.gwRouterName } -func (gw *GatewayManager) createGWRouterPeerPort(nodeName string) error { - gwSwitchPort := gw.getGWRouterPeerPortName() +func (gw *GatewayManager) createGWRouterPeerSwitchPort(nodeName string) error { + gwSwitchPort := gw.getGWRouterPeerSwitchPortName() gwRouterPortName := gw.getGWRouterPortName() logicalSwitchPort := nbdb.LogicalSwitchPort{ @@ -375,25 +375,76 @@ func (gw *GatewayManager) createGWRouterPeerPort(nodeName string) error { return err } -func (gw *GatewayManager) createGWRouterPort(hostSubnets []*net.IPNet, gwLRPJoinIPs []*net.IPNet, - enableGatewayMTU bool, gwRouter *nbdb.LogicalRouter) ([]net.IP, error) { - gwLRPIPs := make([]net.IP, 0) +func (gw *GatewayManager) deleteGWRouterPeerSwitchPort() error { + // Remove the patch port that connects join switch to gateway router + lsp := nbdb.LogicalSwitchPort{Name: gw.getGWRouterPeerSwitchPortName()} + sw := nbdb.LogicalSwitch{Name: gw.joinSwitchName} + err := libovsdbops.DeleteLogicalSwitchPorts(gw.nbClient, &sw, &lsp) + if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) { + return fmt.Errorf("failed to delete logical switch port %s from switch %s: %w", lsp.Name, sw.Name, err) + } + return nil +} + +func (gw *GatewayManager) createGWRouterPeerRouterPort() error { + gwPeerPortName := gw.getGWRouterPeerRouterPortName() + gwRouterPortName := gw.getGWRouterPortName() + + ovnClusterRouterToGWRouterPort := nbdb.LogicalRouterPort{ + Name: gwPeerPortName, + MAC: util.IPAddrToHWAddr(gw.transitRouterInfo.transitRouterNets[0].IP).String(), + Networks: util.IPNetsToStringSlice(gw.transitRouterInfo.transitRouterNets), + Options: 
map[string]string{ + libovsdbops.RequestedTnlKey: fmt.Sprintf("%d", gw.transitRouterInfo.nodeID), + }, + Peer: ptr.To(gwRouterPortName), + ExternalIDs: map[string]string{ + types.NetworkExternalID: gw.netInfo.GetNetworkName(), + types.TopologyExternalID: gw.netInfo.TopologyType(), + }, + } + + ovnClusterRouter := nbdb.LogicalRouter{Name: gw.clusterRouterName} + err := libovsdbops.CreateOrUpdateLogicalRouterPort(gw.nbClient, &ovnClusterRouter, + &ovnClusterRouterToGWRouterPort, nil, &ovnClusterRouterToGWRouterPort.MAC, &ovnClusterRouterToGWRouterPort.Networks, + &ovnClusterRouterToGWRouterPort.Options, &ovnClusterRouterToGWRouterPort.Peer, &ovnClusterRouterToGWRouterPort.ExternalIDs) + if err != nil { + return fmt.Errorf("failed to create port %+v on router %+v: %v", ovnClusterRouterToGWRouterPort, ovnClusterRouter, err) + } + return nil +} + +func (gw *GatewayManager) deleteGWRouterPeerRouterPort() error { + ovnClusterRouterToGWRouterPort := nbdb.LogicalRouterPort{Name: gw.getGWRouterPeerRouterPortName()} + ovnClusterRouter := nbdb.LogicalRouter{Name: gw.clusterRouterName} + err := libovsdbops.DeleteLogicalRouterPorts(gw.nbClient, &ovnClusterRouter, &ovnClusterRouterToGWRouterPort) + if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) { + return fmt.Errorf("failed to delete router port %s from router %s: %w", ovnClusterRouterToGWRouterPort.Name, ovnClusterRouter.Name, err) + } + return nil +} + +func (gw *GatewayManager) createGWRouterPort(gwConfig *GatewayConfig, + enableGatewayMTU bool, gwRouter *nbdb.LogicalRouter) error { gwLRPNetworks := []string{} - for _, gwLRPJoinIP := range gwLRPJoinIPs { - gwLRPIPs = append(gwLRPIPs, gwLRPJoinIP.IP) - gwLRPNetworks = append(gwLRPNetworks, gwLRPJoinIP.String()) + for _, gwRouterJoinNet := range gwConfig.gwRouterJoinNets { + gwLRPNetworks = append(gwLRPNetworks, gwRouterJoinNet.String()) } - if gw.netInfo.TopologyType() == types.Layer2Topology { + if gw.netInfo.TopologyType() == types.Layer2Topology && 
gw.transitRouterInfo == nil { // At layer2 GR LRP acts as the layer3 ovn_cluster_router so we need // to configure here the .1 address, this will work only for IC with // one node per zone, since ARPs for .1 will not go beyond local switch. // This is being done to add the ICMP SNATs for .1 podSubnet that OVN GR generates - for _, subnet := range hostSubnets { - gwLRPIPs = append(gwLRPIPs, gw.netInfo.GetNodeGatewayIP(subnet).IP) + for _, subnet := range gwConfig.hostSubnets { gwLRPNetworks = append(gwLRPNetworks, gw.netInfo.GetNodeGatewayIP(subnet).String()) } } - gwLRPMAC := util.IPAddrToHWAddr(gwLRPIPs[0]) + if gw.netInfo.TopologyType() == types.Layer2Topology && gw.transitRouterInfo != nil { + for _, gatewayRouterTransitNetwork := range gw.transitRouterInfo.gatewayRouterNets { + gwLRPNetworks = append(gwLRPNetworks, gatewayRouterTransitNetwork.String()) + } + } + gwLRPMAC := util.IPAddrToHWAddr(gwConfig.gwRouterJoinNets[0].IP) var options map[string]string if enableGatewayMTU { @@ -413,8 +464,12 @@ func (gw *GatewayManager) createGWRouterPort(hostSubnets []*net.IPNet, gwLRPJoin types.NetworkExternalID: gw.netInfo.GetNetworkName(), types.TopologyExternalID: gw.netInfo.TopologyType(), } + if gw.netInfo.TopologyType() == types.Layer2Topology && gw.transitRouterInfo != nil { + gwRouterPort.Peer = ptr.To(gw.getGWRouterPeerRouterPortName()) + } + _, isNetIPv6 := gw.netInfo.IPMode() - if gw.netInfo.TopologyType() == types.Layer2Topology && isNetIPv6 && config.IPv6Mode { + if gw.netInfo.TopologyType() == types.Layer2Topology && isNetIPv6 && config.IPv6Mode && gw.transitRouterInfo == nil { gwRouterPort.Ipv6RaConfigs = map[string]string{ "address_mode": "dhcpv6_stateful", "send_periodic": "true", @@ -432,16 +487,16 @@ func (gw *GatewayManager) createGWRouterPort(hostSubnets []*net.IPNet, gwLRPJoin &gwRouterPort, nil, &gwRouterPort.MAC, &gwRouterPort.Networks, &gwRouterPort.Options) if err != nil { - return nil, fmt.Errorf("failed to create port %+v on router %+v: %v", 
gwRouterPort, gwRouter, err) + return fmt.Errorf("failed to create port %+v on router %+v: %v", gwRouterPort, gwRouter, err) } - return gwLRPIPs, nil + return nil } -func (gw *GatewayManager) updateGWRouterStaticRoutes(clusterIPSubnet, drLRPIfAddrs []*net.IPNet, - l3GatewayConfig *util.L3GatewayConfig, externalRouterPort string, gwRouter *nbdb.LogicalRouter) error { - if len(drLRPIfAddrs) > 0 { - for _, entry := range clusterIPSubnet { - drLRPIfAddr, err := util.MatchFirstIPNetFamily(utilnet.IsIPv6CIDR(entry), drLRPIfAddrs) +func (gw *GatewayManager) updateGWRouterStaticRoutes(gwConfig *GatewayConfig, externalRouterPort string, + gwRouter *nbdb.LogicalRouter) error { + if len(gwConfig.ovnClusterLRPToJoinIfAddrs) > 0 { + for _, entry := range gwConfig.clusterSubnets { + drLRPIfAddr, err := util.MatchFirstIPNetFamily(utilnet.IsIPv6CIDR(entry), gwConfig.ovnClusterLRPToJoinIfAddrs) if err != nil { return fmt.Errorf("failed to add a static route in GR %s with distributed "+ "router as the nexthop: %v", @@ -512,7 +567,7 @@ func (gw *GatewayManager) updateGWRouterStaticRoutes(clusterIPSubnet, drLRPIfAdd } } - nextHops := l3GatewayConfig.NextHops + nextHops := gwConfig.annoConfig.NextHops // Add default gateway routes in GR for _, nextHop := range nextHops { var allIPs string @@ -544,20 +599,64 @@ func (gw *GatewayManager) updateGWRouterStaticRoutes(clusterIPSubnet, drLRPIfAdd } } + if gw.netInfo.TopologyType() == types.Layer2Topology && gw.transitRouterInfo != nil { + for _, subnet := range gwConfig.hostSubnets { + nexthop, err := util.MatchFirstIPNetFamily(utilnet.IsIPv6(subnet.IP), gw.transitRouterInfo.transitRouterNets) + if err != nil { + return err + } + subnetRoute := nbdb.LogicalRouterStaticRoute{ + IPPrefix: subnet.String(), + Nexthop: nexthop.IP.String(), + OutputPort: ptr.To(gw.getGWRouterPortName()), + } + subnetRoute.ExternalIDs = map[string]string{ + types.NetworkExternalID: gw.netInfo.GetNetworkName(), + types.TopologyExternalID: gw.netInfo.TopologyType(), + 
} + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.OutputPort != nil && *item.OutputPort == *subnetRoute.OutputPort && item.IPPrefix == subnetRoute.IPPrefix && + libovsdbops.PolicyEqualPredicate(subnetRoute.Policy, item.Policy) + } + if err := libovsdbops.CreateOrReplaceLogicalRouterStaticRouteWithPredicate(gw.nbClient, gw.gwRouterName, &subnetRoute, + p, &subnetRoute.Nexthop); err != nil { + return fmt.Errorf("error creating static route %+v in GW router %s: %v", subnetRoute, gw.gwRouterName, err) + } + } + } + return nil } -func (gw *GatewayManager) updateClusterRouterStaticRoutes(hostSubnets []*net.IPNet, gwLRPIPs []net.IP) error { +func (gw *GatewayManager) updateClusterRouterStaticRoutes(gwConfig *GatewayConfig, gwRouterIPs []net.IP) error { // We need to add a route to the Gateway router's IP, on the // cluster router, to ensure that the return traffic goes back // to the same gateway router // // This can be removed once https://bugzilla.redhat.com/show_bug.cgi?id=1891516 is fixed. // FIXME(trozet): if LRP IP is changed, we do not remove stale instances of these routes - for _, gwLRPIP := range gwLRPIPs { + nextHops := gwRouterIPs + if gw.netInfo.TopologyType() == types.Layer2Topology && gw.transitRouterInfo != nil { + nextHops = util.IPNetsToIPs(gw.transitRouterInfo.gatewayRouterNets) + } + + for _, gwRouterIP := range gwRouterIPs { + nextHop, err := util.MatchIPFamily(utilnet.IsIPv6(gwRouterIP), nextHops) + if err != nil { + if gw.transitRouterInfo != nil { + // for layer2 networks with transit router it is not an error. 
+ // JoinIPs are allocated for both IP families always, but transit router IPs and routes + // are only created for the actual IP families of the network + continue + } + return fmt.Errorf("failed to add source IP address based "+ + "routes in distributed router %s: %v", + gw.clusterRouterName, err) + } + lrsr := nbdb.LogicalRouterStaticRoute{ - IPPrefix: gwLRPIP.String(), - Nexthop: gwLRPIP.String(), + IPPrefix: gwRouterIP.String(), + Nexthop: nextHop[0].String(), } if gw.netInfo.IsUserDefinedNetwork() { lrsr.ExternalIDs = map[string]string{ @@ -571,21 +670,21 @@ func (gw *GatewayManager) updateClusterRouterStaticRoutes(hostSubnets []*net.IPN } if gw.clusterRouterName != "" { - err := libovsdbops.CreateOrReplaceLogicalRouterStaticRouteWithPredicate(gw.nbClient, + err = libovsdbops.CreateOrReplaceLogicalRouterStaticRouteWithPredicate(gw.nbClient, gw.clusterRouterName, &lrsr, p, &lrsr.Nexthop) if err != nil { return fmt.Errorf("error creating static route %+v in %s: %v", lrsr, gw.clusterRouterName, err) } } } + if gw.clusterRouterName == "" { + return nil + } // Add source IP address based routes in distributed router // for this gateway router. 
- for _, hostSubnet := range hostSubnets { - if gw.clusterRouterName == "" { - break - } - gwLRPIP, err := util.MatchIPFamily(utilnet.IsIPv6CIDR(hostSubnet), gwLRPIPs) + for _, hostSubnet := range gwConfig.hostSubnets { + nextHop, err := util.MatchIPFamily(utilnet.IsIPv6CIDR(hostSubnet), nextHops) if err != nil { return fmt.Errorf("failed to add source IP address based "+ "routes in distributed router %s: %v", @@ -595,7 +694,7 @@ func (gw *GatewayManager) updateClusterRouterStaticRoutes(hostSubnets []*net.IPN lrsr := nbdb.LogicalRouterStaticRoute{ Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, IPPrefix: hostSubnet.String(), - Nexthop: gwLRPIP[0].String(), + Nexthop: nextHop[0].String(), } if config.Gateway.Mode != config.GatewayModeLocal { @@ -627,18 +726,16 @@ func (gw *GatewayManager) updateClusterRouterStaticRoutes(hostSubnets []*net.IPN // If migrating from shared to local gateway, let's remove the static routes towards // join switch for the hostSubnet prefix and any potential routes for UDN enabled services. 
// Note syncManagementPort happens before gateway sync so only remove things pointing to join subnet - if gw.clusterRouterName != "" { - p := func(item *nbdb.LogicalRouterStaticRoute) bool { - if _, ok := item.ExternalIDs[types.UDNEnabledServiceExternalID]; ok { - return true - } - return item.IPPrefix == lrsr.IPPrefix && item.Policy != nil && *item.Policy == *lrsr.Policy && - gw.containsJoinIP(net.ParseIP(item.Nexthop)) - } - err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(gw.nbClient, gw.clusterRouterName, p) - if err != nil { - return fmt.Errorf("error deleting static route %+v in GR %s: %v", lrsr, gw.clusterRouterName, err) + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + if _, ok := item.ExternalIDs[types.UDNEnabledServiceExternalID]; ok { + return true } + return item.IPPrefix == lrsr.IPPrefix && item.Policy != nil && *item.Policy == *lrsr.Policy && + gw.containsJoinIP(net.ParseIP(item.Nexthop)) + } + err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(gw.nbClient, gw.clusterRouterName, p) + if err != nil { + return fmt.Errorf("error deleting static route %+v in GR %s: %v", lrsr, gw.clusterRouterName, err) } } } @@ -659,7 +756,7 @@ func (gw *GatewayManager) updateClusterRouterStaticRoutes(hostSubnets []*net.IPN // This function also updates SNAT created by `updateGWRouterNAT`, because NATs don't use ExternalIDs, // and their fields are used to find equivalent NATs. That means on gateway IPs change, instead of updating // the old NAT, we would create a new one. 
FIXME: add externalIDs to NATs -func (gw *GatewayManager) syncNATsForGRIPChange(externalIPs, oldExtIPs, gwLRPIPs []net.IP, +func (gw *GatewayManager) syncNATsForGRIPChange(gwConfig *GatewayConfig, oldExtIPs, gwRouterIPs []net.IP, gwRouter, oldGWRouter *nbdb.LogicalRouter) error { // if config.Gateway.DisabledSNATMultipleGWs is not set (by default it is not), // the NAT rules for pods not having annotations to route through either external @@ -685,7 +782,7 @@ func (gw *GatewayManager) syncNATsForGRIPChange(externalIPs, oldExtIPs, gwLRPIPs } // check external ip changed - for _, externalIP := range externalIPs { + for _, externalIP := range gwConfig.externalIPs { oldExternalIP, err := util.MatchFirstIPFamily(utilnet.IsIPv6(externalIP), oldExtIPs) if err != nil { return fmt.Errorf("failed to update GW SNAT rule for pods on router %s error: %v", gw.gwRouterName, err) @@ -707,10 +804,10 @@ func (gw *GatewayManager) syncNATsForGRIPChange(externalIPs, oldExtIPs, gwLRPIPs // check if join ip changed if gw.containsJoinIP(parsedLogicalIP) { // is a join SNAT, check if IP needs updating - joinIP, err := util.MatchFirstIPFamily(utilnet.IsIPv6(parsedLogicalIP), gwLRPIPs) + joinIP, err := util.MatchFirstIPFamily(utilnet.IsIPv6(parsedLogicalIP), gwRouterIPs) if err != nil { return fmt.Errorf("failed to find valid IP family match for join subnet IP: %s on "+ - "gateway router: %s, provided IPs: %#v", parsedLogicalIP, gw.gwRouterName, gwLRPIPs) + "gateway router: %s, provided IPs: %#v", parsedLogicalIP, gw.gwRouterName, gwRouterIPs) } if nat.LogicalIP != joinIP.String() { // needs to be updated @@ -732,8 +829,7 @@ func (gw *GatewayManager) syncNATsForGRIPChange(externalIPs, oldExtIPs, gwLRPIPs return nil } -func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []*net.IPNet, l3GatewayConfig *util.L3GatewayConfig, - externalIPs, gwLRPIPs []net.IP, gwRouter *nbdb.LogicalRouter) error { +func (gw *GatewayManager) updateGWRouterNAT(nodeName string, gwConfig 
*GatewayConfig, gwLRPIPs []net.IP, gwRouter *nbdb.LogicalRouter) error { // REMOVEME(trozet) workaround - create join subnet SNAT to handle ICMP needs frag return var extIDs map[string]string if gw.netInfo.IsUserDefinedNetwork() { @@ -744,7 +840,7 @@ func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []* } joinNATs := make([]*nbdb.NAT, 0, len(gwLRPIPs)) for _, gwLRPIP := range gwLRPIPs { - externalIP, err := util.MatchIPFamily(utilnet.IsIPv6(gwLRPIP), externalIPs) + externalIP, err := util.MatchIPFamily(utilnet.IsIPv6(gwLRPIP), gwConfig.externalIPs) if err != nil { return fmt.Errorf("failed to find valid external IP family match for join subnet IP: %s on "+ "gateway router: %s", gwLRPIP, gw.gwRouterName) @@ -761,15 +857,15 @@ func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []* return fmt.Errorf("failed to create SNAT rule for join subnet on router %s error: %v", gw.gwRouterName, err) } - nats := make([]*nbdb.NAT, 0, len(clusterIPSubnet)) + nats := make([]*nbdb.NAT, 0, len(gwConfig.clusterSubnets)) var nat *nbdb.NAT // DisableSNATMultipleGWs is only applicable to cluster default network and not to user defined networks. // For user defined networks, we always add SNAT rules regardless of whether the network is advertised or not. if !config.Gateway.DisableSNATMultipleGWs || gw.netInfo.IsPrimaryNetwork() { // Default SNAT rules. DisableSNATMultipleGWs=false in LGW (traffic egresses via mp0) always. // We are not checking for gateway mode to be shared explicitly to reduce topology differences. 
- for _, entry := range clusterIPSubnet { - externalIP, err := util.MatchIPFamily(utilnet.IsIPv6CIDR(entry), externalIPs) + for _, entry := range gwConfig.clusterSubnets { + externalIP, err := util.MatchIPFamily(utilnet.IsIPv6CIDR(entry), gwConfig.externalIPs) if err != nil { return fmt.Errorf("failed to create default SNAT rules for gateway router %s: %v", gw.gwRouterName, err) @@ -780,7 +876,8 @@ func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []* if utilnet.IsIPv6CIDR(entry) { ipFamily = utilnet.IPv6 } - snatMatch, err := GetNetworkScopedClusterSubnetSNATMatch(gw.nbClient, gw.netInfo, nodeName, gw.isRoutingAdvertised(nodeName), ipFamily) + snatMatch, err := GetNetworkScopedClusterSubnetSNATMatch(gw.nbClient, gw.netInfo, nodeName, + gw.isRoutingAdvertised(nodeName), ipFamily) if err != nil { return fmt.Errorf("failed to get SNAT match for node %s for network %s: %w", nodeName, gw.netInfo.GetNetworkName(), err) } @@ -794,7 +891,7 @@ func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []* } } else { // ensure we do not have any leftover SNAT entries after an upgrade - for _, logicalSubnet := range clusterIPSubnet { + for _, logicalSubnet := range gwConfig.clusterSubnets { nat = libovsdbops.BuildSNAT(nil, logicalSubnet, "", extIDs) nats = append(nats, nat) } @@ -804,7 +901,7 @@ func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []* } } - if err = gw.cleanupStalePodSNATs(nodeName, l3GatewayConfig.IPAddresses, gwLRPIPs); err != nil { + if err = gw.cleanupStalePodSNATs(nodeName, gwConfig.annoConfig.IPAddresses, gwLRPIPs); err != nil { return fmt.Errorf("failed to sync stale SNATs on node %s: %v", nodeName, err) } return nil @@ -818,6 +915,18 @@ func (gw *GatewayManager) gatewayInit( enableGatewayMTU bool, ) error { + if gw.netInfo.TopologyType() == types.Layer2Topology && gw.clusterRouterName != "" { + // layer2 network uses transit router, so we need to set the transit router info + // in 
all the other operations we can use both `gw.clusterRouterName == ""` and `gw.transitRouterInfo == nil` + // as an indicator of the old topology. + err := gw.setTransitRouterInfo(nodeName) + if err != nil { + return fmt.Errorf("failed to initialize layer2 info for gateway on node %s: %v", nodeName, err) + } + if err = gw.oldLayer2TopoCleanup(); err != nil { + return fmt.Errorf("failed to cleanup old layer2 topology for gateway on node %s: %v", nodeName, err) + } + } // If l3gatewayAnnotation.IPAddresses changed, we need to update the perPodSNATs, // so let's save the old value before we update the router for later use var oldExtIPs []net.IP @@ -844,16 +953,21 @@ func (gw *GatewayManager) gatewayInit( } } - gwRouter, err := gw.createGWRouter(gwConfig.annoConfig, gwConfig.gwLRPJoinIPs) + gwRouter, err := gw.createGWRouter(gwConfig) if err != nil { return err } - if err = gw.createGWRouterPeerPort(nodeName); err != nil { + if gw.netInfo.TopologyType() == types.Layer2Topology && gw.transitRouterInfo != nil { + err = gw.createGWRouterPeerRouterPort() + } else { + err = gw.createGWRouterPeerSwitchPort(nodeName) + } + if err != nil { return err } - gwLRPIPs, err := gw.createGWRouterPort(gwConfig.hostSubnets, gwConfig.gwLRPJoinIPs, enableGatewayMTU, gwRouter) + err = gw.createGWRouterPort(gwConfig, enableGatewayMTU, gwRouter) if err != nil { return err } @@ -890,20 +1004,29 @@ func (gw *GatewayManager) gatewayInit( } externalRouterPort := types.GWRouterToExtSwitchPrefix + gw.gwRouterName - if err = gw.updateGWRouterStaticRoutes(gwConfig.clusterSubnets, gwConfig.ovnClusterLRPToJoinIfAddrs, gwConfig.annoConfig, externalRouterPort, - gwRouter); err != nil { + if err = gw.updateGWRouterStaticRoutes(gwConfig, externalRouterPort, gwRouter); err != nil { return err } - if err = gw.updateClusterRouterStaticRoutes(gwConfig.hostSubnets, gwLRPIPs); err != nil { + gwRouterIPs := util.IPNetsToIPs(gwConfig.gwRouterJoinNets) + if gw.netInfo.TopologyType() == types.Layer2Topology && 
gw.transitRouterInfo == nil { + // At layer2 GR LRP acts as the layer3 ovn_cluster_router so we need + // to configure here the .1 address, this will work only for IC with + // one node per zone, since ARPs for .1 will not go beyond local switch. + // This is being done to add the ICMP SNATs for .1 podSubnet that OVN GR generates + for _, subnet := range gwConfig.hostSubnets { + gwRouterIPs = append(gwRouterIPs, gw.netInfo.GetNodeGatewayIP(subnet).IP) + } + } + if err = gw.updateClusterRouterStaticRoutes(gwConfig, gwRouterIPs); err != nil { return err } - if err = gw.syncNATsForGRIPChange(gwConfig.externalIPs, oldExtIPs, gwLRPIPs, gwRouter, oldLogicalRouter); err != nil { + if err = gw.syncNATsForGRIPChange(gwConfig, oldExtIPs, gwRouterIPs, gwRouter, oldLogicalRouter); err != nil { return err } - if err = gw.updateGWRouterNAT(nodeName, gwConfig.clusterSubnets, gwConfig.annoConfig, gwConfig.externalIPs, gwLRPIPs, gwRouter); err != nil { + if err = gw.updateGWRouterNAT(nodeName, gwConfig, gwRouterIPs, gwRouter); err != nil { return err } @@ -921,9 +1044,11 @@ func (gw *GatewayManager) gatewayInit( // If the network is advertised: // - For Layer2 topology, the match is the output port of the GR to the join switch and the destination must be a nodeIP in the cluster. // - For Layer3 topology, the match is the destination must be a nodeIP in the cluster. 
-func GetNetworkScopedClusterSubnetSNATMatch(nbClient libovsdbclient.Client, netInfo util.NetInfo, nodeName string, isNetworkAdvertised bool, ipFamily utilnet.IPFamily) (string, error) { +func GetNetworkScopedClusterSubnetSNATMatch(nbClient libovsdbclient.Client, netInfo util.NetInfo, nodeName string, + isNetworkAdvertised bool, ipFamily utilnet.IPFamily) (string, error) { + layer2NoRouter := netInfo.TopologyType() == types.Layer2Topology && !config.Layer2UsesTransitRouter if !isNetworkAdvertised { - if netInfo.TopologyType() != types.Layer2Topology { + if !layer2NoRouter { return "", nil } return fmt.Sprintf("outport == %q", types.GWRouterToExtSwitchPrefix+netInfo.GetNetworkScopedGWRouterName(nodeName)), nil @@ -940,7 +1065,7 @@ func GetNetworkScopedClusterSubnetSNATMatch(nbClient libovsdbclient.Client, netI if destinationMatch == "" { return "", fmt.Errorf("could not build a destination based SNAT match because no addressSet %v exists for IP family %v", dbIDs, ipFamily) } - if netInfo.TopologyType() != types.Layer2Topology { + if !layer2NoRouter { return destinationMatch, nil } return fmt.Sprintf("outport == %q && %s", types.GWRouterToExtSwitchPrefix+netInfo.GetNetworkScopedGWRouterName(nodeName), destinationMatch), nil @@ -1197,15 +1322,14 @@ func (gw *GatewayManager) Cleanup() error { // Get the gateway router port's IP address (connected to join switch) var nextHops []net.IP - gwRouterToJoinSwitchPortName := gw.getGWRouterPortName() - portName := gw.getGWRouterPeerPortName() + gwRouterPortName := gw.getGWRouterPortName() - gwIPAddrs, err := libovsdbutil.GetLRPAddrs(gw.nbClient, gwRouterToJoinSwitchPortName) + gwIPAddrs, err := libovsdbutil.GetLRPAddrs(gw.nbClient, gwRouterPortName) if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) { return fmt.Errorf( "failed to get gateway IPs for network %q from LRP %s: %v", gw.netInfo.GetNetworkName(), - gwRouterToJoinSwitchPortName, + gwRouterPortName, err, ) } @@ -1216,12 +1340,13 @@ func (gw *GatewayManager) 
Cleanup() error { gw.staticRouteCleanup(nextHops, nil) gw.policyRouteCleanup(nextHops) - // Remove the patch port that connects join switch to gateway router - lsp := nbdb.LogicalSwitchPort{Name: portName} - sw := nbdb.LogicalSwitch{Name: gw.joinSwitchName} - err = libovsdbops.DeleteLogicalSwitchPorts(gw.nbClient, &sw, &lsp) - if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) { - return fmt.Errorf("failed to delete logical switch port %s from switch %s: %w", portName, sw.Name, err) + if gw.netInfo.TopologyType() == types.Layer2Topology && gw.transitRouterInfo != nil { + err = gw.deleteGWRouterPeerRouterPort() + } else { + err = gw.deleteGWRouterPeerSwitchPort() + } + if err != nil { + return err } // Remove the static mac bindings of the gateway router @@ -1417,7 +1542,7 @@ func (gw *GatewayManager) SyncGateway( if err := pbrMngr.AddSameNodeIPPolicy(node.Name, mgmtIfAddr.IP.String(), l3GatewayConfigIP, relevantHostIPs); err != nil { return fmt.Errorf("failed to configure the policy based routes for network %q: %v", gw.netInfo.GetNetworkName(), err) } - if gw.netInfo.TopologyType() == types.Layer2Topology && config.Gateway.Mode == config.GatewayModeLocal { + if gw.netInfo.TopologyType() == types.Layer2Topology && gw.transitRouterInfo == nil && config.Gateway.Mode == config.GatewayModeLocal { if err := pbrMngr.AddHostCIDRPolicy(node, mgmtIfAddr.IP.String(), subnet.String()); err != nil { return fmt.Errorf("failed to configure the hostCIDR policy for L2 network %q on local gateway: %v", gw.netInfo.GetNetworkName(), err) @@ -1434,3 +1559,47 @@ func physNetName(netInfo util.NetInfo) string { } return netInfo.GetNetworkName() } + +func (gw *GatewayManager) setTransitRouterInfo(nodeName string) error { + node, err := gw.watchFactory.GetNode(nodeName) + if err != nil { + return err + } + gw.transitRouterInfo, err = getTransitRouterInfo(gw.netInfo, node) + if err != nil { + return err + } + return nil +} + +// oldLayer2TopoCleanup cleans up the old layer2 
topology for the gateway on the node. +// Idempotent, will check if nbdb needs cleanup. +func (gw *GatewayManager) oldLayer2TopoCleanup() error { + // Check if the stale gateway router port exists. + // We delete GR a last operation in this cleanup, hence if it doesn't exist, we can skip the cleanup. + gwRouterPort := &nbdb.LogicalRouterPort{ + Name: types.RouterToSwitchPrefix + gw.joinSwitchName, + } + var err error + gwRouterPort, err = libovsdbops.GetLogicalRouterPort(gw.nbClient, gwRouterPort) + if err != nil && errors.Is(err, libovsdbclient.ErrNotFound) { + // cleanup not needed, old port does not exist + return nil + } + + // 1. Delete old port from the switch + if err := gw.deleteGWRouterPeerSwitchPort(); err != nil { + return fmt.Errorf("failed to delete peer switch port %s: %v", gw.getGWRouterPeerSwitchPortName(), err) + } + // 2. Remove the static mac bindings of the gateway router (otherwise you can't delete the router) + err = gateway.DeleteDummyGWMacBindings(gw.nbClient, gw.gwRouterName, gw.netInfo) + if err != nil { + return fmt.Errorf("failed to delete GR dummy mac bindings for node %s: %w", gw.nodeName, err) + } + + // 3. 
Delete stale GR, this will remove stale ports, NATs, routes and routing policies + if err := libovsdbops.DeleteLogicalRouter(gw.nbClient, &nbdb.LogicalRouter{Name: gw.gwRouterName}); err != nil { + return fmt.Errorf("failed to delete GR port %s: %v", gwRouterPort.Name, err) + } + return nil +} diff --git a/go-controller/pkg/ovn/gateway_test.go b/go-controller/pkg/ovn/gateway_test.go index 893d17ad09..6b02d3b41c 100644 --- a/go-controller/pkg/ovn/gateway_test.go +++ b/go-controller/pkg/ovn/gateway_test.go @@ -500,7 +500,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -617,7 +617,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -740,7 +740,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -829,7 +829,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -911,7 +911,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: 
clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1007,7 +1007,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1038,7 +1038,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { ginkgo.By("modifying the node join IP") oldJoinLRPIPs := joinLRPIPs joinLRPIPs = ovntest.MustParseIPNets("100.64.0.99/16") - gwConfig.gwLRPJoinIPs = joinLRPIPs + gwConfig.gwRouterJoinNets = joinLRPIPs expectedOVNClusterRouter.StaticRoutes = []string{} err = newGatewayManager(fakeOvn, nodeName).gatewayInit( nodeName, @@ -1117,7 +1117,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1197,7 +1197,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1284,7 +1284,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1365,7 +1365,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", 
func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1479,7 +1479,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1596,7 +1596,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1686,7 +1686,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, @@ -1805,7 +1805,7 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() { annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterIPSubnets, - gwLRPJoinIPs: joinLRPIPs, + gwRouterJoinNets: joinLRPIPs, hostAddrs: nil, externalIPs: extractExternalIPs(l3GatewayConfig), ovnClusterLRPToJoinIfAddrs: defLRPIPs, diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go index eb7bb05abd..0d3708a1fc 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go @@ -2,6 +2,7 @@ package ovn import ( "context" + "errors" "fmt" "net" 
"reflect" @@ -11,7 +12,11 @@ import ( "time" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" + + libovsdbclient "github.com/ovn-kubernetes/libovsdb/client" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -26,6 +31,7 @@ import ( svccontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/services" lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/routeimport" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/topology" zoneinterconnect "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/zone_interconnect" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/persistentips" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" @@ -115,16 +121,19 @@ func (h *layer2UserDefinedNetworkControllerEventHandler) AddResource(obj interfa _, syncMgmtPort := h.oc.mgmtPortFailed.Load(node.Name) _, syncGw := h.oc.gatewaysFailed.Load(node.Name) _, syncReroute := h.oc.syncEIPNodeRerouteFailed.Load(node.Name) + _, syncNodeClusterRouterPort := h.oc.nodeClusterRouterPortFailed.Load(node.Name) nodeParams = &nodeSyncs{ - syncMgmtPort: syncMgmtPort, - syncGw: syncGw, - syncReroute: syncReroute, + syncMgmtPort: syncMgmtPort, + syncGw: syncGw, + syncReroute: syncReroute, + syncClusterRouterPort: syncNodeClusterRouterPort, } } else { nodeParams = &nodeSyncs{ - syncMgmtPort: true, - syncGw: true, - syncReroute: true, + syncMgmtPort: true, + syncGw: true, + syncReroute: true, + syncClusterRouterPort: true, } } return h.oc.addUpdateLocalNodeEvent(node, nodeParams) @@ -181,25 +190,31 @@ func (h *layer2UserDefinedNetworkControllerEventHandler) UpdateResource(oldObj, nodeGatewayMTUSupportChanged(oldNode, newNode) _, syncRerouteFailed := h.oc.syncEIPNodeRerouteFailed.Load(newNode.Name) shouldSyncReroute := syncRerouteFailed 
|| util.NodeHostCIDRsAnnotationChanged(oldNode, newNode) + _, clusterRouterPortFailed := h.oc.nodeClusterRouterPortFailed.Load(newNode.Name) nodeSyncsParam = &nodeSyncs{ - syncMgmtPort: shouldSyncMgmtPort, - syncGw: shouldSyncGW, - syncReroute: shouldSyncReroute, + syncMgmtPort: shouldSyncMgmtPort, + syncGw: shouldSyncGW, + syncReroute: shouldSyncReroute, + syncClusterRouterPort: clusterRouterPortFailed, } } else { klog.Infof("Node %s moved from the remote zone %s to local zone %s.", newNode.Name, util.GetNodeZone(oldNode), util.GetNodeZone(newNode)) // The node is now a local zone node. Trigger a full node sync. nodeSyncsParam = &nodeSyncs{ - syncMgmtPort: true, - syncGw: true, - syncReroute: true, + syncMgmtPort: true, + syncGw: true, + syncReroute: true, + syncClusterRouterPort: true, } } return h.oc.addUpdateLocalNodeEvent(newNode, nodeSyncsParam) } else { _, syncZoneIC := h.oc.syncZoneICFailed.Load(newNode.Name) + if h.oc.remoteNodesNoRouter.Has(oldNode.Name) && util.UDNLayer2NodeUsesTransitRouter(newNode) { + syncZoneIC = true + } return h.oc.addUpdateRemoteNodeEvent(newNode, syncZoneIC) } case factory.PodType: @@ -266,15 +281,17 @@ type Layer2UserDefinedNetworkController struct { BaseLayer2UserDefinedNetworkController // Node-specific syncMaps used by node event handler - mgmtPortFailed sync.Map - gatewaysFailed sync.Map - syncZoneICFailed sync.Map - syncEIPNodeRerouteFailed sync.Map + mgmtPortFailed sync.Map + gatewaysFailed sync.Map + syncZoneICFailed sync.Map + syncEIPNodeRerouteFailed sync.Map + nodeClusterRouterPortFailed sync.Map // Cluster-wide router default Control Plane Protection (COPP) UUID defaultCOPPUUID string - gatewayManagers sync.Map + gatewayManagers sync.Map + gatewayTopologyFactory *topology.GatewayTopologyFactory // Cluster wide Load_Balancer_Group UUID. // Includes the cluster switch and all node gateway routers. 
@@ -296,6 +313,8 @@ type Layer2UserDefinedNetworkController struct { // reconcile the virtual machine default gateway sending GARPs and RAs defaultGatewayReconciler *kubevirt.DefaultGatewayReconciler + + remoteNodesNoRouter sets.Set[string] } // NewLayer2UserDefinedNetworkController create a new OVN controller for the given layer2 NAD @@ -352,10 +371,12 @@ func NewLayer2UserDefinedNetworkController( }, }, }, - mgmtPortFailed: sync.Map{}, - syncZoneICFailed: sync.Map{}, - gatewayManagers: sync.Map{}, - eIPController: eIPController, + mgmtPortFailed: sync.Map{}, + syncZoneICFailed: sync.Map{}, + gatewayTopologyFactory: topology.NewGatewayTopologyFactory(cnci.nbClient), + gatewayManagers: sync.Map{}, + eIPController: eIPController, + remoteNodesNoRouter: sets.New[string](), } if config.OVNKubernetesFeature.EnableInterconnect { @@ -464,6 +485,21 @@ func (oc *Layer2UserDefinedNetworkController) Cleanup() error { return true }) + // now delete cluster router + if config.Layer2UsesTransitRouter { + ops, err := libovsdbops.DeleteLogicalRouterOps(oc.nbClient, nil, + &nbdb.LogicalRouter{ + Name: oc.GetNetworkScopedClusterRouterName(), + }) + if err != nil { + return fmt.Errorf("failed to get ops for deleting routers of network %s: %v", oc.GetNetworkName(), err) + } + _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) + if err != nil { + return fmt.Errorf("failed to deleting routers/switches of network %s: %v", oc.GetNetworkName(), err) + } + } + // remove load balancer groups lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { @@ -484,6 +520,15 @@ func (oc *Layer2UserDefinedNetworkController) init() error { } oc.defaultCOPPUUID = defaultCOPPUUID + if config.Layer2UsesTransitRouter && oc.IsPrimaryNetwork() { + if len(oc.GetTunnelKeys()) != 2 { + return fmt.Errorf("layer2 network %s with transit router enabled requires exactly 2 tunnel 
keys, got: %v", oc.GetNetworkName(), oc.GetTunnelKeys()) + } + if _, err = oc.newTransitRouter(oc.GetTunnelKeys()[1]); err != nil { + return fmt.Errorf("failed to create OVN transit router for network %q: %v", oc.GetNetworkName(), err) + } + } + clusterLBGroupUUID, switchLBGroupUUID, routerLBGroupUUID, err := initLoadBalancerGroups(oc.nbClient, oc.GetNetInfo()) if err != nil { return err @@ -581,8 +626,22 @@ func (oc *Layer2UserDefinedNetworkController) newRetryFramework( func (oc *Layer2UserDefinedNetworkController) addUpdateLocalNodeEvent(node *corev1.Node, nSyncs *nodeSyncs) error { var errs []error + var err error + + hostSubnets := make([]*net.IPNet, 0, len(oc.Subnets())) + for _, subnet := range oc.Subnets() { + hostSubnets = append(hostSubnets, subnet.CIDR) + } if util.IsNetworkSegmentationSupportEnabled() && oc.IsPrimaryNetwork() { + if nSyncs.syncClusterRouterPort && config.Layer2UsesTransitRouter { + if err = oc.syncClusterRouterPorts(node, hostSubnets); err != nil { + errs = append(errs, err) + oc.nodeClusterRouterPortFailed.Store(node.Name, true) + } else { + oc.nodeClusterRouterPortFailed.Delete(node.Name) + } + } if nSyncs.syncGw { gwManager := oc.gatewayManagerForNode(node.Name) oc.gatewayManagers.Store(node.Name, gwManager) @@ -599,7 +658,7 @@ func (oc *Layer2UserDefinedNetworkController) addUpdateLocalNodeEvent(node *core return err } isUDNAdvertised := util.IsPodNetworkAdvertisedAtNode(oc, node.Name) - err = oc.addOrUpdateUDNClusterSubnetEgressSNAT(gwConfig.hostSubnets, gwManager.gwRouterName, isUDNAdvertised) + err = oc.addOrUpdateUDNClusterSubnetEgressSNAT(gwConfig.hostSubnets, node.Name, isUDNAdvertised) if err != nil { return err } @@ -624,16 +683,12 @@ func (oc *Layer2UserDefinedNetworkController) addUpdateLocalNodeEvent(node *core } if nSyncs.syncMgmtPort { - // Layer 2 networks have a single, large subnet, that's the one - // associated to the controller. Take the management port IP from - // there. 
- subnets := oc.Subnets() - hostSubnets := make([]*net.IPNet, 0, len(subnets)) - for _, subnet := range oc.Subnets() { - hostSubnets = append(hostSubnets, subnet.CIDR) + routerName := oc.GetNetworkScopedClusterRouterName() + if !config.Layer2UsesTransitRouter { + routerName = oc.GetNetworkScopedGWRouterName(node.Name) } if _, err := oc.syncNodeManagementPort(node, oc.GetNetworkScopedSwitchName(types.OVNLayer2Switch), - oc.GetNetworkScopedGWRouterName(node.Name), hostSubnets); err != nil { + routerName, hostSubnets); err != nil { errs = append(errs, err) oc.mgmtPortFailed.Store(node.Name, true) } else { @@ -661,7 +716,7 @@ func (oc *Layer2UserDefinedNetworkController) addUpdateLocalNodeEvent(node *core errs = append(errs, oc.BaseLayer2UserDefinedNetworkController.addUpdateLocalNodeEvent(node)) - err := utilerrors.Join(errs...) + err = utilerrors.Join(errs...) if err != nil { oc.recordNodeErrorEvent(node, err) } @@ -673,7 +728,11 @@ func (oc *Layer2UserDefinedNetworkController) addUpdateRemoteNodeEvent(node *cor if util.IsNetworkSegmentationSupportEnabled() && oc.IsPrimaryNetwork() { if syncZoneIC && config.OVNKubernetesFeature.EnableInterconnect { - if err := oc.addPortForRemoteNodeGR(node); err != nil { + portUpdateFn := oc.addRouterSetupForRemoteNodeGR + if !config.Layer2UsesTransitRouter { + portUpdateFn = oc.addSwitchPortForRemoteNodeGR + } + if err := portUpdateFn(node); err != nil { err = fmt.Errorf("failed to add the remote zone node %s's remote LRP, %w", node.Name, err) errs = append(errs, err) oc.syncZoneICFailed.Store(node.Name, true) @@ -692,7 +751,7 @@ func (oc *Layer2UserDefinedNetworkController) addUpdateRemoteNodeEvent(node *cor return err } -func (oc *Layer2UserDefinedNetworkController) addPortForRemoteNodeGR(node *corev1.Node) error { +func (oc *Layer2UserDefinedNetworkController) addSwitchPortForRemoteNodeGR(node *corev1.Node) error { nodeJoinSubnetIPs, err := udn.GetGWRouterIPs(node, oc.GetNetInfo()) if err != nil { if 
util.IsAnnotationNotSetError(err) { @@ -745,7 +804,135 @@ func (oc *Layer2UserDefinedNetworkController) addPortForRemoteNodeGR(node *corev return nil } +func (oc *Layer2UserDefinedNetworkController) cleanupSwitchPortForRemoteNodeGR(nodeName string) error { + logicalSwitchPort := &nbdb.LogicalSwitchPort{ + Name: types.SwitchToRouterPrefix + oc.GetNetworkScopedSwitchName(types.OVNLayer2Switch) + "_" + nodeName, + } + sw := &nbdb.LogicalSwitch{Name: oc.GetNetworkScopedSwitchName(types.OVNLayer2Switch)} + return libovsdbops.DeleteLogicalSwitchPorts(oc.nbClient, sw, logicalSwitchPort) +} + +func (oc *Layer2UserDefinedNetworkController) addRouterSetupForRemoteNodeGR(node *corev1.Node) error { + if oc.remoteNodesNoRouter.Has(node.Name) { + // remote node uses old topology + if util.UDNLayer2NodeUsesTransitRouter(node) { + // node has just been upgraded + // upgrade remote node connection + // delete old switch port + if err := oc.cleanupSwitchPortForRemoteNodeGR(node.Name); err != nil { + return fmt.Errorf("failed to cleanup port for remote node %s: %v", node.Name, err) + } + if err := oc.eIPController.updateNodeNextHop(oc.GetNetInfo(), node); err != nil { + return fmt.Errorf("failed to ensure EgressIP switch policies for network %s: %v", oc.GetNetworkName(), err) + } + oc.remoteNodesNoRouter.Delete(node.Name) + } else { + // node is still using old topology + if err := oc.addSwitchPortForRemoteNodeGR(node); err != nil { + return err + } + gwRouterJoinIPs, err := udn.GetGWRouterIPs(node, oc.GetNetInfo()) + if err != nil { + return err + } + // create joinIP via joinIP routes to send traffic via the switch port + return oc.addTransitRouterRoutes(node, gwRouterJoinIPs) + } + } + transitRouterInfo, err := getTransitRouterInfo(oc.GetNetInfo(), node) + if err != nil { + return nil + } + transitPort := nbdb.LogicalRouterPort{ + Name: types.TransitRouterToRouterPrefix + oc.GetNetworkScopedGWRouterName(node.Name), + MAC: 
util.IPAddrToHWAddr(transitRouterInfo.transitRouterNets[0].IP).String(), + Networks: util.IPNetsToStringSlice(transitRouterInfo.transitRouterNets), + Options: map[string]string{ + libovsdbops.RequestedTnlKey: strconv.Itoa(transitRouterInfo.nodeID), + libovsdbops.RequestedChassis: node.Name, + }, + ExternalIDs: map[string]string{ + types.NetworkExternalID: oc.GetNetworkName(), + types.TopologyExternalID: oc.TopologyType(), + types.NodeExternalID: node.Name, + }, + } + transitRouter := nbdb.LogicalRouter{Name: oc.GetNetworkScopedClusterRouterName()} + if err := libovsdbops.CreateOrUpdateLogicalRouterPort(oc.nbClient, &transitRouter, + &transitPort, nil, &transitPort.MAC, &transitPort.Networks, + &transitPort.Options, &transitPort.ExternalIDs); err != nil { + return fmt.Errorf("failed to create remote port %+v on router %+v: %v", transitPort, transitRouter, err) + } + return oc.addTransitRouterRoutes(node, transitRouterInfo.gatewayRouterNets) +} + +func (oc *Layer2UserDefinedNetworkController) addTransitRouterRoutes(node *corev1.Node, nextHops []*net.IPNet) error { + gwRouterJoinIPs, err := udn.GetGWRouterIPs(node, oc.GetNetInfo()) + if err != nil { + return err + } + for _, nextHop := range nextHops { + gwRouterJoinIP, err := util.MatchFirstIPNetFamily(utilnet.IsIPv6CIDR(nextHop), gwRouterJoinIPs) + if err != nil { + return fmt.Errorf("failed to add remote node join ip based "+ + "routes in distributed router %s: %v", + oc.GetNetworkScopedClusterRouterName(), err) + } + lrsr := nbdb.LogicalRouterStaticRoute{ + ExternalIDs: map[string]string{ + types.NodeExternalID: node.Name, + types.NetworkExternalID: oc.GetNetworkName(), + types.TopologyExternalID: oc.TopologyType(), + }, + IPPrefix: gwRouterJoinIP.IP.String(), + Nexthop: nextHop.IP.String(), + } + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.IPPrefix == lrsr.IPPrefix && + libovsdbops.PolicyEqualPredicate(lrsr.Policy, item.Policy) + } + + if err := 
libovsdbops.CreateOrReplaceLogicalRouterStaticRouteWithPredicate(oc.nbClient, + oc.GetNetworkScopedClusterRouterName(), &lrsr, p, &lrsr.Nexthop); err != nil { + return fmt.Errorf("error creating static route %+v in %s: %v", lrsr, oc.GetNetworkScopedClusterRouterName(), err) + } + } + return nil +} + +func (oc *Layer2UserDefinedNetworkController) cleanupRouterSetupForRemoteNodeGR(nodeName string) error { + transitPort := &nbdb.LogicalRouterPort{ + Name: types.TransitRouterToRouterPrefix + oc.GetNetworkScopedGWRouterName(nodeName), + } + var err error + transitPort, err = libovsdbops.GetLogicalRouterPort(oc.nbClient, transitPort) + if err != nil { + // logical router port doesn't exist. So nothing to cleanup. + return nil + } + + transitRouter := nbdb.LogicalRouter{ + Name: oc.GetNetworkScopedClusterRouterName(), + } + + if err = libovsdbops.DeleteLogicalRouterPorts(oc.nbClient, &transitRouter, transitPort); err != nil { + return fmt.Errorf("failed to delete logical router port %s from router %s for the node %s, error: %w", + transitPort.Name, transitRouter.Name, nodeName, err) + } + + // Delete any static routes in the transit router for this node. + p := func(lrsr *nbdb.LogicalRouterStaticRoute) bool { + return lrsr.ExternalIDs[types.NetworkExternalID] == oc.GetNetworkName() && lrsr.ExternalIDs[types.NodeExternalID] == nodeName + } + if err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(oc.nbClient, oc.GetNetworkScopedClusterRouterName(), p); err != nil { + return fmt.Errorf("failed to cleanup static routes for the node %s: %w", nodeName, err) + } + + return nil +} + func (oc *Layer2UserDefinedNetworkController) deleteNodeEvent(node *corev1.Node) error { + // GatewayManager only exists for local nodes. 
if err := oc.gatewayManagerForNode(node.Name).Cleanup(); err != nil { return fmt.Errorf("failed to cleanup gateway on node %q: %w", node.Name, err) } @@ -753,6 +940,14 @@ func (oc *Layer2UserDefinedNetworkController) deleteNodeEvent(node *corev1.Node) oc.localZoneNodes.Delete(node.Name) oc.mgmtPortFailed.Delete(node.Name) oc.syncEIPNodeRerouteFailed.Delete(node.Name) + + if config.Layer2UsesTransitRouter { + // this is a no-op for local nodes + if err := oc.cleanupRouterSetupForRemoteNodeGR(node.Name); err != nil { + return fmt.Errorf("failed to cleanup remote node %q gateway: %w", node.Name, err) + } + oc.syncZoneICFailed.Delete(node.Name) + } return nil } @@ -770,8 +965,14 @@ func (oc *Layer2UserDefinedNetworkController) deleteNodeEvent(node *corev1.Node) // If isUDNAdvertised is true, then we want to SNAT all packets that are coming from pods on this network // leaving towards nodeIPs on the cluster to masqueradeIP. If network is advertise then the SNAT looks like this: // "eth.dst == 0a:58:5d:5d:00:02 && (ip4.dst == $a712973235162149816)" "169.254.0.36" "93.93.0.0/16" -func (oc *Layer2UserDefinedNetworkController) addOrUpdateUDNClusterSubnetEgressSNAT(localPodSubnets []*net.IPNet, gwRouterName string, isUDNAdvertised bool) error { - outputPort := types.GWRouterToJoinSwitchPrefix + gwRouterName +func (oc *Layer2UserDefinedNetworkController) addOrUpdateUDNClusterSubnetEgressSNAT(localPodSubnets []*net.IPNet, + nodeName string, isUDNAdvertised bool) error { + outputPort := oc.getCRToSwitchPortName(oc.GetNetworkScopedSwitchName("")) + routerName := oc.GetNetworkScopedClusterRouterName() + if !config.Layer2UsesTransitRouter { + routerName = oc.GetNetworkScopedGWRouterName(nodeName) + outputPort = types.GWRouterToJoinSwitchPrefix + routerName + } nats, err := oc.buildUDNEgressSNAT(localPodSubnets, outputPort, isUDNAdvertised) if err != nil { return err @@ -779,12 +980,12 @@ func (oc *Layer2UserDefinedNetworkController) addOrUpdateUDNClusterSubnetEgressS if len(nats) 
== 0 { return nil // nothing to do } - gwRouter := &nbdb.LogicalRouter{ - Name: gwRouterName, + router := &nbdb.LogicalRouter{ + Name: routerName, } - if err := libovsdbops.CreateOrUpdateNATs(oc.nbClient, gwRouter, nats...); err != nil { + if err := libovsdbops.CreateOrUpdateNATs(oc.nbClient, router, nats...); err != nil { return fmt.Errorf("failed to update SNAT for cluster on router: %q for network %q, error: %w", - gwRouterName, oc.GetNetworkName(), err) + routerName, oc.GetNetworkName(), err) } return nil } @@ -822,7 +1023,7 @@ func (oc *Layer2UserDefinedNetworkController) nodeGatewayConfig(node *corev1.Nod // at layer2 the GR LRP should be different per node same we do for layer3 // since they should not collide at the distributed switch later on - gwLRPJoinIPs, err := udn.GetGWRouterIPs(node, oc.GetNetInfo()) + gwRouterJoinNets, err := udn.GetGWRouterIPs(node, oc.GetNetInfo()) if err != nil { return nil, fmt.Errorf("failed composing LRP addresses for layer2 network %s: %w", oc.GetNetworkName(), err) } @@ -833,13 +1034,20 @@ func (oc *Layer2UserDefinedNetworkController) nodeGatewayConfig(node *corev1.Nod annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: hostSubnets, - gwLRPJoinIPs: gwLRPJoinIPs, + gwRouterJoinNets: gwRouterJoinNets, hostAddrs: nil, externalIPs: externalIPs, ovnClusterLRPToJoinIfAddrs: nil, }, nil } +func (oc *Layer2UserDefinedNetworkController) newTransitRouter(tunnelKey int) (*nbdb.LogicalRouter, error) { + return oc.gatewayTopologyFactory.NewTransitRouter( + oc.GetNetInfo(), + oc.defaultCOPPUUID, strconv.Itoa(tunnelKey), + ) +} + func (oc *Layer2UserDefinedNetworkController) newGatewayManager(nodeName string) *GatewayManager { return NewGatewayManagerForLayer2Topology( nodeName, @@ -848,6 +1056,7 @@ func (oc *Layer2UserDefinedNetworkController) newGatewayManager(nodeName string) oc.nbClient, oc.GetNetInfo(), oc.watchFactory, + config.Layer2UsesTransitRouter, oc.gatewayOptions()..., ) } @@ -927,3 +1136,243 @@ func (oc 
*Layer2UserDefinedNetworkController) reconcileLiveMigrationTargetZone(k } return nil } + +// syncClusterRouterPorts connects the network switch to the transit router +func (oc *Layer2UserDefinedNetworkController) syncClusterRouterPorts(node *corev1.Node, hostSubnets []*net.IPNet) error { + switchName := oc.GetNetworkScopedSwitchName("") + + // Connect the switch to the router. + logicalSwitchPort := nbdb.LogicalSwitchPort{ + Name: types.SwitchToTransitRouterPrefix + switchName, + Type: "router", + Addresses: []string{"router"}, + Options: map[string]string{ + libovsdbops.RouterPort: types.TransitRouterToSwitchPrefix + switchName, + }, + ExternalIDs: map[string]string{ + types.NetworkExternalID: oc.GetNetworkName(), + types.TopologyExternalID: oc.TopologyType(), + }, + } + sw := nbdb.LogicalSwitch{Name: switchName} + err := libovsdbops.CreateOrUpdateLogicalSwitchPortsOnSwitch(oc.nbClient, &sw, &logicalSwitchPort) + if err != nil { + klog.Errorf("Failed to add logical port %+v to switch %s: %v", logicalSwitchPort, switchName, err) + return err + } + + if err = oc.syncNodeClusterRouterPort(node, hostSubnets); err != nil { + return err + } + + if len(oc.remoteNodesNoRouter) > 0 { + // now add upgrade-only connection using IP-less port + if err = oc.ensureUpgradeTopology(node); err != nil { + return fmt.Errorf("failed to ensure upgrade topology for node %s: %w", node.Name, err) + } + } else { + // cleanup upgrade topology if it exists + if err = oc.cleanupUpgradeTopology(); err != nil { + return fmt.Errorf("failed to cleanup upgrade topology for network %s: %w", oc.GetNetworkName(), err) + } + } + return nil +} + +func (oc *Layer2UserDefinedNetworkController) ensureUpgradeTopology(node *corev1.Node) error { + switchName := oc.GetNetworkScopedSwitchName("") + sw := nbdb.LogicalSwitch{Name: switchName} + + // create switch to router connection with GR MAC and dummy join IPs + upgradeRouterPortName := types.TransitRouterToSwitchPrefix + switchName + "-upgrade" + // create 
switch port + upgradeSwitchPort := nbdb.LogicalSwitchPort{ + Name: types.SwitchToTransitRouterPrefix + switchName + "-upgrade", + Type: "router", + Addresses: []string{"router"}, + Options: map[string]string{ + libovsdbops.RouterPort: upgradeRouterPortName, + }, + ExternalIDs: map[string]string{ + types.NetworkExternalID: oc.GetNetworkName(), + types.TopologyExternalID: oc.TopologyType(), + }, + } + tunnelID, err := util.ParseUDNLayer2NodeGRLRPTunnelIDs(node, oc.GetNetworkName()) + if err != nil { + if util.IsAnnotationNotSetError(err) { + // wait for the annotation to be assigned + return types.NewSuppressedError(err) + } + return fmt.Errorf("failed to fetch tunnelID annotation from the node %s for network %s, err: %w", + node.Name, oc.GetNetworkName(), err) + } + upgradeSwitchPort.Options[libovsdbops.RequestedTnlKey] = strconv.Itoa(tunnelID) + + err = libovsdbops.CreateOrUpdateLogicalSwitchPortsOnSwitch(oc.nbClient, &sw, &upgradeSwitchPort) + if err != nil { + klog.Errorf("Failed to add logical port %+v to switch %s: %v", upgradeSwitchPort, switchName, err) + return err + } + // create router port + // find GW MAC + gwRouterJoinNets, err := udn.GetGWRouterIPs(node, oc.GetNetInfo()) + if err != nil { + return fmt.Errorf("failed composing LRP addresses for layer2 network %s: %w", oc.GetNetworkName(), err) + } + // add fake joinIPs + fakeJoinIPs := udn.GetLastIPsFromJoinSubnet(oc.GetNetInfo()) + + gwLRPMAC := util.IPAddrToHWAddr(gwRouterJoinNets[0].IP) + logicalRouterPort := nbdb.LogicalRouterPort{ + Name: upgradeRouterPortName, + MAC: gwLRPMAC.String(), + Networks: util.IPNetsToStringSlice(fakeJoinIPs), + } + logicalRouter := nbdb.LogicalRouter{Name: oc.GetNetworkScopedClusterRouterName()} + + err = libovsdbops.CreateOrUpdateLogicalRouterPort(oc.nbClient, &logicalRouter, &logicalRouterPort, + nil, &logicalRouterPort.MAC, &logicalRouterPort.Networks, &logicalRouterPort.Options) + if err != nil { + klog.Errorf("Failed to add logical router port %s, error: %v", 
upgradeRouterPortName, err) + return err + } + + // now add masq subnet to the router port, this ensures that only one port respond to the + // ARP/NDP requests for the masq IPs + lrpName := oc.getCRToSwitchPortName(switchName) + trRouterPort, err := libovsdbops.GetLogicalRouterPort(oc.nbClient, &nbdb.LogicalRouterPort{Name: lrpName}) + if err != nil { + return fmt.Errorf("failed to get logical router port %s: %w", lrpName, err) + } + masqSubnets, err := udn.GetUDNMgmtPortMasqueradeIPs(oc.GetNetworkID()) + if err != nil { + return fmt.Errorf("failed to get masquerade IPs, network %s (%d): %w", oc.GetNetworkName(), oc.GetNetworkID(), err) + } + + existingNetworkSet := sets.New[string](trRouterPort.Networks...) + newNetworksSet := sets.New[string](util.IPNetsToStringSlice(masqSubnets)...) + // Only add masq IPs if they are not already present + if existingNetworkSet.IsSuperset(newNetworksSet) { + return nil + } + trRouterPort.Networks = append(trRouterPort.Networks, newNetworksSet.UnsortedList()...) + err = libovsdbops.CreateOrUpdateLogicalRouterPort(oc.nbClient, &logicalRouter, trRouterPort, nil, &trRouterPort.Networks) + if err != nil { + return fmt.Errorf("failed to update logical router port %s with masq IPs: %w", lrpName, err) + } + return nil +} + +func (oc *Layer2UserDefinedNetworkController) cleanupUpgradeTopology() error { + // 1. 
Delete switch to router connection with GR MAC and dummy join IPs + switchName := oc.GetNetworkScopedSwitchName("") + sw := nbdb.LogicalSwitch{Name: switchName} + logicalRouter := nbdb.LogicalRouter{Name: oc.GetNetworkScopedClusterRouterName()} + + upgradeRouterPortName := types.TransitRouterToSwitchPrefix + switchName + "-upgrade" + upgradeSwitchPortName := types.SwitchToTransitRouterPrefix + switchName + "-upgrade" + if err := libovsdbops.DeleteLogicalSwitchPorts(oc.nbClient, &sw, &nbdb.LogicalSwitchPort{Name: upgradeSwitchPortName}); err != nil { + return fmt.Errorf("failed to delete logical switch port %s: %w", upgradeSwitchPortName, err) + } + if err := libovsdbops.DeleteLogicalRouterPorts(oc.nbClient, &logicalRouter, &nbdb.LogicalRouterPort{Name: upgradeRouterPortName}); err != nil { + return fmt.Errorf("failed to delete logical router port %s: %w", upgradeRouterPortName, err) + } + // 2. Delete masq IPs from the router port as it is no longer needed + lrpName := oc.getCRToSwitchPortName(switchName) + masqSubnets, err := udn.GetUDNGatewayMasqueradeIPs(oc.GetNetworkID()) + if err != nil { + return fmt.Errorf("failed to get masquerade IPs, network %s (%d): %w", oc.GetNetworkName(), oc.GetNetworkID(), err) + } + trRouterPort, err := libovsdbops.GetLogicalRouterPort(oc.nbClient, &nbdb.LogicalRouterPort{Name: lrpName}) + if err != nil { + return fmt.Errorf("failed to get logical router port %s: %w", lrpName, err) + } + updatedNetworks := sets.New(trRouterPort.Networks...) + staleNetworksSet := sets.New[string](util.IPNetsToStringSlice(masqSubnets)...) 
+ if updatedNetworks.Intersection(staleNetworksSet).Len() == 0 { + // No masq IPs to remove, nothing to do + return nil + } + for network := range staleNetworksSet { + updatedNetworks.Delete(network) + } + trRouterPort.Networks = updatedNetworks.UnsortedList() + err = libovsdbops.CreateOrUpdateLogicalRouterPort(oc.nbClient, &logicalRouter, trRouterPort, nil, &trRouterPort.Networks) + if err != nil { + return fmt.Errorf("failed to update logical router port %s with masq IPs: %w", lrpName, err) + } + return nil +} + +// syncNodes finds nodes that still have LRP on the transit router, but the node doesn't exist anymore +// and clean it up. +// TODO add tests +func (oc *Layer2UserDefinedNetworkController) syncNodes(nodes []interface{}) error { + if err := oc.BaseLayer2UserDefinedNetworkController.syncNodes(nodes); err != nil { + return err + } + foundNodeNames := sets.New[string]() + foundNodes := make([]*corev1.Node, len(nodes)) + for i, obj := range nodes { + node, ok := obj.(*corev1.Node) + if !ok { + return fmt.Errorf("spurious object in syncNodes: %v", obj) + } + foundNodeNames.Insert(node.Name) + foundNodes[i] = node + } + oc.setRemoteNodesNoRouter(foundNodes) + // Get the transit router. 
If it's not present - no cleanup to do + tr := &nbdb.LogicalRouter{ + Name: oc.GetNetworkScopedClusterRouterName(), + } + + tr, err := libovsdbops.GetLogicalRouter(oc.nbClient, tr) + if err != nil { + if errors.Is(err, libovsdbclient.ErrNotFound) { + return nil + } + return err + } + + staleNodeNames := []string{} + for _, p := range tr.Ports { + lp := &nbdb.LogicalRouterPort{ + UUID: p, + } + + lp, err = libovsdbops.GetLogicalRouterPort(oc.nbClient, lp) + if err != nil { + continue + } + + if lp.ExternalIDs == nil { + continue + } + + lportNode := lp.ExternalIDs[types.NodeExternalID] + if !foundNodeNames.Has(lportNode) { + staleNodeNames = append(staleNodeNames, lportNode) + } + } + + for _, staleNodeName := range staleNodeNames { + if err = oc.cleanupRouterSetupForRemoteNodeGR(staleNodeName); err != nil { + klog.Errorf("Failed to cleanup the transit router resources from OVN Northbound db for the stale node %s: %v", staleNodeName, err) + } + } + return nil +} + +// setRemoteNodesNoRouter finds remote nodes that do not use transit router. 
+func (oc *Layer2UserDefinedNetworkController) setRemoteNodesNoRouter(nodes []*corev1.Node) { + for _, node := range nodes { + if oc.isLocalZoneNode(node) { + continue + } + if !util.UDNLayer2NodeUsesTransitRouter(node) { + oc.remoteNodesNoRouter.Insert(node.Name) + } + } +} diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go index 352ef1497f..6bd0c41373 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "net" - "strconv" "time" ipamclaimsapi "github.com/k8snetworkplumbingwg/ipamclaims/pkg/crd/ipamclaims/v1alpha1" @@ -370,7 +369,6 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { *netConf, ) Expect(err).NotTo(HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} const nodeIPv4CIDR = "192.168.126.202/24" testNode, err := newNodeWithUserDefinedNetworks(nodeName, nodeIPv4CIDR) @@ -388,7 +386,10 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { Expect(err).NotTo(HaveOccurred()) initialDB.NBData = append( initialDB.NBData, - expectedLayer2EgressEntities(networkConfig, *gwConfig, nodeName)...) + expectedGWEntitiesLayer2(nodeName, networkConfig, *gwConfig)...) + initialDB.NBData = append( + initialDB.NBData, + expectedLayer2EgressEntities(networkConfig, *gwConfig, networkConfig.Subnets()[0].CIDR)...) 
} initialDB.NBData = append(initialDB.NBData, nbZone) @@ -542,112 +543,118 @@ func dummyL2TestPod(nsName string, info userDefinedNetInfo, podIdx, udnNetIdx in return pod } -func expectedLayer2EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayConfig, nodeName string) []libovsdbtest.TestData { - const ( - nat1 = "nat1-UUID" - nat2 = "nat2-UUID" - nat3 = "nat3-UUID" - perPodSNAT = "pod-snat-UUID" - sr1 = "sr1-UUID" - sr2 = "sr2-UUID" - lrsr1 = "lrsr1-UUID" - routerPolicyUUID1 = "lrp1-UUID" - hostCIDRPolicyUUID = "host-cidr-policy-UUID" - masqSNATUUID1 = "masq-snat1-UUID" - ) - gwRouterName := fmt.Sprintf("GR_%s_test-node", netInfo.GetNetworkName()) - staticRouteOutputPort := ovntypes.GWRouterToExtSwitchPrefix + gwRouterName - gwRouterToNetworkSwitchPortName := ovntypes.RouterToSwitchPrefix + netInfo.GetNetworkScopedName(ovntypes.OVNLayer2Switch) - gwRouterToExtSwitchPortName := fmt.Sprintf("%s%s", ovntypes.GWRouterToExtSwitchPrefix, gwRouterName) - masqSNAT := newMasqueradeManagementNATEntry(masqSNATUUID1, netInfo) - - var nat []string - nat = append(nat, nat1, nat2, nat3, masqSNATUUID1) - gr := &nbdb.LogicalRouter{ - Name: gwRouterName, - UUID: gwRouterName + "-UUID", - Nat: nat, - Ports: []string{gwRouterToNetworkSwitchPortName + "-UUID", gwRouterToExtSwitchPortName + "-UUID"}, - StaticRoutes: []string{sr1, sr2}, - ExternalIDs: gwRouterExternalIDs(netInfo, gwConfig), - Options: gwRouterOptions(gwConfig), - Policies: []string{routerPolicyUUID1}, - } - gr.Options["lb_force_snat_ip"] = gwRouterJoinIPAddress().IP.String() - expectedEntities := []libovsdbtest.TestData{ - gr, - expectedGWToNetworkSwitchRouterPort(gwRouterToNetworkSwitchPortName, netInfo, gwRouterJoinIPAddress(), layer2SubnetGWAddr()), - expectedGRStaticRoute(sr1, dummyMasqueradeSubnet().String(), nextHopMasqueradeIP().String(), nil, &staticRouteOutputPort, netInfo), - expectedGRStaticRoute(sr2, ipv4DefaultRoute().String(), nodeGateway().IP.String(), nil, &staticRouteOutputPort, netInfo), - 
expectedGRToExternalSwitchLRP(gwRouterName, netInfo, nodePhysicalIPAddress(), udnGWSNATAddress()), - masqSNAT, - expectedLogicalRouterPolicy(routerPolicyUUID1, netInfo, nodeName, nodeIP().IP.String(), managementPortIP(layer2Subnet()).String()), - } +func getTestTransitRouterInfo(netInfo util.NetInfo) *transitRouterInfo { + transitRouterInfo, err := getTransitRouterInfo(netInfo, &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + // this is hardcoded in newNodeWithSecondaryNets + ovnNodeID: "4", + }, + }, + }) + Expect(err).NotTo(HaveOccurred()) + return transitRouterInfo +} +func expectedGWEntitiesLayer2(nodeName string, netInfo util.NetInfo, gwConfig util.L3GatewayConfig) []libovsdbtest.TestData { + gwRouterName := fmt.Sprintf("GR_%s_%s", netInfo.GetNetworkName(), nodeName) + trInfo := getTestTransitRouterInfo(netInfo) + expectedEntities := append( + expectedGWRouterPlusNATAndStaticRoutes(nodeName, gwRouterName, netInfo, gwConfig), + expectedGRToTransitRouterLRPLayer2(gwRouterName, gwRouterJoinIPAddress(), netInfo, trInfo), + expectedGRToExternalSwitchLRP(gwRouterName, netInfo, nodePhysicalIPAddress(), udnGWSNATAddress()), + ) expectedEntities = append(expectedEntities, expectedStaticMACBindings(gwRouterName, staticMACBindingIPs())...) 
- - if config.Gateway.Mode == config.GatewayModeLocal { - l2LGWLRP := expectedLogicalRouterPolicy(hostCIDRPolicyUUID, netInfo, nodeName, nodeCIDR().String(), managementPortIP(layer2Subnet()).String()) - l2LGWLRP.Match = fmt.Sprintf(`ip4.dst == %s && ip4.src == %s`, nodeCIDR().String(), layer2Subnet().String()) - l2LGWLRP.Priority, _ = strconv.Atoi(ovntypes.UDNHostCIDRPolicyPriority) - expectedEntities = append(expectedEntities, l2LGWLRP) - gr.Policies = append(gr.Policies, hostCIDRPolicyUUID) - lrsr := expectedGRStaticRoute(lrsr1, layer2Subnet().String(), managementPortIP(layer2Subnet()).String(), - &nbdb.LogicalRouterStaticRoutePolicySrcIP, nil, netInfo) - expectedEntities = append(expectedEntities, lrsr) - gr.StaticRoutes = append(gr.StaticRoutes, lrsr1) - } - expectedEntities = append(expectedEntities, expectedExternalSwitchAndLSPs(netInfo, gwConfig, nodeName)...) - expectedEntities = append(expectedEntities, newNATEntry(nat1, dummyMasqueradeIP().IP.String(), gwRouterJoinIPAddress().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) - expectedEntities = append(expectedEntities, newNATEntry(nat2, dummyMasqueradeIP().IP.String(), layer2Subnet().String(), standardNonDefaultNetworkExtIDs(netInfo), fmt.Sprintf("outport == %q", gwRouterToExtSwitchPortName))) - expectedEntities = append(expectedEntities, newNATEntry(nat3, dummyMasqueradeIP().IP.String(), layer2SubnetGWAddr().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) return expectedEntities } -func expectedGWToNetworkSwitchRouterPort(name string, netInfo util.NetInfo, networks ...*net.IPNet) *nbdb.LogicalRouterPort { +func expectedGRToTransitRouterLRPLayer2(gatewayRouterName string, gwRouterLRPIP *net.IPNet, netInfo util.NetInfo, + transitRouterInfo *transitRouterInfo) *nbdb.LogicalRouterPort { + lrpName := fmt.Sprintf("%s%s", ovntypes.RouterToTransitRouterPrefix, gatewayRouterName) options := map[string]string{libovsdbops.GatewayMTU: fmt.Sprintf("%d", 1400)} - lrp := 
expectedLogicalRouterPort(name, netInfo, options, networks...) - - if config.IPv6Mode { - lrp.Ipv6RaConfigs = map[string]string{ - "address_mode": "dhcpv6_stateful", - "mtu": "1400", - "send_periodic": "true", - "max_interval": "900", - "min_interval": "300", - "router_preference": "LOW", - } - } - return lrp -} -func layer2Subnet() *net.IPNet { - return &net.IPNet{ - IP: net.ParseIP("100.200.0.0"), - Mask: net.CIDRMask(16, 32), + var ips []string + ips = append(ips, gwRouterLRPIP.String()) + ips = append(ips, transitRouterInfo.gatewayRouterNets[0].String()) + mac := util.IPAddrToHWAddr(gwRouterLRPIP.IP).String() + return &nbdb.LogicalRouterPort{ + UUID: lrpName + "-UUID", + Name: lrpName, + Networks: ips, + MAC: mac, + Options: options, + ExternalIDs: map[string]string{ + ovntypes.TopologyExternalID: netInfo.TopologyType(), + ovntypes.NetworkExternalID: netInfo.GetNetworkName(), + }, + Peer: ptr.To(ovntypes.TransitRouterToRouterPrefix + gatewayRouterName), } } -func layer2SubnetGWAddr() *net.IPNet { - return &net.IPNet{ - IP: net.ParseIP("100.200.0.1"), - Mask: net.CIDRMask(16, 32), +func expectedLayer2EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayConfig, nodeSubnet *net.IPNet) []libovsdbtest.TestData { + const ( + routerPolicyUUID1 = "lrpol1-UUID" + staticRouteUUID1 = "sr1-UUID" + staticRouteUUID2 = "sr2-UUID" + masqSNATUUID1 = "masq-snat1-UUID" + ) + trInfo := getTestTransitRouterInfo(netInfo) + transitRouterName := fmt.Sprintf("%s_transit_router", netInfo.GetNetworkName()) + + rtosLRPName := fmt.Sprintf("%s%s", ovntypes.TransitRouterToSwitchPrefix, netInfo.GetNetworkScopedName(ovntypes.OVNLayer2Switch)) + rtosLRPUUID := rtosLRPName + "-UUID" + gwRouterName := fmt.Sprintf("GR_%s_%s", netInfo.GetNetworkName(), nodeName) + + rtorLRPName := fmt.Sprintf("%s%s", ovntypes.TransitRouterToRouterPrefix, gwRouterName) + rtorLRPUUID := rtorLRPName + "-UUID" + nodeIP := gwConfig.IPAddresses[0].IP.String() + masqSNAT := newNATEntry(masqSNATUUID1, 
"169.254.169.14", nodeSubnet.String(), standardNonDefaultNetworkExtIDs(netInfo), "") + masqSNAT.Match = getMasqueradeManagementIPSNATMatch(util.IPAddrToHWAddr(managementPortIP(nodeSubnet)).String()) + masqSNAT.LogicalPort = ptr.To(fmt.Sprintf("trtos-%s", netInfo.GetNetworkScopedName(ovntypes.OVNLayer2Switch))) + if !config.OVNKubernetesFeature.EnableInterconnect { + masqSNAT.GatewayPort = nil } -} - -func nodeGateway() *net.IPNet { - return &net.IPNet{ - IP: net.ParseIP("192.168.126.1"), - Mask: net.CIDRMask(24, 32), + gwChassisName := fmt.Sprintf("%s-%s", rtosLRPName, gwConfig.ChassisID) + gatewayChassisUUID := gwChassisName + "-UUID" + lrsrNextHop := trInfo.gatewayRouterNets[0].IP.String() + if config.Gateway.Mode == config.GatewayModeLocal { + lrsrNextHop = managementPortIP(nodeSubnet).String() } -} - -func ipv4DefaultRoute() *net.IPNet { - return &net.IPNet{ - IP: net.ParseIP("0.0.0.0"), - Mask: net.CIDRMask(0, 32), + expectedEntities := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: transitRouterName, + UUID: transitRouterName + "-UUID", + Ports: []string{rtosLRPUUID, rtorLRPUUID}, + StaticRoutes: []string{staticRouteUUID1, staticRouteUUID2}, + Policies: []string{routerPolicyUUID1}, + ExternalIDs: standardNonDefaultNetworkExtIDs(netInfo), + Nat: []string{masqSNATUUID1}, + }, + &nbdb.LogicalRouterPort{ + UUID: rtosLRPUUID, + Name: rtosLRPName, + Networks: []string{"100.200.0.1/16"}, + MAC: "0a:58:64:c8:00:01", + GatewayChassis: []string{gatewayChassisUUID}, + Options: map[string]string{libovsdbops.GatewayMTU: "1400"}, + }, + &nbdb.LogicalRouterPort{ + UUID: rtorLRPUUID, + Name: rtorLRPName, + Networks: []string{trInfo.transitRouterNets[0].String()}, + MAC: util.IPAddrToHWAddr(trInfo.transitRouterNets[0].IP).String(), + Options: map[string]string{libovsdbops.RequestedTnlKey: "4"}, + Peer: ptr.To(fmt.Sprintf("%s%s", ovntypes.RouterToTransitRouterPrefix, gwRouterName)), + ExternalIDs: standardNonDefaultNetworkExtIDs(netInfo), + }, + 
expectedGRStaticRoute(staticRouteUUID1, nodeSubnet.String(), lrsrNextHop, &nbdb.LogicalRouterStaticRoutePolicySrcIP, nil, netInfo), + expectedGRStaticRoute(staticRouteUUID2, gwRouterJoinIPAddress().IP.String(), trInfo.gatewayRouterNets[0].IP.String(), nil, nil, netInfo), + expectedLogicalRouterPolicy(routerPolicyUUID1, netInfo, nodeName, nodeIP, managementPortIP(nodeSubnet).String()), + masqSNAT, + &nbdb.GatewayChassis{UUID: gatewayChassisUUID, Name: gwChassisName, Priority: 1, ChassisName: gwConfig.ChassisID}, } + return expectedEntities } func dummyLayer2SecondaryUserDefinedNetwork(subnets string) userDefinedNetInfo { @@ -665,20 +672,6 @@ func dummyLayer2PrimaryUserDefinedNetwork(subnets string) userDefinedNetInfo { return secondaryNet } -func nodeIP() *net.IPNet { - return &net.IPNet{ - IP: net.ParseIP("192.168.126.202"), - Mask: net.CIDRMask(24, 32), - } -} - -func nodeCIDR() *net.IPNet { - return &net.IPNet{ - IP: net.ParseIP("192.168.126.0"), - Mask: net.CIDRMask(24, 32), - } -} - func setupFakeOvnForLayer2Topology(fakeOvn *FakeOVN, initialDB libovsdbtest.TestSetup, netInfo userDefinedNetInfo, testNode *corev1.Node, podInfo testPod, pod *corev1.Pod, extraObjects ...runtime.Object) error { By(fmt.Sprintf("creating a network attachment definition for network: %s", netInfo.netName)) nad, err := newNetworkAttachmentDefinition( @@ -687,7 +680,6 @@ func setupFakeOvnForLayer2Topology(fakeOvn *FakeOVN, initialDB libovsdbtest.Test *netInfo.netconf(), ) Expect(err).NotTo(HaveOccurred()) - nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: userDefinedNetworkID} By("setting up the OVN DB without any entities in it") Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) @@ -703,6 +695,10 @@ func setupFakeOvnForLayer2Topology(fakeOvn *FakeOVN, initialDB libovsdbtest.Test Name: fmt.Sprintf("GR_%s_%s", networkConfig.GetNetworkName(), nodeName), ExternalIDs: standardNonDefaultNetworkExtIDs(networkConfig), }, + &nbdb.LogicalRouter{ + Name: 
fmt.Sprintf("%s_transit_router", netInfo.netName), + ExternalIDs: standardNonDefaultNetworkExtIDs(networkConfig), + }, newNetworkClusterPortGroup(networkConfig), ) } diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go index 815a8b4c9c..0427e7cbeb 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go @@ -1059,7 +1059,7 @@ func (oc *Layer3UserDefinedNetworkController) nodeGatewayConfig(node *corev1.Nod return nil, fmt.Errorf("failed to get node %q subnet annotation for network %q: %v", node.Name, oc.GetNetworkName(), err) } - gwLRPJoinIPs, err := udn.GetGWRouterIPs(node, oc.GetNetInfo()) + gwRouterJoinNets, err := udn.GetGWRouterIPs(node, oc.GetNetInfo()) if err != nil { return nil, fmt.Errorf("failed extracting node %q GW router join subnet IP for layer3 network %q: %w", node.Name, networkName, err) } @@ -1071,7 +1071,7 @@ func (oc *Layer3UserDefinedNetworkController) nodeGatewayConfig(node *corev1.Nod annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterSubnets, - gwLRPJoinIPs: gwLRPJoinIPs, + gwRouterJoinNets: gwRouterJoinNets, hostAddrs: hostAddrs, externalIPs: externalIPs, ovnClusterLRPToJoinIfAddrs: oc.ovnClusterLRPToJoinIfAddrs, diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go index f9cec964ae..ed17e7b9cb 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go @@ -113,7 +113,6 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 3 network", func() { *netInfo.netconf(), ) Expect(err).NotTo(HaveOccurred()) - nad.Annotations = map[string]string{types.OvnNetworkIDAnnotation: userDefinedNetworkID} 
Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) n := newNamespace(ns) if netInfo.isPrimary { @@ -314,7 +313,6 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 3 network", func() { *netConf, ) Expect(err).NotTo(HaveOccurred()) - nad.Annotations = map[string]string{types.OvnNetworkIDAnnotation: userDefinedNetworkID} mutableNetworkConfig := util.NewMutableNetInfo(networkConfig) mutableNetworkConfig.SetNADs(util.GetNADName(nad.Namespace, nad.Name)) @@ -540,9 +538,24 @@ func (sni *userDefinedNetInfo) netconf() *ovncnitypes.NetConf { const plugin = "ovn-k8s-cni-overlay" role := types.NetworkRoleSecondary + transitSubnet := "" if sni.isPrimary { role = types.NetworkRolePrimary + if sni.topology == types.Layer2Topology { + transitSubnets := []string{} + for _, clusterSubnet := range strings.Split(sni.clustersubnets, ",") { + _, cidr, err := net.ParseCIDR(clusterSubnet) + Expect(err).NotTo(HaveOccurred()) + if knet.IsIPv4CIDR(cidr) { + transitSubnets = append(transitSubnets, config.ClusterManager.V4TransitSubnet) + } else { + transitSubnets = append(transitSubnets, config.ClusterManager.V6TransitSubnet) + } + } + transitSubnet = strings.Join(transitSubnets, ",") + } } + return &ovncnitypes.NetConf{ NetConf: cnitypes.NetConf{ Name: sni.netName, @@ -553,6 +566,7 @@ func (sni *userDefinedNetInfo) netconf() *ovncnitypes.NetConf { Subnets: sni.clustersubnets, Role: role, AllowPersistentIPs: sni.allowPersistentIPs, + TransitSubnet: transitSubnet, } } @@ -703,12 +717,10 @@ func expectedGWRouterPlusNATAndStaticRoutes( gwConfig util.L3GatewayConfig, ) []libovsdbtest.TestData { gwRouterToExtLRPUUID := fmt.Sprintf("%s%s-UUID", types.GWRouterToExtSwitchPrefix, gwRouterName) - gwRouterToJoinLRPUUID := fmt.Sprintf("%s%s-UUID", types.GWRouterToJoinSwitchPrefix, gwRouterName) const ( nat1 = "abc-UUID" nat2 = "cba-UUID" - perPodSNAT = "pod-snat-UUID" staticRoute1 = "srA-UUID" staticRoute2 = "srB-UUID" staticRoute3 = "srC-UUID" @@ -716,6 +728,16 @@ func 
expectedGWRouterPlusNATAndStaticRoutes( ) staticRouteOutputPort := types.GWRouterToExtSwitchPrefix + netInfo.GetNetworkScopedGWRouterName(nodeName) + gwRouterLRPUUID := fmt.Sprintf("%s%s-UUID", types.GWRouterToJoinSwitchPrefix, gwRouterName) + grOptions := gwRouterOptions(gwConfig) + sr1 := expectedGRStaticRoute(staticRoute1, netInfo.Subnets()[0].CIDR.String(), dummyMasqueradeIP().IP.String(), nil, nil, netInfo) + if netInfo.TopologyType() == types.Layer2Topology { + gwRouterLRPUUID = fmt.Sprintf("%s%s-UUID", types.RouterToTransitRouterPrefix, gwRouterName) + grOptions["lb_force_snat_ip"] = gwRouterJoinIPAddress().IP.String() + transitRouteOutputPort := types.RouterToTransitRouterPrefix + netInfo.GetNetworkScopedGWRouterName(nodeName) + trInfo := getTestTransitRouterInfo(netInfo) + sr1 = expectedGRStaticRoute(staticRoute1, netInfo.Subnets()[0].CIDR.String(), trInfo.transitRouterNets[0].IP.String(), nil, &transitRouteOutputPort, netInfo) + } nextHopIP := gwConfig.NextHops[0].String() nextHopMasqIP := nextHopMasqueradeIP().String() masqSubnet := config.Gateway.V4MasqueradeSubnet @@ -726,12 +748,12 @@ func expectedGWRouterPlusNATAndStaticRoutes( Name: gwRouterName, UUID: gwRouterName + "-UUID", ExternalIDs: gwRouterExternalIDs(netInfo, gwConfig), - Options: gwRouterOptions(gwConfig), - Ports: []string{gwRouterToJoinLRPUUID, gwRouterToExtLRPUUID}, + Options: grOptions, + Ports: []string{gwRouterLRPUUID, gwRouterToExtLRPUUID}, Nat: nat, StaticRoutes: []string{staticRoute1, staticRoute2, staticRoute3}, }, - expectedGRStaticRoute(staticRoute1, netInfo.Subnets()[0].CIDR.String(), dummyMasqueradeIP().IP.String(), nil, nil, netInfo), + sr1, expectedGRStaticRoute(staticRoute2, ipv4DefaultRoute, nextHopIP, nil, &staticRouteOutputPort, netInfo), expectedGRStaticRoute(staticRoute3, masqSubnet, nextHopMasqIP, nil, &staticRouteOutputPort, netInfo), } @@ -800,7 +822,6 @@ func expectedLayer3EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayC routerPolicyUUID2 = 
"lrpol2-UUID" staticRouteUUID1 = "sr1-UUID" staticRouteUUID2 = "sr2-UUID" - staticRouteUUID3 = "sr3-UUID" masqSNATUUID1 = "masq-snat1-UUID" ) masqIPAddr := dummyMasqueradeIP().IP.String() @@ -913,18 +934,6 @@ func udnGWSNATAddress() *net.IPNet { } } -func newMasqueradeManagementNATEntry(uuid string, netInfo util.NetInfo) *nbdb.NAT { - masqSNAT := newNATEntry( - uuid, - "169.254.169.14", - layer2Subnet().String(), - standardNonDefaultNetworkExtIDs(netInfo), - getMasqueradeManagementIPSNATMatch(util.IPAddrToHWAddr(managementPortIP(layer2Subnet())).String()), - ) - masqSNAT.LogicalPort = ptr.To(fmt.Sprintf("rtoj-GR_%s_%s", netInfo.GetNetworkName(), nodeName)) - return masqSNAT -} - func newNATEntry(uuid string, externalIP string, logicalIP string, extIDs map[string]string, match string) *nbdb.NAT { return &nbdb.NAT{ UUID: uuid, diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 432aa784be..ddbc260ecc 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -38,7 +38,7 @@ type GatewayConfig struct { annoConfig *util.L3GatewayConfig hostSubnets []*net.IPNet clusterSubnets []*net.IPNet - gwLRPJoinIPs []*net.IPNet + gwRouterJoinNets []*net.IPNet hostAddrs []string externalIPs []net.IP ovnClusterLRPToJoinIfAddrs []*net.IPNet @@ -139,7 +139,7 @@ func (oc *DefaultNetworkController) nodeGatewayConfig(node *corev1.Node) (*Gatew annoConfig: l3GatewayConfig, hostSubnets: hostSubnets, clusterSubnets: clusterSubnets, - gwLRPJoinIPs: gwLRPIPs, + gwRouterJoinNets: gwLRPIPs, hostAddrs: hostAddrs, externalIPs: externalIPs, ovnClusterLRPToJoinIfAddrs: oc.ovnClusterLRPToJoinIfAddrs, diff --git a/go-controller/pkg/ovn/multihoming_test.go b/go-controller/pkg/ovn/multihoming_test.go index cd4f07137a..84643dec7b 100644 --- a/go-controller/pkg/ovn/multihoming_test.go +++ b/go-controller/pkg/ovn/multihoming_test.go @@ -207,23 +207,20 @@ func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPortsW data = 
append(data, mgmtPort) nodeslsps[switchName] = append(nodeslsps[switchName], mgmtPortUUID) - networkSwitchToGWRouterLSPName := ovntypes.SwitchToRouterPrefix + switchName - networkSwitchToGWRouterLSPUUID := networkSwitchToGWRouterLSPName + "-UUID" + networkSwitchToTransitRouterLSPName := ovntypes.SwitchToTransitRouterPrefix + switchName + networkSwitchToGWRouterLSPUUID := networkSwitchToTransitRouterLSPName + "-UUID" lsp := &nbdb.LogicalSwitchPort{ UUID: networkSwitchToGWRouterLSPUUID, - Name: networkSwitchToGWRouterLSPName, + Name: networkSwitchToTransitRouterLSPName, Addresses: []string{"router"}, ExternalIDs: map[string]string{ "k8s.ovn.org/topology": ocInfo.bnc.TopologyType(), "k8s.ovn.org/network": ocInfo.bnc.GetNetworkName(), }, - Options: map[string]string{libovsdbops.RouterPort: ovntypes.RouterToSwitchPrefix + switchName}, + Options: map[string]string{libovsdbops.RouterPort: ovntypes.TransitRouterToSwitchPrefix + switchName}, Type: "router", } data = append(data, lsp) - if util.IsNetworkSegmentationSupportEnabled() && ocInfo.bnc.IsPrimaryNetwork() { - lsp.Options[libovsdbops.RequestedTnlKey] = "25" - } nodeslsps[switchName] = append(nodeslsps[switchName], networkSwitchToGWRouterLSPUUID) const aclUUID = "acl1-UUID" @@ -277,7 +274,8 @@ func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPortsW data = append(data, expectedGWEntities(pod.nodeName, ocInfo.bnc, *em.gatewayConfig)...) data = append(data, expectedLayer3EgressEntities(ocInfo.bnc, *em.gatewayConfig, subnet)...) } else { - data = append(data, expectedLayer2EgressEntities(ocInfo.bnc, *em.gatewayConfig, pod.nodeName)...) + data = append(data, expectedGWEntitiesLayer2(pod.nodeName, ocInfo.bnc, *em.gatewayConfig)...) + data = append(data, expectedLayer2EgressEntities(ocInfo.bnc, *em.gatewayConfig, subnet)...) 
} } if _, alreadyAdded := alreadyAddedManagementElements[pod.nodeName]; !alreadyAdded && diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index 8c0fbcd54e..c4bedb48a8 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -489,8 +489,13 @@ func newNetworkAttachmentDefinition(namespace, name string, netconf ovncnitypes. if err != nil { return nil, fmt.Errorf("failed marshaling podNetworks map %v", netconf) } + meta := newObjectMeta(name, namespace) + meta.Annotations = map[string]string{types.OvnNetworkIDAnnotation: userDefinedNetworkID} + if netconf.Topology == types.Layer2Topology && netconf.Role == types.NetworkRolePrimary { + meta.Annotations[types.OvnNetworkTunnelKeysAnnotation] = "[16711685,16715780]" + } return &nettypes.NetworkAttachmentDefinition{ - ObjectMeta: newObjectMeta(name, namespace), + ObjectMeta: meta, Spec: nettypes.NetworkAttachmentDefinitionSpec{ Config: string(bytes), }, diff --git a/go-controller/pkg/ovn/topology/topologyfactory.go b/go-controller/pkg/ovn/topology/topologyfactory.go index 45738cf85f..ead14e05b2 100644 --- a/go-controller/pkg/ovn/topology/topologyfactory.go +++ b/go-controller/pkg/ovn/topology/topologyfactory.go @@ -40,6 +40,15 @@ func (gtf *GatewayTopologyFactory) NewClusterRouterWithMulticastSupport( return gtf.newClusterRouter(clusterRouterName, netInfo, coopUUID, routerOptions) } +func (gtf *GatewayTopologyFactory) NewTransitRouter( + netInfo util.NetInfo, + coopUUID string, + tunnelKey string, +) (*nbdb.LogicalRouter, error) { + routerOptions := map[string]string{libovsdbops.RequestedTnlKey: tunnelKey} + return gtf.newClusterRouter(netInfo.GetNetworkScopedClusterRouterName(), netInfo, coopUUID, routerOptions) +} + func (gtf *GatewayTopologyFactory) newClusterRouter( clusterRouterName string, netInfo util.NetInfo, diff --git a/go-controller/pkg/ovn/transit_router.go b/go-controller/pkg/ovn/transit_router.go new file mode 100644 index 
0000000000..47bd806d9d --- /dev/null +++ b/go-controller/pkg/ovn/transit_router.go @@ -0,0 +1,52 @@ +package ovn + +import ( + "fmt" + "net" + + corev1 "k8s.io/api/core/v1" + + udn "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/generator/ip" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +type transitRouterInfo struct { + gatewayRouterNets, transitRouterNets []*net.IPNet + nodeID int +} + +// getTransitRouterInfo calculates the gateway and cluster router networks for every node based on the node ID. +// we use netInfo.TransitSubnets() to split it into smaller networks. +// For transit-subnet: 100.88.0.0/16, and nodeID=2, we will get: +// - Transit Router IP: 100.88.0.4/31 +// - Gateway Router IP: 100.88.0.5/31 +func getTransitRouterInfo(netInfo util.NetInfo, node *corev1.Node) (*transitRouterInfo, error) { + if netInfo.TopologyType() != types.Layer2Topology || !netInfo.IsPrimaryNetwork() { + return nil, fmt.Errorf("transit router networks are only calculated for primary L2 user defined networks") + } + nodeID, _ := util.GetNodeID(node) + if nodeID == util.InvalidNodeID { + return nil, fmt.Errorf("invalid node id calculating transit router networks") + } + routerInfo := &transitRouterInfo{ + nodeID: nodeID, + } + for _, transitSubnet := range netInfo.TransitSubnets() { + ipGenerator, err := udn.NewIPGenerator(transitSubnet.String()) + if err != nil { + return nil, err + } + transitRouterIP, gatewayRouterIP, err := ipGenerator.GenerateIPPair(nodeID) + if err != nil { + return nil, err + } + + routerInfo.transitRouterNets = append(routerInfo.transitRouterNets, transitRouterIP) + routerInfo.gatewayRouterNets = append(routerInfo.gatewayRouterNets, gatewayRouterIP) + } + if len(routerInfo.transitRouterNets) == 0 || len(routerInfo.gatewayRouterNets) == 0 { + return nil, fmt.Errorf("network %s has no transit subnets defined", netInfo.GetNetworkName()) + } + return routerInfo, nil +} diff 
--git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go index 1549bf5481..23a310c9ab 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go @@ -163,7 +163,7 @@ func (zic *ZoneInterconnectHandler) createOrUpdateTransitSwitch(networkID int) e Name: zic.networkTransitSwitchName, ExternalIDs: externalIDs, } - zic.addTransitSwitchConfig(ts, networkID) + zic.addTransitSwitchConfig(ts, BaseTransitSwitchTunnelKey+networkID) // Create transit switch if it doesn't exist if err := libovsdbops.CreateOrUpdateLogicalSwitch(zic.nbClient, ts); err != nil { return fmt.Errorf("failed to create/update transit switch %s: %w", zic.networkTransitSwitchName, err) @@ -339,12 +339,13 @@ func (zic *ZoneInterconnectHandler) Cleanup() error { return libovsdbops.DeleteLogicalSwitch(zic.nbClient, zic.networkTransitSwitchName) } -func (zic *ZoneInterconnectHandler) AddTransitSwitchConfig(sw *nbdb.LogicalSwitch) error { +// AddTransitSwitchConfig is only used by the layer2 network controller +func (zic *ZoneInterconnectHandler) AddTransitSwitchConfig(sw *nbdb.LogicalSwitch, tunnelKey int) error { if zic.TopologyType() != types.Layer2Topology { return nil } - zic.addTransitSwitchConfig(sw, zic.GetNetworkID()) + zic.addTransitSwitchConfig(sw, tunnelKey) return nil } @@ -370,13 +371,13 @@ func (zic *ZoneInterconnectHandler) AddTransitPortConfig(remote bool, podAnnotat return nil } -func (zic *ZoneInterconnectHandler) addTransitSwitchConfig(sw *nbdb.LogicalSwitch, networkID int) { +func (zic *ZoneInterconnectHandler) addTransitSwitchConfig(sw *nbdb.LogicalSwitch, tunnelKey int) { if sw.OtherConfig == nil { sw.OtherConfig = map[string]string{} } sw.OtherConfig["interconn-ts"] = sw.Name - sw.OtherConfig[libovsdbops.RequestedTnlKey] = strconv.Itoa(BaseTransitSwitchTunnelKey + networkID) + sw.OtherConfig[libovsdbops.RequestedTnlKey] = 
strconv.Itoa(tunnelKey) sw.OtherConfig["mcast_snoop"] = "true" sw.OtherConfig["mcast_querier"] = "false" sw.OtherConfig["mcast_flood_unregistered"] = "true" diff --git a/go-controller/pkg/ovnwebhook/nodeadmission.go b/go-controller/pkg/ovnwebhook/nodeadmission.go index 15b98db2fc..e7dc733371 100644 --- a/go-controller/pkg/ovnwebhook/nodeadmission.go +++ b/go-controller/pkg/ovnwebhook/nodeadmission.go @@ -67,6 +67,13 @@ var interconnectNodeAnnotationChecks = map[string]checkNodeAnnot{ return fmt.Errorf("%s can only be set to %s, it cannot be removed", util.OvnNodeMigratedZoneName, nodeName) }, + util.Layer2TopologyVersion: func(v annotationChange, _ string) error { + // it is allowed for the annotation to be added or removed + if v.action == added || v.action == removed { + return nil + } + return fmt.Errorf("%s can only be added or removed, not updated", util.Layer2TopologyVersion) + }, } // hybridOverlayNodeAnnotationChecks holds annotations allowed for ovnkube-node: users hybrid overlay environments diff --git a/go-controller/pkg/testing/mocks/github.com/containernetworking/cni/pkg/types/Result.go b/go-controller/pkg/testing/mocks/github.com/containernetworking/cni/pkg/types/Result.go index 4fc1e8bd3a..a5ce63ec2a 100644 --- a/go-controller/pkg/testing/mocks/github.com/containernetworking/cni/pkg/types/Result.go +++ b/go-controller/pkg/testing/mocks/github.com/containernetworking/cni/pkg/types/Result.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -44,7 +44,7 @@ func (_m *Result) GetAsVersion(version string) (types.Result, error) { return r0, r1 } -// Print provides a mock function with given fields: +// Print provides a mock function with no fields func (_m *Result) Print() error { ret := _m.Called() @@ -80,7 +80,7 @@ func (_m *Result) PrintTo(writer io.Writer) error { return r0 } -// Version provides a mock function with given fields: +// Version provides a mock function with no fields func (_m *Result) Version() string { ret := _m.Called() diff --git a/go-controller/pkg/testing/mocks/github.com/containernetworking/plugins/pkg/ns/NetNS.go b/go-controller/pkg/testing/mocks/github.com/containernetworking/plugins/pkg/ns/NetNS.go index db75f12abc..479cde7e30 100644 --- a/go-controller/pkg/testing/mocks/github.com/containernetworking/plugins/pkg/ns/NetNS.go +++ b/go-controller/pkg/testing/mocks/github.com/containernetworking/plugins/pkg/ns/NetNS.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -12,7 +12,7 @@ type NetNS struct { mock.Mock } -// Close provides a mock function with given fields: +// Close provides a mock function with no fields func (_m *NetNS) Close() error { ret := _m.Called() @@ -48,7 +48,7 @@ func (_m *NetNS) Do(toRun func(ns.NetNS) error) error { return r0 } -// Fd provides a mock function with given fields: +// Fd provides a mock function with no fields func (_m *NetNS) Fd() uintptr { ret := _m.Called() @@ -66,7 +66,7 @@ func (_m *NetNS) Fd() uintptr { return r0 } -// Path provides a mock function with given fields: +// Path provides a mock function with no fields func (_m *NetNS) Path() string { ret := _m.Called() @@ -84,7 +84,7 @@ func (_m *NetNS) Path() string { return r0 } -// Set provides a mock function with given fields: +// Set provides a mock function with no fields func (_m *NetNS) Set() error { ret := _m.Called() diff --git a/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionInformer.go b/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionInformer.go index 17da81f3f7..0feed7ef73 100644 --- a/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionInformer.go +++ b/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionInformer.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -13,7 +13,7 @@ type NetworkAttachmentDefinitionInformer struct { mock.Mock } -// Informer provides a mock function with given fields: +// Informer provides a mock function with no fields func (_m *NetworkAttachmentDefinitionInformer) Informer() cache.SharedIndexInformer { ret := _m.Called() @@ -33,7 +33,7 @@ func (_m *NetworkAttachmentDefinitionInformer) Informer() cache.SharedIndexInfor return r0 } -// Lister provides a mock function with given fields: +// Lister provides a mock function with no fields func (_m *NetworkAttachmentDefinitionInformer) Lister() k8s_cni_cncf_iov1.NetworkAttachmentDefinitionLister { ret := _m.Called() diff --git a/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionLister.go b/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionLister.go index 1b03e0fc71..7092f9bc28 100644 --- a/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionLister.go +++ b/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionLister.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionNamespaceLister.go b/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionNamespaceLister.go index f725105c5f..9d94ac1233 100644 --- a/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionNamespaceLister.go +++ b/go-controller/pkg/testing/mocks/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1/NetworkAttachmentDefinitionNamespaceLister.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ManagementPort.go b/go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport/Interface.go similarity index 54% rename from go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ManagementPort.go rename to go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport/Interface.go index 0c99fb1bf3..68fcc42a81 100644 --- a/go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ManagementPort.go +++ b/go-controller/pkg/testing/mocks/github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport/Interface.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -8,13 +8,13 @@ import ( mock "github.com/stretchr/testify/mock" ) -// ManagementPort is an autogenerated mock type for the ManagementPort type -type ManagementPort struct { +// Interface is an autogenerated mock type for the Interface type +type Interface struct { mock.Mock } -// GetAddresses provides a mock function with given fields: -func (_m *ManagementPort) GetAddresses() []*net.IPNet { +// GetAddresses provides a mock function with no fields +func (_m *Interface) GetAddresses() []*net.IPNet { ret := _m.Called() if len(ret) == 0 { @@ -33,8 +33,8 @@ func (_m *ManagementPort) GetAddresses() []*net.IPNet { return r0 } -// GetInterfaceName provides a mock function with given fields: -func (_m *ManagementPort) GetInterfaceName() string { +// GetInterfaceName provides a mock function with no fields +func (_m *Interface) GetInterfaceName() string { ret := _m.Called() if len(ret) == 0 { @@ -51,13 +51,13 @@ func (_m *ManagementPort) GetInterfaceName() string { return r0 } -// NewManagementPort creates a new instance of ManagementPort. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// NewInterface creates a new instance of Interface. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. 
-func NewManagementPort(t interface { +func NewInterface(t interface { mock.TestingT Cleanup(func()) -}) *ManagementPort { - mock := &ManagementPort{} +}) *Interface { + mock := &Interface{} mock.Mock.Test(t) t.Cleanup(func() { mock.AssertExpectations(t) }) diff --git a/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink/Link.go b/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink/Link.go index 2ffa594664..956976be77 100644 --- a/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink/Link.go +++ b/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink/Link.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks @@ -12,7 +12,7 @@ type Link struct { mock.Mock } -// Attrs provides a mock function with given fields: +// Attrs provides a mock function with no fields func (_m *Link) Attrs() *netlink.LinkAttrs { ret := _m.Called() @@ -32,7 +32,7 @@ func (_m *Link) Attrs() *netlink.LinkAttrs { return r0 } -// Type provides a mock function with given fields: +// Type provides a mock function with no fields func (_m *Link) Type() string { ret := _m.Called() diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/NodeInformer.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/NodeInformer.go index e1bbc30420..b23a629d3d 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/NodeInformer.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/NodeInformer.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -14,7 +14,7 @@ type NodeInformer struct { mock.Mock } -// Informer provides a mock function with given fields: +// Informer provides a mock function with no fields func (_m *NodeInformer) Informer() cache.SharedIndexInformer { ret := _m.Called() @@ -34,7 +34,7 @@ func (_m *NodeInformer) Informer() cache.SharedIndexInformer { return r0 } -// Lister provides a mock function with given fields: +// Lister provides a mock function with no fields func (_m *NodeInformer) Lister() corev1.NodeLister { ret := _m.Called() diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/PodInformer.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/PodInformer.go index 9117816221..1fc4268995 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/PodInformer.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/PodInformer.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -14,7 +14,7 @@ type PodInformer struct { mock.Mock } -// Informer provides a mock function with given fields: +// Informer provides a mock function with no fields func (_m *PodInformer) Informer() cache.SharedIndexInformer { ret := _m.Called() @@ -34,7 +34,7 @@ func (_m *PodInformer) Informer() cache.SharedIndexInformer { return r0 } -// Lister provides a mock function with given fields: +// Lister provides a mock function with no fields func (_m *PodInformer) Lister() corev1.PodLister { ret := _m.Called() diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/NodeLister.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/NodeLister.go index db2fe8bad2..062997c853 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/NodeLister.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/NodeLister.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodLister.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodLister.go index 6406289383..614f3771f0 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodLister.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodLister.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodListerExpansion.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodListerExpansion.go index b2b51db195..e9a83a363a 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodListerExpansion.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodListerExpansion.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodNamespaceLister.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodNamespaceLister.go index 3787637eaf..02de951efd 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodNamespaceLister.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodNamespaceLister.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodNamespaceListerExpansion.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodNamespaceListerExpansion.go index 5b44e905c6..a940e46524 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodNamespaceListerExpansion.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodNamespaceListerExpansion.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateLister.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateLister.go index 949bf133dc..b93a4d1809 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateLister.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateLister.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateListerExpansion.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateListerExpansion.go index 0b831422d3..3d0f1ebb0a 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateListerExpansion.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateListerExpansion.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateNamespaceLister.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateNamespaceLister.go index db55859afe..eb734298b7 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateNamespaceLister.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateNamespaceLister.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateNamespaceListerExpansion.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateNamespaceListerExpansion.go index f94253da08..eb12bf0fce 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateNamespaceListerExpansion.go +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/PodTemplateNamespaceListerExpansion.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/testing/mocks/k8s.io/utils/exec/Cmd.go b/go-controller/pkg/testing/mocks/k8s.io/utils/exec/Cmd.go index 4c1db92025..4cce4a405e 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/utils/exec/Cmd.go +++ b/go-controller/pkg/testing/mocks/k8s.io/utils/exec/Cmd.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -13,7 +13,7 @@ type Cmd struct { mock.Mock } -// CombinedOutput provides a mock function with given fields: +// CombinedOutput provides a mock function with no fields func (_m *Cmd) CombinedOutput() ([]byte, error) { ret := _m.Called() @@ -43,7 +43,7 @@ func (_m *Cmd) CombinedOutput() ([]byte, error) { return r0, r1 } -// Output provides a mock function with given fields: +// Output provides a mock function with no fields func (_m *Cmd) Output() ([]byte, error) { ret := _m.Called() @@ -73,7 +73,7 @@ func (_m *Cmd) Output() ([]byte, error) { return r0, r1 } -// Run provides a mock function with given fields: +// Run provides a mock function with no fields func (_m *Cmd) Run() error { ret := _m.Called() @@ -116,7 +116,7 @@ func (_m *Cmd) SetStdout(out io.Writer) { _m.Called(out) } -// Start provides a mock function with given fields: +// Start provides a mock function with no fields func (_m *Cmd) Start() error { ret := _m.Called() @@ -134,7 +134,7 @@ func (_m *Cmd) Start() error { return r0 } -// StderrPipe provides a mock function with given fields: +// StderrPipe provides a mock function with no fields func (_m *Cmd) StderrPipe() (io.ReadCloser, error) { ret := _m.Called() @@ -164,7 +164,7 @@ func (_m *Cmd) StderrPipe() (io.ReadCloser, error) { return r0, r1 } -// StdoutPipe provides a mock function with given fields: +// StdoutPipe provides a mock function with no fields func (_m *Cmd) StdoutPipe() (io.ReadCloser, error) { ret := _m.Called() @@ -194,12 +194,12 @@ func (_m *Cmd) StdoutPipe() (io.ReadCloser, error) { return r0, r1 } -// Stop provides a mock function with given fields: +// Stop provides a mock function with no fields func (_m *Cmd) Stop() { _m.Called() } -// Wait provides a mock function with given fields: +// Wait provides a mock function with no fields func (_m *Cmd) Wait() error { ret := _m.Called() diff --git a/go-controller/pkg/testing/mocks/k8s.io/utils/exec/ExitError.go 
b/go-controller/pkg/testing/mocks/k8s.io/utils/exec/ExitError.go index 1acd93ed11..837411f2da 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/utils/exec/ExitError.go +++ b/go-controller/pkg/testing/mocks/k8s.io/utils/exec/ExitError.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks @@ -9,7 +9,7 @@ type ExitError struct { mock.Mock } -// Error provides a mock function with given fields: +// Error provides a mock function with no fields func (_m *ExitError) Error() string { ret := _m.Called() @@ -27,7 +27,7 @@ func (_m *ExitError) Error() string { return r0 } -// ExitStatus provides a mock function with given fields: +// ExitStatus provides a mock function with no fields func (_m *ExitError) ExitStatus() int { ret := _m.Called() @@ -45,7 +45,7 @@ func (_m *ExitError) ExitStatus() int { return r0 } -// Exited provides a mock function with given fields: +// Exited provides a mock function with no fields func (_m *ExitError) Exited() bool { ret := _m.Called() @@ -63,7 +63,7 @@ func (_m *ExitError) Exited() bool { return r0 } -// String provides a mock function with given fields: +// String provides a mock function with no fields func (_m *ExitError) String() string { ret := _m.Called() diff --git a/go-controller/pkg/testing/mocks/k8s.io/utils/exec/Interface.go b/go-controller/pkg/testing/mocks/k8s.io/utils/exec/Interface.go index 0b4b0a8f9c..a1523dc53b 100644 --- a/go-controller/pkg/testing/mocks/k8s.io/utils/exec/Interface.go +++ b/go-controller/pkg/testing/mocks/k8s.io/utils/exec/Interface.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 20fdf23d31..2852cb624f 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -32,10 +32,6 @@ const ( // access to local service LocalNetworkName = "locnet" - // Local Bridge used for DGP access - LocalBridgeName = "br-local" - LocalnetGatewayNextHopPort = "ovn-k8s-gw0" - // OVS Bridge Datapath types DatapathUserspace = "netdev" @@ -43,25 +39,19 @@ const ( OVNClusterRouter = "ovn_cluster_router" OVNJoinSwitch = "join" - JoinSwitchPrefix = "join_" - ExternalSwitchPrefix = "ext_" - GWRouterPrefix = "GR_" - GWRouterLocalLBPostfix = "_local" - RouterToSwitchPrefix = "rtos-" - InterPrefix = "inter-" - HybridSubnetPrefix = "hybrid-subnet-" - SwitchToRouterPrefix = "stor-" - JoinSwitchToGWRouterPrefix = "jtor-" - GWRouterToJoinSwitchPrefix = "rtoj-" - DistRouterToJoinSwitchPrefix = "dtoj-" - JoinSwitchToDistRouterPrefix = "jtod-" - EXTSwitchToGWRouterPrefix = "etor-" - GWRouterToExtSwitchPrefix = "rtoe-" - EgressGWSwitchPrefix = "exgw-" - PatchPortPrefix = "patch-" - PatchPortSuffix = "-to-br-int" - - NodeLocalSwitch = "node_local_switch" + JoinSwitchPrefix = "join_" + ExternalSwitchPrefix = "ext_" + GWRouterPrefix = "GR_" + RouterToSwitchPrefix = "rtos-" + HybridSubnetPrefix = "hybrid-subnet-" + SwitchToRouterPrefix = "stor-" + JoinSwitchToGWRouterPrefix = "jtor-" + GWRouterToJoinSwitchPrefix = "rtoj-" + EXTSwitchToGWRouterPrefix = "etor-" + GWRouterToExtSwitchPrefix = "rtoe-" + EgressGWSwitchPrefix = "exgw-" + PatchPortPrefix = "patch-" + PatchPortSuffix = "-to-br-int" // types.OVNLayer2Switch is the name of layer2 topology switch OVNLayer2Switch = "ovn_layer2_switch" @@ -73,6 +63,11 @@ const ( TransitSwitch = "transit_switch" TransitSwitchToRouterPrefix = "tstor-" RouterToTransitSwitchPrefix = "rtots-" + TransitRouter = "transit_router" + TransitRouterToRouterPrefix = "trtor-" + RouterToTransitRouterPrefix = "rtotr-" + 
TransitRouterToSwitchPrefix = "trtos-" + SwitchToTransitRouterPrefix = "stotr-" // DefaultACLTier Priorities @@ -169,7 +164,9 @@ const ( // OvnNetworkIDAnnotation is a unique network identifier annotated on the // NAD by cluster manager nad controller OvnNetworkIDAnnotation = OvnK8sPrefix + "/network-id" - + // OvnNetworkTunnelKeysAnnotation is used to assign tunnel keys for the distributed switches and routers + // Assigned to the NADs for now + OvnNetworkTunnelKeysAnnotation = OvnK8sPrefix + "/tunnel-keys" // Deprecated: we used to set topology version as an annotation on the node. We don't do this anymore. OvnK8sTopoAnno = OvnK8sPrefix + "/" + "topology-version" OvnK8sSmallMTUTaintKey = OvnK8sPrefix + "/" + "mtu-too-small" diff --git a/go-controller/pkg/util/mocks/DNSOps.go b/go-controller/pkg/util/mocks/DNSOps.go index 1ca8487d09..b2e5d6cda6 100644 --- a/go-controller/pkg/util/mocks/DNSOps.go +++ b/go-controller/pkg/util/mocks/DNSOps.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/util/mocks/ExecRunner.go b/go-controller/pkg/util/mocks/ExecRunner.go index 88afca4180..a8fe015ea2 100644 --- a/go-controller/pkg/util/mocks/ExecRunner.go +++ b/go-controller/pkg/util/mocks/ExecRunner.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/util/mocks/FileSystemOps.go b/go-controller/pkg/util/mocks/FileSystemOps.go index c7c8bdbcb8..e075dbf0ed 100644 --- a/go-controller/pkg/util/mocks/FileSystemOps.go +++ b/go-controller/pkg/util/mocks/FileSystemOps.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/util/mocks/NetLinkOps.go b/go-controller/pkg/util/mocks/NetLinkOps.go index d9cd045b54..a72e496715 100644 --- a/go-controller/pkg/util/mocks/NetLinkOps.go +++ b/go-controller/pkg/util/mocks/NetLinkOps.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks @@ -230,7 +230,7 @@ func (_m *NetLinkOps) LinkDelete(link netlink.Link) error { return r0 } -// LinkList provides a mock function with given fields: +// LinkList provides a mock function with no fields func (_m *NetLinkOps) LinkList() ([]netlink.Link, error) { ret := _m.Called() diff --git a/go-controller/pkg/util/mocks/SriovnetOps.go b/go-controller/pkg/util/mocks/SriovnetOps.go index ea6f4560b3..ba798aa9b3 100644 --- a/go-controller/pkg/util/mocks/SriovnetOps.go +++ b/go-controller/pkg/util/mocks/SriovnetOps.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks diff --git a/go-controller/pkg/util/mocks/VdpaDevice.go b/go-controller/pkg/util/mocks/VdpaDevice.go index f3b21787df..c18e8039c8 100644 --- a/go-controller/pkg/util/mocks/VdpaDevice.go +++ b/go-controller/pkg/util/mocks/VdpaDevice.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks @@ -12,7 +12,7 @@ type VdpaDevice struct { mock.Mock } -// Driver provides a mock function with given fields: +// Driver provides a mock function with no fields func (_m *VdpaDevice) Driver() string { ret := _m.Called() @@ -30,7 +30,7 @@ func (_m *VdpaDevice) Driver() string { return r0 } -// MgmtDev provides a mock function with given fields: +// MgmtDev provides a mock function with no fields func (_m *VdpaDevice) MgmtDev() kvdpa.MgmtDev { ret := _m.Called() @@ -50,7 +50,7 @@ func (_m *VdpaDevice) MgmtDev() kvdpa.MgmtDev { return r0 } -// Name provides a mock function with given fields: +// Name provides a mock function with no fields func (_m *VdpaDevice) Name() string { ret := _m.Called() @@ -68,7 +68,7 @@ func (_m *VdpaDevice) Name() string { return r0 } -// ParentDevicePath provides a mock function with given fields: +// ParentDevicePath provides a mock function with no fields func (_m *VdpaDevice) ParentDevicePath() (string, error) { ret := _m.Called() @@ -96,7 +96,7 @@ func (_m *VdpaDevice) ParentDevicePath() (string, error) { return r0, r1 } -// VhostVdpa provides a mock function with given fields: +// VhostVdpa provides a mock function with no fields func (_m *VdpaDevice) VhostVdpa() kvdpa.VhostVdpa { ret := _m.Called() @@ -116,7 +116,7 @@ func (_m *VdpaDevice) VhostVdpa() kvdpa.VhostVdpa { return r0 } -// VirtioNet provides a mock function with given fields: +// VirtioNet provides a mock function with no fields func (_m *VdpaDevice) VirtioNet() kvdpa.VirtioNet { ret := _m.Called() diff --git a/go-controller/pkg/util/mocks/VdpaOps.go b/go-controller/pkg/util/mocks/VdpaOps.go index 7f82bb5183..ac5b475c07 100644 --- a/go-controller/pkg/util/mocks/VdpaOps.go +++ b/go-controller/pkg/util/mocks/VdpaOps.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. 
package mocks diff --git a/go-controller/pkg/util/mocks/multinetwork/NetInfo.go b/go-controller/pkg/util/mocks/multinetwork/NetInfo.go index 42e5808356..50edd3ef0a 100644 --- a/go-controller/pkg/util/mocks/multinetwork/NetInfo.go +++ b/go-controller/pkg/util/mocks/multinetwork/NetInfo.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.43.2. DO NOT EDIT. +// Code generated by mockery v2.53.4. DO NOT EDIT. package mocks @@ -16,45 +16,7 @@ type NetInfo struct { mock.Mock } -func (_m *NetInfo) GetNodeGatewayIP(hostSubnet *net.IPNet) *net.IPNet { - ret := _m.Called(hostSubnet) - - if len(ret) == 0 { - panic("no return value specified for GetNodeGatewayIP") - } - - var r0 *net.IPNet - if rf, ok := ret.Get(0).(func(*net.IPNet) *net.IPNet); ok { - r0 = rf(hostSubnet) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*net.IPNet) - } - } - - return r0 -} - -func (_m *NetInfo) GetNodeManagementIP(hostSubnet *net.IPNet) *net.IPNet { - ret := _m.Called(hostSubnet) - - if len(ret) == 0 { - panic("no return value specified for GetNodeManagementIP") - } - - var r0 *net.IPNet - if rf, ok := ret.Get(0).(func(*net.IPNet) *net.IPNet); ok { - r0 = rf(hostSubnet) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*net.IPNet) - } - } - - return r0 -} - -// AllowsPersistentIPs provides a mock function with given fields: +// AllowsPersistentIPs provides a mock function with no fields func (_m *NetInfo) AllowsPersistentIPs() bool { ret := _m.Called() @@ -96,7 +58,7 @@ func (_m *NetInfo) EqualNADs(nads ...string) bool { return r0 } -// ExcludeSubnets provides a mock function with given fields: +// ExcludeSubnets provides a mock function with no fields func (_m *NetInfo) ExcludeSubnets() []*net.IPNet { ret := _m.Called() @@ -116,47 +78,7 @@ func (_m *NetInfo) ExcludeSubnets() []*net.IPNet { return r0 } -// InfrastructureSubnets provides a mock function with given fields: -func (_m *NetInfo) InfrastructureSubnets() []*net.IPNet { - ret := _m.Called() - - if len(ret) == 0 { - 
panic("no return value specified for InfrastructureSubnets") - } - - var r0 []*net.IPNet - if rf, ok := ret.Get(0).(func() []*net.IPNet); ok { - r0 = rf() - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]*net.IPNet) - } - } - - return r0 -} - -// ReservedSubnets provides a mock function with given fields: -func (_m *NetInfo) ReservedSubnets() []*net.IPNet { - ret := _m.Called() - - if len(ret) == 0 { - panic("no return value specified for ReservedSubnets") - } - - var r0 []*net.IPNet - if rf, ok := ret.Get(0).(func() []*net.IPNet); ok { - r0 = rf() - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]*net.IPNet) - } - } - - return r0 -} - -// GetEgressIPAdvertisedNodes provides a mock function with given fields: +// GetEgressIPAdvertisedNodes provides a mock function with no fields func (_m *NetInfo) GetEgressIPAdvertisedNodes() []string { ret := _m.Called() @@ -196,7 +118,7 @@ func (_m *NetInfo) GetEgressIPAdvertisedOnNodeVRFs(node string) []string { return r0 } -// GetEgressIPAdvertisedVRFs provides a mock function with given fields: +// GetEgressIPAdvertisedVRFs provides a mock function with no fields func (_m *NetInfo) GetEgressIPAdvertisedVRFs() map[string][]string { ret := _m.Called() @@ -216,7 +138,7 @@ func (_m *NetInfo) GetEgressIPAdvertisedVRFs() map[string][]string { return r0 } -// GetNADNamespaces provides a mock function with given fields: +// GetNADNamespaces provides a mock function with no fields func (_m *NetInfo) GetNADNamespaces() []string { ret := _m.Called() @@ -236,7 +158,7 @@ func (_m *NetInfo) GetNADNamespaces() []string { return r0 } -// GetNADs provides a mock function with given fields: +// GetNADs provides a mock function with no fields func (_m *NetInfo) GetNADs() []string { ret := _m.Called() @@ -256,7 +178,7 @@ func (_m *NetInfo) GetNADs() []string { return r0 } -// GetNetInfo provides a mock function with given fields: +// GetNetInfo provides a mock function with no fields func (_m *NetInfo) GetNetInfo() util.NetInfo { 
ret := _m.Called() @@ -276,7 +198,7 @@ func (_m *NetInfo) GetNetInfo() util.NetInfo { return r0 } -// GetNetworkID provides a mock function with given fields: +// GetNetworkID provides a mock function with no fields func (_m *NetInfo) GetNetworkID() int { ret := _m.Called() @@ -294,7 +216,7 @@ func (_m *NetInfo) GetNetworkID() int { return r0 } -// GetNetworkName provides a mock function with given fields: +// GetNetworkName provides a mock function with no fields func (_m *NetInfo) GetNetworkName() string { ret := _m.Called() @@ -312,7 +234,7 @@ func (_m *NetInfo) GetNetworkName() string { return r0 } -// GetNetworkScopedClusterRouterName provides a mock function with given fields: +// GetNetworkScopedClusterRouterName provides a mock function with no fields func (_m *NetInfo) GetNetworkScopedClusterRouterName() string { ret := _m.Called() @@ -330,24 +252,6 @@ func (_m *NetInfo) GetNetworkScopedClusterRouterName() string { return r0 } -// GetNetworkScopedClusterSubnetSNATMatch provides a mock function with given fields: nodeName -func (_m *NetInfo) GetNetworkScopedClusterSubnetSNATMatch(nodeName string) string { - ret := _m.Called(nodeName) - - if len(ret) == 0 { - panic("no return value specified for GetNetworkScopedClusterSubnetSNATMatch") - } - - var r0 string - if rf, ok := ret.Get(0).(func(string) string); ok { - r0 = rf(nodeName) - } else { - r0 = ret.Get(0).(string) - } - - return r0 -} - // GetNetworkScopedExtPortName provides a mock function with given fields: bridgeID, nodeName func (_m *NetInfo) GetNetworkScopedExtPortName(bridgeID string, nodeName string) string { ret := _m.Called(bridgeID, nodeName) @@ -402,7 +306,7 @@ func (_m *NetInfo) GetNetworkScopedGWRouterName(nodeName string) string { return r0 } -// GetNetworkScopedJoinSwitchName provides a mock function with given fields: +// GetNetworkScopedJoinSwitchName provides a mock function with no fields func (_m *NetInfo) GetNetworkScopedJoinSwitchName() string { ret := _m.Called() @@ -528,6 +432,46 
@@ func (_m *NetInfo) GetNetworkScopedSwitchName(nodeName string) string { return r0 } +// GetNodeGatewayIP provides a mock function with given fields: hostSubnet +func (_m *NetInfo) GetNodeGatewayIP(hostSubnet *net.IPNet) *net.IPNet { + ret := _m.Called(hostSubnet) + + if len(ret) == 0 { + panic("no return value specified for GetNodeGatewayIP") + } + + var r0 *net.IPNet + if rf, ok := ret.Get(0).(func(*net.IPNet) *net.IPNet); ok { + r0 = rf(hostSubnet) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*net.IPNet) + } + } + + return r0 +} + +// GetNodeManagementIP provides a mock function with given fields: hostSubnet +func (_m *NetInfo) GetNodeManagementIP(hostSubnet *net.IPNet) *net.IPNet { + ret := _m.Called(hostSubnet) + + if len(ret) == 0 { + panic("no return value specified for GetNodeManagementIP") + } + + var r0 *net.IPNet + if rf, ok := ret.Get(0).(func(*net.IPNet) *net.IPNet); ok { + r0 = rf(hostSubnet) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*net.IPNet) + } + } + + return r0 +} + // GetPodNetworkAdvertisedOnNodeVRFs provides a mock function with given fields: node func (_m *NetInfo) GetPodNetworkAdvertisedOnNodeVRFs(node string) []string { ret := _m.Called(node) @@ -548,7 +492,7 @@ func (_m *NetInfo) GetPodNetworkAdvertisedOnNodeVRFs(node string) []string { return r0 } -// GetPodNetworkAdvertisedVRFs provides a mock function with given fields: +// GetPodNetworkAdvertisedVRFs provides a mock function with no fields func (_m *NetInfo) GetPodNetworkAdvertisedVRFs() map[string][]string { ret := _m.Called() @@ -568,6 +512,26 @@ func (_m *NetInfo) GetPodNetworkAdvertisedVRFs() map[string][]string { return r0 } +// GetTunnelKeys provides a mock function with no fields +func (_m *NetInfo) GetTunnelKeys() []int { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for GetTunnelKeys") + } + + var r0 []int + if rf, ok := ret.Get(0).(func() []int); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = 
ret.Get(0).([]int) + } + } + + return r0 +} + // HasNAD provides a mock function with given fields: nadName func (_m *NetInfo) HasNAD(nadName string) bool { ret := _m.Called(nadName) @@ -586,7 +550,7 @@ func (_m *NetInfo) HasNAD(nadName string) bool { return r0 } -// IPMode provides a mock function with given fields: +// IPMode provides a mock function with no fields func (_m *NetInfo) IPMode() (bool, bool) { ret := _m.Called() @@ -614,7 +578,27 @@ func (_m *NetInfo) IPMode() (bool, bool) { return r0, r1 } -// IsDefault provides a mock function with given fields: +// InfrastructureSubnets provides a mock function with no fields +func (_m *NetInfo) InfrastructureSubnets() []*net.IPNet { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for InfrastructureSubnets") + } + + var r0 []*net.IPNet + if rf, ok := ret.Get(0).(func() []*net.IPNet); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*net.IPNet) + } + } + + return r0 +} + +// IsDefault provides a mock function with no fields func (_m *NetInfo) IsDefault() bool { ret := _m.Called() @@ -632,7 +616,7 @@ func (_m *NetInfo) IsDefault() bool { return r0 } -// IsPrimaryNetwork provides a mock function with given fields: +// IsPrimaryNetwork provides a mock function with no fields func (_m *NetInfo) IsPrimaryNetwork() bool { ret := _m.Called() @@ -650,7 +634,7 @@ func (_m *NetInfo) IsPrimaryNetwork() bool { return r0 } -// IsSecondary provides a mock function with given fields: +// IsUserDefinedNetwork provides a mock function with no fields func (_m *NetInfo) IsUserDefinedNetwork() bool { ret := _m.Called() @@ -668,7 +652,7 @@ func (_m *NetInfo) IsUserDefinedNetwork() bool { return r0 } -// JoinSubnetV4 provides a mock function with given fields: +// JoinSubnetV4 provides a mock function with no fields func (_m *NetInfo) JoinSubnetV4() *net.IPNet { ret := _m.Called() @@ -688,7 +672,7 @@ func (_m *NetInfo) JoinSubnetV4() *net.IPNet { return r0 } -// JoinSubnetV6 
provides a mock function with given fields: +// JoinSubnetV6 provides a mock function with no fields func (_m *NetInfo) JoinSubnetV6() *net.IPNet { ret := _m.Called() @@ -708,7 +692,7 @@ func (_m *NetInfo) JoinSubnetV6() *net.IPNet { return r0 } -// JoinSubnets provides a mock function with given fields: +// JoinSubnets provides a mock function with no fields func (_m *NetInfo) JoinSubnets() []*net.IPNet { ret := _m.Called() @@ -728,7 +712,7 @@ func (_m *NetInfo) JoinSubnets() []*net.IPNet { return r0 } -// MTU provides a mock function with given fields: +// MTU provides a mock function with no fields func (_m *NetInfo) MTU() int { ret := _m.Called() @@ -746,7 +730,7 @@ func (_m *NetInfo) MTU() int { return r0 } -// PhysicalNetworkName provides a mock function with given fields: +// PhysicalNetworkName provides a mock function with no fields func (_m *NetInfo) PhysicalNetworkName() string { ret := _m.Called() @@ -782,7 +766,27 @@ func (_m *NetInfo) RemoveNetworkScopeFromName(name string) string { return r0 } -// Subnets provides a mock function with given fields: +// ReservedSubnets provides a mock function with no fields +func (_m *NetInfo) ReservedSubnets() []*net.IPNet { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for ReservedSubnets") + } + + var r0 []*net.IPNet + if rf, ok := ret.Get(0).(func() []*net.IPNet); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*net.IPNet) + } + } + + return r0 +} + +// Subnets provides a mock function with no fields func (_m *NetInfo) Subnets() []config.CIDRNetworkEntry { ret := _m.Called() @@ -802,7 +806,7 @@ func (_m *NetInfo) Subnets() []config.CIDRNetworkEntry { return r0 } -// TopologyType provides a mock function with given fields: +// TopologyType provides a mock function with no fields func (_m *NetInfo) TopologyType() string { ret := _m.Called() @@ -820,7 +824,27 @@ func (_m *NetInfo) TopologyType() string { return r0 } -// Vlan provides a mock function with 
given fields: +// TransitSubnets provides a mock function with no fields +func (_m *NetInfo) TransitSubnets() []*net.IPNet { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for TransitSubnets") + } + + var r0 []*net.IPNet + if rf, ok := ret.Get(0).(func() []*net.IPNet); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*net.IPNet) + } + } + + return r0 +} + +// Vlan provides a mock function with no fields func (_m *NetInfo) Vlan() uint { ret := _m.Called() diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index 6a8075f2b9..0c45070159 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -1,10 +1,12 @@ package util import ( + "encoding/json" "errors" "fmt" "net" "reflect" + "slices" "strconv" "strings" "sync" @@ -17,6 +19,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" knet "k8s.io/utils/net" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" @@ -34,6 +37,7 @@ type NetInfo interface { // static information, not expected to change. GetNetworkName() string GetNetworkID() int + GetTunnelKeys() []int IsDefault() bool IsPrimaryNetwork() bool IsUserDefinedNetwork() bool @@ -47,6 +51,7 @@ type NetInfo interface { JoinSubnetV4() *net.IPNet JoinSubnetV6() *net.IPNet JoinSubnets() []*net.IPNet + TransitSubnets() []*net.IPNet Vlan() uint AllowsPersistentIPs() bool PhysicalNetworkName() string @@ -107,6 +112,7 @@ type MutableNetInfo interface { // SetNetworkID sets the network ID before any controller handles the // network SetNetworkID(id int) + SetTunnelKeys(keys []int) // NADs referencing a network SetNADs(nadName ...string) @@ -221,7 +227,8 @@ type mutableNetInfo struct { // id of the network. 
It's mutable because is set on day-1 but it can't be // changed or reconciled on day-2 - id int + id int + tunnelKeys []int nads sets.Set[string] podNetworkAdvertisements map[string][]string @@ -264,6 +271,7 @@ func (l *mutableNetInfo) equals(r *mutableNetInfo) bool { r.RLock() defer r.RUnlock() return reflect.DeepEqual(l.id, r.id) && + reflect.DeepEqual(l.tunnelKeys, r.tunnelKeys) && reflect.DeepEqual(l.nads, r.nads) && reflect.DeepEqual(l.podNetworkAdvertisements, r.podNetworkAdvertisements) && reflect.DeepEqual(l.eipAdvertisements, r.eipAdvertisements) @@ -276,6 +284,7 @@ func (l *mutableNetInfo) copyFrom(r *mutableNetInfo) { aux := mutableNetInfo{} r.RLock() aux.id = r.id + aux.tunnelKeys = slices.Clone(r.tunnelKeys) aux.nads = r.nads.Clone() aux.setPodNetworkAdvertisedOnVRFs(r.podNetworkAdvertisements) aux.setEgressIPAdvertisedAtNodes(r.eipAdvertisements) @@ -284,6 +293,7 @@ func (l *mutableNetInfo) copyFrom(r *mutableNetInfo) { l.Lock() defer l.Unlock() l.id = aux.id + l.tunnelKeys = aux.tunnelKeys l.nads = aux.nads l.podNetworkAdvertisements = aux.podNetworkAdvertisements l.eipAdvertisements = aux.eipAdvertisements @@ -302,6 +312,18 @@ func (nInfo *mutableNetInfo) SetNetworkID(id int) { nInfo.id = id } +func (nInfo *mutableNetInfo) GetTunnelKeys() []int { + nInfo.RLock() + defer nInfo.RUnlock() + return nInfo.tunnelKeys +} + +func (nInfo *mutableNetInfo) SetTunnelKeys(tunnelKeys []int) { + nInfo.Lock() + defer nInfo.Unlock() + nInfo.tunnelKeys = tunnelKeys +} + func (nInfo *mutableNetInfo) SetPodNetworkAdvertisedVRFs(podAdvertisements map[string][]string) { nInfo.Lock() defer nInfo.Unlock() @@ -629,6 +651,12 @@ func (nInfo *DefaultNetInfo) JoinSubnets() []*net.IPNet { return defaultJoinSubnets } +// TransitSubnets should not be used for the default network. +// It will return an empty list since transit networks are not set for this type of network. 
+func (nInfo *DefaultNetInfo) TransitSubnets() []*net.IPNet { + return []*net.IPNet{} +} + // Vlan returns the defaultNetConfInfo's Vlan value func (nInfo *DefaultNetInfo) Vlan() uint { return config.Gateway.VLANID @@ -671,6 +699,7 @@ type userDefinedNetInfo struct { reservedSubnets []*net.IPNet infrastructureSubnets []*net.IPNet joinSubnets []*net.IPNet + transitSubnets []*net.IPNet physicalNetworkName string defaultGatewayIPs []net.IP @@ -721,6 +750,9 @@ func (nInfo *userDefinedNetInfo) GetNetworkScopedK8sMgmtIntfName(nodeName string } func (nInfo *userDefinedNetInfo) GetNetworkScopedClusterRouterName() string { + if nInfo.TopologyType() == types.Layer2Topology { + return nInfo.GetNetworkScopedName(types.TransitRouter) + } return nInfo.GetNetworkScopedName(types.OVNClusterRouter) } @@ -864,6 +896,12 @@ func (nInfo *userDefinedNetInfo) JoinSubnets() []*net.IPNet { return nInfo.joinSubnets } +// TransitSubnets returns the userDefinedNetInfo's transit subnet values (both v4&v6) +// For now it is only set for Primary Layer2 UDNs, otherwise is empty +func (nInfo *userDefinedNetInfo) TransitSubnets() []*net.IPNet { + return nInfo.transitSubnets +} + func (nInfo *userDefinedNetInfo) canReconcile(other NetInfo) bool { if (nInfo == nil) != (other == nil) { return false @@ -913,7 +951,10 @@ func (nInfo *userDefinedNetInfo) canReconcile(other NetInfo) bool { if !cmp.Equal(nInfo.infrastructureSubnets, other.InfrastructureSubnets(), cmpopts.SortSlices(lessIPNet)) { return false } - return cmp.Equal(nInfo.joinSubnets, other.JoinSubnets(), cmpopts.SortSlices(lessIPNet)) + if !cmp.Equal(nInfo.joinSubnets, other.JoinSubnets(), cmpopts.SortSlices(lessIPNet)) { + return false + } + return cmp.Equal(nInfo.transitSubnets, other.TransitSubnets(), cmpopts.SortSlices(lessIPNet)) } func (nInfo *userDefinedNetInfo) copy() *userDefinedNetInfo { @@ -932,6 +973,7 @@ func (nInfo *userDefinedNetInfo) copy() *userDefinedNetInfo { reservedSubnets: nInfo.reservedSubnets, infrastructureSubnets: 
nInfo.infrastructureSubnets, joinSubnets: nInfo.joinSubnets, + transitSubnets: nInfo.transitSubnets, physicalNetworkName: nInfo.physicalNetworkName, defaultGatewayIPs: nInfo.defaultGatewayIPs, managementIPs: nInfo.managementIPs, @@ -1005,6 +1047,11 @@ func newLayer2NetConfInfo(netconf *ovncnitypes.NetConf) (MutableNetInfo, error) return nil, err } + transitSubnets, err := parseTransitSubnet(netconf.Role, netconf.TransitSubnet) + if err != nil { + return nil, fmt.Errorf("invalid transit subnet for %s netconf %s: %v", netconf.Topology, netconf.Name, err) + } + // Allocate infrastructure IPs for primary networks var defaultGatewayIPs, managementIPs []net.IP if IsPreconfiguredUDNAddressesEnabled() && netconf.Role == types.NetworkRolePrimary { @@ -1020,6 +1067,7 @@ func newLayer2NetConfInfo(netconf *ovncnitypes.NetConf) (MutableNetInfo, error) topology: types.Layer2Topology, subnets: subnets, joinSubnets: joinSubnets, + transitSubnets: transitSubnets, excludeSubnets: excludes, reservedSubnets: reserved, infrastructureSubnets: infra, @@ -1158,6 +1206,15 @@ func parseJoinSubnet(joinSubnet string) ([]*net.IPNet, error) { return joinSubnets, nil } +func parseTransitSubnet(netconfRole, transitSubnet string) ([]*net.IPNet, error) { + transitSubnets := []*net.IPNet{} + if netconfRole != types.NetworkRolePrimary { + // only primary networks can have transit subnet + return transitSubnets, nil + } + return parseSubnetList(transitSubnet) +} + func getIPMode(subnets []config.CIDRNetworkEntry) (bool, bool) { var ipv6Mode, ipv4Mode bool for _, subnet := range subnets { @@ -1261,9 +1318,15 @@ func ParseNADInfo(nad *nettypes.NetworkAttachmentDefinition) (NetInfo, error) { return nil, fmt.Errorf("failed to parse annotated network ID: %w", err) } } - n.SetNetworkID(id) + if nad.Annotations[types.OvnNetworkTunnelKeysAnnotation] != "" { + tunnelKeys, err := ParseTunnelKeysAnnotation(nad.Annotations[types.OvnNetworkTunnelKeysAnnotation]) + if err != nil { + return nil, fmt.Errorf("failed 
to parse annotated tunnel keys: %w", err) + } + n.SetTunnelKeys(tunnelKeys) + } return n, nil } @@ -1333,8 +1396,15 @@ func ValidateNetConf(nadName string, netconf *ovncnitypes.NetConf) error { return fmt.Errorf("defaultGatewayIPs is only supported for layer2 topology") } + if netconf.TransitSubnet == "" && netconf.Role == types.NetworkRolePrimary && netconf.Topology == types.Layer2Topology { + klog.Warningf("transitSubnet is not specified for layer2 primary NAD %s, dynamic transit subnet will be used", netconf.Name) + if err := SetTransitSubnets(netconf); err != nil { + return fmt.Errorf("failed to set dynamic transit subnet for layer2 primary NAD %s: %v", netconf.Name, err) + } + } + if netconf.Topology != types.LocalnetTopology && netconf.Name != types.DefaultNetworkName { - if err := subnetOverlapCheck(netconf); err != nil { + if _, _, err := SubnetOverlapCheck(netconf); err != nil { return fmt.Errorf("invalid subnet configuration: %w", err) } } @@ -1342,10 +1412,11 @@ func ValidateNetConf(nadName string, netconf *ovncnitypes.NetConf) error { return nil } -// subnetOverlapCheck validates whether POD and join subnet mentioned in a net-attach-def with -// topology "layer2" and "layer3" does not overlap with ClusterSubnets, ServiceCIDRs, join subnet, -// and masquerade subnet. It also considers excluded subnets mentioned in a net-attach-def. -func subnetOverlapCheck(netconf *ovncnitypes.NetConf) error { +// SubnetOverlapCheck validates whether user-configured networks (e.g. POD and join subnet) mentioned in +// a net-attach-def with topology "layer2" and "layer3" overlaps with internal and reserved networks +// (e.g. ClusterSubnets, ServiceCIDRs, join subnet, etc.). +// It also considers excluded subnets mentioned in a net-attach-def. 
+func SubnetOverlapCheck(netconf *ovncnitypes.NetConf) (*net.IPNet, *net.IPNet, error) { allSubnets := config.NewConfigSubnets() for _, subnet := range config.Default.ClusterSubnets { allSubnets.Append(config.ConfigSubnetCluster, subnet.CIDR) @@ -1365,9 +1436,10 @@ func subnetOverlapCheck(netconf *ovncnitypes.NetConf) error { allSubnets.Append(config.ConfigSubnetMasquerade, v4MasqueradeCIDR) allSubnets.Append(config.ConfigSubnetMasquerade, v6MasqueradeCIDR) + // Layer3 network only uses pre-defined transit subnets if netconf.Topology == types.Layer3Topology { - _, v4TransitCIDR, _ := net.ParseCIDR(config.ClusterManager.V4TransitSwitchSubnet) - _, v6TransitCIDR, _ := net.ParseCIDR(config.ClusterManager.V6TransitSwitchSubnet) + _, v4TransitCIDR, _ := net.ParseCIDR(config.ClusterManager.V4TransitSubnet) + _, v6TransitCIDR, _ := net.ParseCIDR(config.ClusterManager.V6TransitSubnet) allSubnets.Append(config.ConfigSubnetTransit, v4TransitCIDR) allSubnets.Append(config.ConfigSubnetTransit, v6TransitCIDR) @@ -1375,15 +1447,18 @@ func subnetOverlapCheck(netconf *ovncnitypes.NetConf) error { ni, err := NewNetInfo(netconf) if err != nil { - return fmt.Errorf("error while parsing subnets: %v", err) + return nil, nil, fmt.Errorf("error while parsing subnets: %v", err) } for _, subnet := range ni.Subnets() { allSubnets.Append(config.UserDefinedSubnets, subnet.CIDR) } - for _, subnet := range ni.JoinSubnets() { allSubnets.Append(config.UserDefinedJoinSubnet, subnet) } + // dynamic transit subnets are only set for primary Layer2 UDNs for now + for _, subnet := range ni.TransitSubnets() { + allSubnets.Append(config.ConfigSubnetTransit, subnet) + } if ni.ExcludeSubnets() != nil { for i, configSubnet := range allSubnets.Subnets { if IsContainedInAnyCIDR(configSubnet.Subnet, ni.ExcludeSubnets()...) 
{ @@ -1391,12 +1466,12 @@ func subnetOverlapCheck(netconf *ovncnitypes.NetConf) error { } } } - err = allSubnets.CheckForOverlaps() + subnet1, subnet2, err := allSubnets.CheckForOverlaps() if err != nil { - return fmt.Errorf("pod or join subnet overlaps with already configured internal subnets: %w", err) + return subnet1, subnet2, fmt.Errorf("pod or join subnet overlaps with already configured internal subnets: %w", err) } - return nil + return nil, nil, nil } // GetPodNADToNetworkMapping sees if the given pod needs to plumb over this given network specified by netconf, @@ -1830,3 +1905,89 @@ func getFirstAvailableIP(subnets []*net.IPNet, excludeIPs sets.Set[string]) net. } return nil } + +func ParseTunnelKeysAnnotation(annotation string) ([]int, error) { + tunnelKeys := []int{} + if err := json.Unmarshal([]byte(annotation), &tunnelKeys); err != nil { + return nil, fmt.Errorf("failed to parse annotated network tunnel keys: %w", err) + } + return tunnelKeys, nil +} + +func FormatTunnelKeysAnnotation(tunnelKeys []int) (string, error) { + annotationBytes, err := json.Marshal(tunnelKeys) + if err != nil { + return "", fmt.Errorf("failed to format tunnel keys annotation: %w", err) + } + return string(annotationBytes), nil +} + +// SetTransitSubnets generates transit subnet for primary layer2 UDNs and sets for a given netconf. +// It should be called with the final version of netconf to make sure that util.SubnetOverlapCheck(netconf) passes. 
+func SetTransitSubnets(netconf *ovncnitypes.NetConf) error {
+	// Generate one transit subnet per entry in netconf.Subnets, matching that entry's IP family.
+	transitSubnets := []string{}
+	for _, subnetStr := range strings.Split(netconf.Subnets, ",") {
+		_, subnet, err := net.ParseCIDR(subnetStr)
+		if err != nil {
+			return fmt.Errorf("can't generate transit subnets: failed to parse CIDR %q: %w", subnetStr, err)
+		}
+		// getTransitSubnet overwrites netconf.TransitSubnet with each candidate it probes;
+		// the final comma-separated value is assigned once, after the loop.
+		transitSubnet, err := getTransitSubnet(netconf, knet.IsIPv4CIDR(subnet))
+		if err != nil {
+			return err
+		}
+		transitSubnets = append(transitSubnets, transitSubnet)
+	}
+	netconf.TransitSubnet = strings.Join(transitSubnets, ",")
+	return nil
+}
+
+// getTransitSubnet returns a transit subnet (in CIDR notation) of the requested IP family
+// for which SubnetOverlapCheck(netconf) passes.
+// It starts from the configured default transit subnet (config.ClusterManager.V4TransitSubnet
+// or config.ClusterManager.V6TransitSubnet) and, whenever that candidate is reported as one of
+// the two overlapping subnets, moves on to the candidate returned by getFirstNonOverlappingSubnet,
+// for at most 10 attempts.
+// netconf.TransitSubnet is overwritten with every candidate that is probed.
+// An error is returned if the default transit subnet can't be parsed, if SubnetOverlapCheck fails
+// for a reason that does not involve the candidate, or if no candidate passes within the attempt limit.
+func getTransitSubnet(netconf *ovncnitypes.NetConf, isIPv4 bool) (string, error) {
+	var transitSubnet *net.IPNet
+	var err error
+	if isIPv4 {
+		_, transitSubnet, err = net.ParseCIDR(config.ClusterManager.V4TransitSubnet)
+	} else {
+		_, transitSubnet, err = net.ParseCIDR(config.ClusterManager.V6TransitSubnet)
+	}
+	if err != nil {
+		return "", fmt.Errorf("can't generate transit subnets: failed to parse default transit subnet: %w", err)
+	}
+	// repeat until we find a non-overlapping subnet,
+	// but limit the number of iterations to avoid infinite loop
+	for i := 0; i < 10; i++ {
+		// only add current transit subnet to the netconf for overlap check (for requested ipFamily),
+		// final assignment should be done for all ipFamilies outside this function.
+		netconf.TransitSubnet = transitSubnet.String()
+		// check if there is subnet overlap.
+		// NOTE: err is assigned with "=" rather than ":=" so that the function-level err keeps the
+		// last overlap error; a loop-scoped err would leave the outer one nil and the final
+		// fmt.Errorf below would wrap nil ("%!w(<nil>)").
+		var subnet1, subnet2 *net.IPNet
+		subnet1, subnet2, err = SubnetOverlapCheck(netconf)
+		if err == nil {
+			return transitSubnet.String(), nil
+		}
+		if subnet1 == nil || subnet2 == nil ||
+			(subnet1.String() != transitSubnet.String() && subnet2.String() != transitSubnet.String()) {
+			// there is another problem with the config
+			// or overlap is not with transit subnet
+			return "", err
+		}
+		transitSubnet = getFirstNonOverlappingSubnet(subnet1, subnet2, transitSubnet.Mask)
+	}
+	// if the previous loop didn't return the result, we failed to find a non-overlapping subnet;
+	// err holds the overlap error reported for the last candidate
+	return "", fmt.Errorf("can't generate transit subnets: failed to find non-overlapping transit subnet: %w", err)
+}
+
+// getFirstNonOverlappingSubnet finds the first subnet with the same netmask as netMask that does not overlap with either subnet1 or subnet2.
+// It expects that subnet1 and subnet2 overlap with each other, and that netmask is one of the two subnet's netmask.
+// If these conditions are not met, the result won't be correct.
+func getFirstNonOverlappingSubnet(subnet1, subnet2 *net.IPNet, netMask net.IPMask) *net.IPNet { + // find the bigger network, and get the first subnet outside of it with the same netmask as default transit subnet + subnet1MaskSize, _ := subnet1.Mask.Size() + subnet2MaskSize, _ := subnet2.Mask.Size() + // bigger mask size means smaller network + baseSubnet := subnet1 + if subnet2MaskSize < subnet1MaskSize { + baseSubnet = subnet2 + } + // now find the first subnet outside the baseSubnet with the same mask + baseSubnetLastIP := GetLastIPOfSubnet(baseSubnet, 0) + nextIP := iputils.NextIP(baseSubnetLastIP.IP) + return &net.IPNet{IP: nextIP, Mask: netMask} +} diff --git a/go-controller/pkg/util/multi_network_test.go b/go-controller/pkg/util/multi_network_test.go index 5fee0f9c42..67947de39d 100644 --- a/go-controller/pkg/util/multi_network_test.go +++ b/go-controller/pkg/util/multi_network_test.go @@ -405,13 +405,14 @@ func TestParseNetconf(t *testing.T) { } `, expectedNetConf: &ovncnitypes.NetConf{ - Topology: "layer2", - NADName: "ns1/nad1", - MTU: 1400, - Role: "primary", - Subnets: "192.168.200.0/16", - NetConf: cnitypes.NetConf{Name: "tenant-red", Type: "ovn-k8s-cni-overlay"}, - JoinSubnet: "100.66.0.0/16,fd99::/64", + Topology: "layer2", + NADName: "ns1/nad1", + MTU: 1400, + Role: "primary", + Subnets: "192.168.200.0/16", + TransitSubnet: config.ClusterManager.V4TransitSubnet, + NetConf: cnitypes.NetConf{Name: "tenant-red", Type: "ovn-k8s-cni-overlay"}, + JoinSubnet: "100.66.0.0/16,fd99::/64", }, }, { @@ -1364,8 +1365,8 @@ func TestSubnetOverlapCheck(t *testing.T) { config.Gateway.V6MasqueradeSubnet = "fd69::/125" config.Gateway.V4JoinSubnet = "100.64.0.0/16" config.Gateway.V6JoinSubnet = "fd98::/64" - config.ClusterManager.V4TransitSwitchSubnet = "100.88.0.0/16" - config.ClusterManager.V6TransitSwitchSubnet = "fd97::/64" + config.ClusterManager.V4TransitSubnet = "100.88.0.0/16" + config.ClusterManager.V6TransitSubnet = "fd97::/64" type testConfig struct { desc 
string inputNetAttachDefConfigSpec string @@ -1388,7 +1389,7 @@ func TestSubnetOverlapCheck(t *testing.T) { `, expectedError: config.NewSubnetOverlapError( config.ConfigSubnet{SubnetType: config.UserDefinedSubnets, Subnet: MustParseCIDR("100.88.0.0/17")}, - config.ConfigSubnet{SubnetType: config.ConfigSubnetTransit, Subnet: MustParseCIDR(config.ClusterManager.V4TransitSwitchSubnet)}), + config.ConfigSubnet{SubnetType: config.ConfigSubnetTransit, Subnet: MustParseCIDR(config.ClusterManager.V4TransitSubnet)}), }, { desc: "return error when IPv4 POD subnet in net-attach-def overlaps other subnets", diff --git a/go-controller/pkg/util/ndp/ra.go b/go-controller/pkg/util/ndp/ra.go index 425d096335..53ac6a20c0 100644 --- a/go-controller/pkg/util/ndp/ra.go +++ b/go-controller/pkg/util/ndp/ra.go @@ -1,6 +1,7 @@ package ndp import ( + "encoding/binary" "fmt" "net" "syscall" @@ -11,11 +12,21 @@ import ( "golang.org/x/sys/unix" ) +// PrefixInformation represents a Prefix Information Option for Router Advertisements +type PrefixInformation struct { + Prefix net.IPNet + ValidLifetime uint32 + PreferredLifetime uint32 + OnLink bool + Autonomous bool +} + // RouterAdvertisement with mac, ips and lifetime field to send type RouterAdvertisement struct { SourceMAC, DestinationMAC net.HardwareAddr SourceIP, DestinationIP net.IP Lifetime uint16 + PrefixInfos []PrefixInformation } // SendRouterAdvertisements sends one or more Router Advertisements (RAs) on the specified network interface. @@ -45,6 +56,21 @@ func SendRouterAdvertisements(interfaceName string, ras ...RouterAdvertisement) } defer c.Close() + serializedRAs, err := generateRouterAdvertisements(ras...) + if err != nil { + return fmt.Errorf("failed to generate Router Advertisements: %w", err) + } + + // Send each serialized Router Advertisement using the raw socket. 
+ for _, serializedRA := range serializedRAs { + if err := c.Sendto(serializedRA, &unix.SockaddrLinklayer{Ifindex: iface.Index}, 0); err != nil { + return err + } + } + return nil +} + +func generateRouterAdvertisements(ras ...RouterAdvertisement) ([][]byte, error) { serializedRAs := [][]byte{} for _, ra := range ras { serializeBuffer := gopacket.NewSerializeBuffer() @@ -70,7 +96,7 @@ func SendRouterAdvertisements(interfaceName string, ras ...RouterAdvertisement) TypeCode: layers.CreateICMPv6TypeCode(layers.ICMPv6TypeRouterAdvertisement, 0), } if err := icmp6Layer.SetNetworkLayerForChecksum(&ip6Layer); err != nil { - return err + return nil, err } // https://datatracker.ietf.org/doc/html/rfc4861#section-4.2 @@ -91,16 +117,27 @@ func SendRouterAdvertisements(interfaceName string, ras ...RouterAdvertisement) } // Create the ICMPv6 Router Advertisement layer. + options := layers.ICMPv6Options{{ + Type: layers.ICMPv6OptSourceAddress, + Data: ra.SourceMAC, + }} + + // Add Prefix Information Options if specified + for _, prefixInfo := range ra.PrefixInfos { + prefixData := createPrefixInfoData(&prefixInfo) + options = append(options, layers.ICMPv6Option{ + Type: layers.ICMPv6OptPrefixInfo, + Data: prefixData, + }) + } + raLayer := layers.ICMPv6RouterAdvertisement{ HopLimit: 255, Flags: managedAddressFlag | defaultRoutePreferenceFlag, RouterLifetime: ra.Lifetime, ReachableTime: 0, RetransTimer: 0, - Options: layers.ICMPv6Options{{ - Type: layers.ICMPv6OptSourceAddress, - Data: ra.SourceMAC, - }}, + Options: options, } // Serialize the layers into a byte slice. @@ -110,16 +147,75 @@ func SendRouterAdvertisements(interfaceName string, ras ...RouterAdvertisement) &icmp6Layer, &raLayer, ); err != nil { - return err + return nil, err } serializedRAs = append(serializedRAs, serializeBuffer.Bytes()) } + return serializedRAs, nil +} - // Send each serialized Router Advertisement using the raw socket. 
- for _, serializedRA := range serializedRAs { - if err := c.Sendto(serializedRA, &unix.SockaddrLinklayer{Ifindex: iface.Index}, 0); err != nil { - return err - } +// createPrefixInfoData creates the data payload for a Prefix Information Option +// according to RFC 4861 Section 4.6.2 +func createPrefixInfoData(prefixInfo *PrefixInformation) []byte { + data := make([]byte, 30) + + // Prefix Length (8 bits) + prefixLen, _ := prefixInfo.Prefix.Mask.Size() + data[0] = uint8(prefixLen) + + // Flags (8 bits) + var flags uint8 + if prefixInfo.OnLink { + flags |= 0x80 // L flag } - return nil + if prefixInfo.Autonomous { + flags |= 0x40 // A flag + } + data[1] = flags + + // Valid Lifetime (32 bits) + binary.BigEndian.PutUint32(data[2:6], prefixInfo.ValidLifetime) + + // Preferred Lifetime (32 bits) + binary.BigEndian.PutUint32(data[6:10], prefixInfo.PreferredLifetime) + + // Reserved (32 bits) - already zero from make + + // Prefix (128 bits) + copy(data[14:], prefixInfo.Prefix.IP.To16()) + + return data +} + +// createPrefixInfoData creates the data payload for a Prefix Information Option +// according to RFC 4861 Section 4.6.2 +func createPrefixInfoData(prefixInfo *PrefixInformation) []byte { + data := make([]byte, 30) + + // Prefix Length (8 bits) + prefixLen, _ := prefixInfo.Prefix.Mask.Size() + data[0] = uint8(prefixLen) + + // Flags (8 bits) + var flags uint8 + if prefixInfo.OnLink { + flags |= 0x80 // L flag + } + if prefixInfo.Autonomous { + flags |= 0x40 // A flag + } + data[1] = flags + + // Valid Lifetime (32 bits) + binary.BigEndian.PutUint32(data[2:6], prefixInfo.ValidLifetime) + + // Preferred Lifetime (32 bits) + binary.BigEndian.PutUint32(data[6:10], prefixInfo.PreferredLifetime) + + // Reserved (32 bits) - already zero from make + + // Prefix (128 bits) + copy(data[14:], prefixInfo.Prefix.IP.To16()) + + return data } diff --git a/go-controller/pkg/util/ndp/ra_test.go b/go-controller/pkg/util/ndp/ra_test.go new file mode 100644 index 0000000000..20931b28ba 
--- /dev/null +++ b/go-controller/pkg/util/ndp/ra_test.go @@ -0,0 +1,420 @@ +package ndp + +import ( + "encoding/binary" + "net" + "testing" + + "github.com/google/gopacket" + "github.com/google/gopacket/layers" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCreatePrefixInfoData(t *testing.T) { + tests := []struct { + name string + prefix string + expected struct { + prefixLen uint8 + flags uint8 + validLifetime uint32 + preferredLifetime uint32 + } + prefixInfo PrefixInformation + }{ + { + name: "fd99::/64 with both flags set", + prefix: "fd99::/64", + expected: struct { + prefixLen uint8 + flags uint8 + validLifetime uint32 + preferredLifetime uint32 + }{ + prefixLen: 64, + flags: 0xC0, // L=1, A=1 (0x80 | 0x40) + validLifetime: 65535, + preferredLifetime: 0, + }, + prefixInfo: PrefixInformation{ + ValidLifetime: 65535, + PreferredLifetime: 0, + OnLink: true, + Autonomous: true, + }, + }, + { + name: "2001:db8::/32 with only OnLink flag", + prefix: "2001:db8::/32", + expected: struct { + prefixLen uint8 + flags uint8 + validLifetime uint32 + preferredLifetime uint32 + }{ + prefixLen: 32, + flags: 0x80, // L=1, A=0 + validLifetime: 3600, + preferredLifetime: 1800, + }, + prefixInfo: PrefixInformation{ + ValidLifetime: 3600, + PreferredLifetime: 1800, + OnLink: true, + Autonomous: false, + }, + }, + { + name: "::1/128 with no flags", + prefix: "::1/128", + expected: struct { + prefixLen uint8 + flags uint8 + validLifetime uint32 + preferredLifetime uint32 + }{ + prefixLen: 128, + flags: 0x00, // L=0, A=0 + validLifetime: 0, + preferredLifetime: 0, + }, + prefixInfo: PrefixInformation{ + ValidLifetime: 0, + PreferredLifetime: 0, + OnLink: false, + Autonomous: false, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, prefixNet, err := net.ParseCIDR(tt.prefix) + require.NoError(t, err) + + tt.prefixInfo.Prefix = *prefixNet + data := createPrefixInfoData(&tt.prefixInfo) + + // Verify 
the data length (should be 30 bytes) + assert.Len(t, data, 30, "PrefixInfo data should be 30 bytes") + + // Verify prefix length + assert.Equal(t, tt.expected.prefixLen, data[0], "Prefix length mismatch") + + // Verify flags + assert.Equal(t, tt.expected.flags, data[1], "Flags mismatch") + + // Verify valid lifetime + actualValidLifetime := binary.BigEndian.Uint32(data[2:6]) + assert.Equal(t, tt.expected.validLifetime, actualValidLifetime, "Valid lifetime mismatch") + + // Verify preferred lifetime + actualPreferredLifetime := binary.BigEndian.Uint32(data[6:10]) + assert.Equal(t, tt.expected.preferredLifetime, actualPreferredLifetime, "Preferred lifetime mismatch") + + // Verify reserved field is zero + reserved := binary.BigEndian.Uint32(data[10:14]) + assert.Equal(t, uint32(0), reserved, "Reserved field should be zero") + + // Verify prefix IP + expectedPrefix := prefixNet.IP.To16() + actualPrefix := data[14:30] + assert.Equal(t, []byte(expectedPrefix), actualPrefix, "Prefix IP mismatch") + }) + } +} + +func TestRouterAdvertisementSerialization(t *testing.T) { + sourceMAC := net.HardwareAddr{0x00, 0x11, 0x22, 0x33, 0x44, 0x55} + destinationMAC := net.HardwareAddr{0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff} + sourceIP := net.ParseIP("fe80::211:22ff:fe33:4455") + destinationIP := net.ParseIP("fe80::aabb:ccff:fedd:eeff") + + // Create test prefix information + _, prefix1, err := net.ParseCIDR("fd99::/64") + require.NoError(t, err) + + prefixInfos := []PrefixInformation{ + { + Prefix: *prefix1, + ValidLifetime: 65535, + PreferredLifetime: 0, + OnLink: true, + Autonomous: true, + }, + } + + ra := RouterAdvertisement{ + SourceMAC: sourceMAC, + SourceIP: sourceIP, + DestinationMAC: destinationMAC, + DestinationIP: destinationIP, + Lifetime: 65535, + PrefixInfos: prefixInfos, + } + +<<<<<<< HEAD + // Test serialization logic (without actually sending) + serializeBuffer := gopacket.NewSerializeBuffer() + + // Create the Ethernet layer + ethernetLayer := layers.Ethernet{ + DstMAC: 
ra.DestinationMAC, + SrcMAC: ra.SourceMAC, + EthernetType: layers.EthernetTypeIPv6, + } + + // Create the IPv6 layer + ip6Layer := layers.IPv6{ + Version: 6, + NextHeader: layers.IPProtocolICMPv6, + HopLimit: 255, + SrcIP: ra.SourceIP, + DstIP: ra.DestinationIP, + } + + // Create the ICMPv6 layer + icmp6Layer := layers.ICMPv6{ + TypeCode: layers.CreateICMPv6TypeCode(layers.ICMPv6TypeRouterAdvertisement, 0), + } + err = icmp6Layer.SetNetworkLayerForChecksum(&ip6Layer) + require.NoError(t, err) + + // Create options including PrefixInfos + options := layers.ICMPv6Options{{ + Type: layers.ICMPv6OptSourceAddress, + Data: ra.SourceMAC, + }} + + for _, prefixInfo := range ra.PrefixInfos { + prefixData := createPrefixInfoData(&prefixInfo) + options = append(options, layers.ICMPv6Option{ + Type: layers.ICMPv6OptPrefixInfo, + Data: prefixData, + }) + } + + // Flags + managedAddressFlag := uint8(0x80) + defaultRoutePreferenceFlag := uint8(0x08) + if ra.Lifetime == 0 { + defaultRoutePreferenceFlag = uint8(0x00) + } + + raLayer := layers.ICMPv6RouterAdvertisement{ + HopLimit: 255, + Flags: managedAddressFlag | defaultRoutePreferenceFlag, + RouterLifetime: ra.Lifetime, + ReachableTime: 0, + RetransTimer: 0, + Options: options, + } + + // Serialize the layers + err = gopacket.SerializeLayers(serializeBuffer, gopacket.SerializeOptions{ComputeChecksums: true, FixLengths: true}, + &ethernetLayer, + &ip6Layer, + &icmp6Layer, + &raLayer, + ) + require.NoError(t, err) + + serializedData := serializeBuffer.Bytes() + assert.NotEmpty(t, serializedData, "Serialized data should not be empty") + + // Parse the serialized data to verify structure + packet := gopacket.NewPacket(serializedData, layers.LayerTypeEthernet, gopacket.Default) +======= + serializedData, err := generateRouterAdvertisements(ra) + require.NoError(t, err) + + // Parse the serialized data to verify structure + packet := gopacket.NewPacket(serializedData[0], layers.LayerTypeEthernet, gopacket.Default) +>>>>>>> c94039ca3 (kv:
Add join subnet ipv6 pio with lifetime 0) + + // Verify Ethernet layer + ethLayer := packet.Layer(layers.LayerTypeEthernet) + require.NotNil(t, ethLayer) + eth := ethLayer.(*layers.Ethernet) + assert.Equal(t, destinationMAC, eth.DstMAC) + assert.Equal(t, sourceMAC, eth.SrcMAC) + assert.Equal(t, layers.EthernetTypeIPv6, eth.EthernetType) + + // Verify IPv6 layer + ipv6Layer := packet.Layer(layers.LayerTypeIPv6) + require.NotNil(t, ipv6Layer) + ipv6 := ipv6Layer.(*layers.IPv6) + assert.Equal(t, sourceIP, ipv6.SrcIP) + assert.Equal(t, destinationIP, ipv6.DstIP) + assert.Equal(t, layers.IPProtocolICMPv6, ipv6.NextHeader) + + // Verify ICMPv6 layer + icmpv6Layer := packet.Layer(layers.LayerTypeICMPv6) + require.NotNil(t, icmpv6Layer) + icmpv6 := icmpv6Layer.(*layers.ICMPv6) + assert.Equal(t, uint8(layers.ICMPv6TypeRouterAdvertisement), uint8(icmpv6.TypeCode.Type())) + + // Verify Router Advertisement layer + raLayerParsed := packet.Layer(layers.LayerTypeICMPv6RouterAdvertisement) + require.NotNil(t, raLayerParsed) + raParsed := raLayerParsed.(*layers.ICMPv6RouterAdvertisement) + assert.Equal(t, uint16(65535), raParsed.RouterLifetime) + + // Verify we have the expected options (source address + prefix info) + assert.Len(t, raParsed.Options, 2, "Should have 2 options: source address and prefix info") + + // Check for source address option + foundSourceOpt := false + foundPrefixOpt := false + for _, opt := range raParsed.Options { + if opt.Type == layers.ICMPv6OptSourceAddress { + foundSourceOpt = true + assert.Equal(t, sourceMAC, net.HardwareAddr(opt.Data)) + } + if opt.Type == layers.ICMPv6OptPrefixInfo { + foundPrefixOpt = true + assert.Len(t, opt.Data, 30, "Prefix info data should be 30 bytes") + // Verify prefix length + assert.Equal(t, uint8(64), opt.Data[0]) + // Verify flags (OnLink=1, Autonomous=1) + assert.Equal(t, uint8(0xC0), opt.Data[1]) + } + } + assert.True(t, foundSourceOpt, "Should have source address option") + assert.True(t, foundPrefixOpt, "Should have 
prefix info option") +} + +func TestMultiplePrefixInfosSerialization(t *testing.T) { + sourceMAC := net.HardwareAddr{0x00, 0x11, 0x22, 0x33, 0x44, 0x55} + destinationMAC := net.HardwareAddr{0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff} + sourceIP := net.ParseIP("fe80::211:22ff:fe33:4455") + destinationIP := net.ParseIP("fe80::aabb:ccff:fedd:eeff") + + // Create multiple prefix information entries + _, prefix1, err := net.ParseCIDR("fd99::/64") + require.NoError(t, err) + _, prefix2, err := net.ParseCIDR("2001:db8::/32") + require.NoError(t, err) + _, prefix3, err := net.ParseCIDR("fc00::/7") + require.NoError(t, err) + + prefixInfos := []PrefixInformation{ + { + Prefix: *prefix1, + ValidLifetime: 65535, + PreferredLifetime: 0, + OnLink: true, + Autonomous: true, + }, + { + Prefix: *prefix2, + ValidLifetime: 3600, + PreferredLifetime: 1800, + OnLink: true, + Autonomous: false, + }, + { + Prefix: *prefix3, + ValidLifetime: 7200, + PreferredLifetime: 3600, + OnLink: false, + Autonomous: true, + }, + } + + ra := RouterAdvertisement{ + SourceMAC: sourceMAC, + SourceIP: sourceIP, + DestinationMAC: destinationMAC, + DestinationIP: destinationIP, + Lifetime: 65535, + PrefixInfos: prefixInfos, + } + +<<<<<<< HEAD + // Test serialization with multiple PIOs + serializeBuffer := gopacket.NewSerializeBuffer() + + ethernetLayer := layers.Ethernet{ + DstMAC: ra.DestinationMAC, + SrcMAC: ra.SourceMAC, + EthernetType: layers.EthernetTypeIPv6, + } + + ip6Layer := layers.IPv6{ + Version: 6, + NextHeader: layers.IPProtocolICMPv6, + HopLimit: 255, + SrcIP: ra.SourceIP, + DstIP: ra.DestinationIP, + } + + icmp6Layer := layers.ICMPv6{ + TypeCode: layers.CreateICMPv6TypeCode(layers.ICMPv6TypeRouterAdvertisement, 0), + } + err = icmp6Layer.SetNetworkLayerForChecksum(&ip6Layer) + require.NoError(t, err) + + // Create options including all PrefixInfos + options := layers.ICMPv6Options{{ + Type: layers.ICMPv6OptSourceAddress, + Data: ra.SourceMAC, + }} + + for _, prefixInfo := range ra.PrefixInfos { + 
prefixData := createPrefixInfoData(&prefixInfo) + options = append(options, layers.ICMPv6Option{ + Type: layers.ICMPv6OptPrefixInfo, + Data: prefixData, + }) + } + + managedAddressFlag := uint8(0x80) + defaultRoutePreferenceFlag := uint8(0x08) + + raLayer := layers.ICMPv6RouterAdvertisement{ + HopLimit: 255, + Flags: managedAddressFlag | defaultRoutePreferenceFlag, + RouterLifetime: ra.Lifetime, + ReachableTime: 0, + RetransTimer: 0, + Options: options, + } + + err = gopacket.SerializeLayers(serializeBuffer, gopacket.SerializeOptions{ComputeChecksums: true, FixLengths: true}, + &ethernetLayer, + &ip6Layer, + &icmp6Layer, + &raLayer, + ) + require.NoError(t, err) + + serializedData := serializeBuffer.Bytes() + assert.NotEmpty(t, serializedData) + + // Parse and verify + packet := gopacket.NewPacket(serializedData, layers.LayerTypeEthernet, gopacket.Default) +======= + serializedData, err := generateRouterAdvertisements(ra) + require.NoError(t, err) + + // Parse and verify + packet := gopacket.NewPacket(serializedData[0], layers.LayerTypeEthernet, gopacket.Default) +>>>>>>> c94039ca3 (kv: Add join subnet ipv6 pio with lifetime 0) + raLayerParsed := packet.Layer(layers.LayerTypeICMPv6RouterAdvertisement) + require.NotNil(t, raLayerParsed) + raParsed := raLayerParsed.(*layers.ICMPv6RouterAdvertisement) + + // Should have 1 source address option + 3 prefix info options + assert.Len(t, raParsed.Options, 4, "Should have 4 options: 1 source address + 3 prefix infos") + + prefixOptCount := 0 + for _, opt := range raParsed.Options { + if opt.Type == layers.ICMPv6OptPrefixInfo { + prefixOptCount++ + assert.Len(t, opt.Data, 30, "Each prefix info should be 30 bytes") + } + } + assert.Equal(t, 3, prefixOptCount, "Should have exactly 3 prefix info options") +} diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 94f7610cd0..6428940418 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -5,6 +5,7 @@ import ( "crypto/sha256"
"errors" "fmt" + "math/big" "net" "strconv" "strings" @@ -354,6 +355,22 @@ func IPNetsIPToStringSlice(ips []*net.IPNet) []string { return ipAddrs } +func IPNetsToStringSlice(ips []*net.IPNet) []string { + ipNets := make([]string, 0) + for _, ip := range ips { + ipNets = append(ipNets, ip.String()) + } + return ipNets +} + +func IPNetsToIPs(ipNets []*net.IPNet) []net.IP { + ips := make([]net.IP, 0) + for _, ipNet := range ipNets { + ips = append(ips, ipNet.IP) + } + return ips +} + // CalculateRouteTableID will calculate route table ID based on the network // interface index func CalculateRouteTableID(ifIndex int) int { @@ -431,3 +448,18 @@ func ParseIPList(ipsStr string) ([]net.IP, error) { } return ips, nil } + +// GetLastIPOfSubnet returns the `indexFromLast`th IP address of a given subnet. +// For example, if indexFromLast is 0 and subnet is 10.0.0.0/24, it returns +// 10.0.0.255/24. +func GetLastIPOfSubnet(subnet *net.IPNet, indexFromLast int) *net.IPNet { + mask, total := subnet.Mask.Size() + base := big.NewInt(1) + totalIPs := new(big.Int).Lsh(base, uint(total-mask)) + lastIPIndex := totalIPs.Sub(totalIPs, big.NewInt(int64(indexFromLast+1))) + // this is copied from utilnet.AddIPOffset but to allow big.Int offset + r := big.NewInt(0).Add(utilnet.BigForIP(subnet.IP), lastIPIndex).Bytes() + r = append(make([]byte, 16), r...) 
+ lastIP := net.IP(r[len(r)-16:]) + return &net.IPNet{IP: lastIP, Mask: subnet.Mask} +} diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index 5e77a26acf..7bede0987f 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -155,6 +155,8 @@ const ( // }", ovnUDNLayer2NodeGRLRPTunnelIDs = "k8s.ovn.org/udn-layer2-node-gateway-router-lrp-tunnel-ids" + Layer2TopologyVersion = "k8s.ovn.org/layer2-topology-version" + // ovnNodeEncapIPs is used to indicate encap IPs set on the node OVNNodeEncapIPs = "k8s.ovn.org/node-encap-ips" @@ -510,6 +512,10 @@ func UpdateUDNLayer2NodeGRLRPTunnelIDs(annotations map[string]string, netName st return annotations, nil } +func UDNLayer2NodeUsesTransitRouter(node *corev1.Node) bool { + return node.Annotations[Layer2TopologyVersion] == "2.0" +} + // PrimaryIfAddrAnnotation represents IPv4 and/or IPv6 addresses stored in node annotations. // It is used for JSON marshalling/unmarshalling of node interface address information, // including primary interface addresses and other node IP configurations. 
diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml index 48b9fd36f5..da8b42e48f 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml @@ -159,10 +159,10 @@ spec: value: {{ hasKey .Values.global "enableMultiExternalGateway" | ternary .Values.global.enableMultiExternalGateway false | quote }} - name: OVN_NETWORK_QOS_ENABLE value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} - - name: OVN_V4_TRANSIT_SWITCH_SUBNET - value: {{ default "" .Values.global.v4TransitSwitchSubnet | quote }} - - name: OVN_V6_TRANSIT_SWITCH_SUBNET - value: {{ default "" .Values.global.v6TransitSwitchSubnet | quote }} + - name: OVN_V4_TRANSIT_SUBNET + value: {{ default "" .Values.global.v4TransitSubnet | quote }} + - name: OVN_V6_TRANSIT_SUBNET + value: {{ default "" .Values.global.v6TransitSubnet | quote }} - name: OVN_ENABLE_PERSISTENT_IPS value: {{ hasKey .Values.global "enablePersistentIPs" | ternary .Values.global.enablePersistentIPs false | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER diff --git a/helm/ovn-kubernetes/values-multi-node-zone.yaml b/helm/ovn-kubernetes/values-multi-node-zone.yaml index 8056461256..59b97df8fd 100644 --- a/helm/ovn-kubernetes/values-multi-node-zone.yaml +++ b/helm/ovn-kubernetes/values-multi-node-zone.yaml @@ -40,14 +40,14 @@ global: v4JoinSubnet: "100.64.0.0/16" # -- The v4 masquerade subnet used for assigning masquerade IPv4 addresses v4MasqueradeSubnet: "169.254.0.0/17" - # -- The v4 subnet for transit switch - v4TransitSwitchSubnet: "100.88.0.0/16" + # -- The v4 subnet for transit switches and routers + v4TransitSubnet: "100.88.0.0/16" # -- The v6 join subnet used for assigning join switch IPv6 addresses v6JoinSubnet: "fd98::/64" # 
-- The v6 masquerade subnet used for assigning masquerade IPv6 addresses v6MasqueradeSubnet: "fd69::/112" - # -- The v6 subnet for transit switch - v6TransitSwitchSubnet: "fd97::/64" + # -- The v6 subnet for transit switches and routers + v6TransitSubnet: "fd97::/64" # -- Whether or not enable ovnkube identity webhook enableOvnKubeIdentity: false # -- Indicate if ovnkube run master and node in one process diff --git a/helm/ovn-kubernetes/values-single-node-zone.yaml b/helm/ovn-kubernetes/values-single-node-zone.yaml index 516b77220b..50cdfe37f3 100644 --- a/helm/ovn-kubernetes/values-single-node-zone.yaml +++ b/helm/ovn-kubernetes/values-single-node-zone.yaml @@ -40,14 +40,14 @@ global: v4JoinSubnet: "100.64.0.0/16" # -- The v4 masquerade subnet used for assigning masquerade IPv4 addresses v4MasqueradeSubnet: "169.254.0.0/17" - # -- The v4 subnet for transit switch - v4TransitSwitchSubnet: "100.88.0.0/16" + # -- The v4 subnet for transit switches and routers + v4TransitSubnet: "100.88.0.0/16" # -- The v6 join subnet used for assigning join switch IPv6 addresses v6JoinSubnet: "fd98::/64" # -- The v6 masquerade subnet used for assigning masquerade IPv6 addresses v6MasqueradeSubnet: "fd69::/112" - # -- The v6 subnet for transit switch - v6TransitSwitchSubnet: "fd97::/64" + # -- The v6 subnet for transit switches and routers + v6TransitSubnet: "fd97::/64" # -- Whether or not enable ovnkube identity webhook enableOvnKubeIdentity: true # -- Indicate if ovnkube run master and node in one process diff --git a/test/e2e/kubevirt.go b/test/e2e/kubevirt.go index 2dfaf5e2e2..553421b119 100644 --- a/test/e2e/kubevirt.go +++ b/test/e2e/kubevirt.go @@ -1332,13 +1332,13 @@ fi }, 30*time.Second, time.Second).Should(Equal("Accepted")) } - getJoinIPs = func(cudn *udnv1.ClusterUserDefinedNetwork) []string { + getCUDNSubnets = func(cudn *udnv1.ClusterUserDefinedNetwork) []string { nad, err := nadClient.NetworkAttachmentDefinitions(namespace).Get(context.TODO(), cudn.Name, 
metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred()) var result map[string]interface{} err = json.Unmarshal([]byte(nad.Spec.Config), &result) Expect(err).NotTo(HaveOccurred()) - return strings.Split(result["joinSubnet"].(string), ",") + return strings.Split(result["subnets"].(string), ",") } ) BeforeEach(func() { @@ -1893,10 +1893,7 @@ ip route add %[3]s via %[4]s if isIPv6Supported(fr.ClientSet) && isInterconnectEnabled() { step = by(vmi.Name, fmt.Sprintf("Checking IPv6 gateway before %s %s", td.resource.description, td.test.description)) - nodeRunningVMI, err := fr.ClientSet.CoreV1().Nodes().Get(context.Background(), vmi.Status.NodeName, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred(), step) - - expectedIPv6GatewayPath, err := kubevirt.GenerateGatewayIPv6RouterLLA(nodeRunningVMI, getJoinIPs(cudn)) + expectedIPv6GatewayPath, err := kubevirt.GenerateGatewayIPv6RouterLLA(getCUDNSubnets(cudn)) Expect(err).NotTo(HaveOccurred()) Eventually(kubevirt.RetrieveIPv6Gateways). WithArguments(virtClient, vmi). 
@@ -1964,10 +1961,7 @@ ip route add %[3]s via %[4]s step = by(vmi.Name, fmt.Sprintf("Checking IPv4 gateway cached mac after %s %s", td.resource.description, td.test.description)) Expect(crClient.Get(context.TODO(), crclient.ObjectKeyFromObject(vmi), vmi)).To(Succeed()) - targetNode, err := fr.ClientSet.CoreV1().Nodes().Get(context.Background(), vmi.Status.MigrationState.TargetNode, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred(), step) - - expectedGatewayMAC, err := kubevirt.GenerateGatewayMAC(targetNode, getJoinIPs(cudn)) + expectedGatewayMAC, err := kubevirt.GenerateGatewayMAC(getCUDNSubnets(cudn)) Expect(err).NotTo(HaveOccurred(), step) Expect(err).NotTo(HaveOccurred(), step) @@ -1980,10 +1974,7 @@ ip route add %[3]s via %[4]s if isIPv6Supported(fr.ClientSet) { step = by(vmi.Name, fmt.Sprintf("Checking IPv6 gateway after %s %s", td.resource.description, td.test.description)) - targetNode, err := fr.ClientSet.CoreV1().Nodes().Get(context.Background(), vmi.Status.MigrationState.TargetNode, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred(), step) - - targetNodeIPv6GatewayPath, err := kubevirt.GenerateGatewayIPv6RouterLLA(targetNode, getJoinIPs(cudn)) + targetNodeIPv6GatewayPath, err := kubevirt.GenerateGatewayIPv6RouterLLA(getCUDNSubnets(cudn)) Expect(err).NotTo(HaveOccurred()) Eventually(kubevirt.RetrieveIPv6Gateways). WithArguments(virtClient, vmi). 
diff --git a/test/e2e/kubevirt/net.go b/test/e2e/kubevirt/net.go index c89f146f91..f28275a25d 100644 --- a/test/e2e/kubevirt/net.go +++ b/test/e2e/kubevirt/net.go @@ -9,8 +9,6 @@ import ( iputils "github.com/containernetworking/plugins/pkg/ip" - corev1 "k8s.io/api/core/v1" - kubevirtv1 "kubevirt.io/api/core/v1" v1 "kubevirt.io/api/core/v1" @@ -67,61 +65,51 @@ func RetrieveIPv6Gateways(cli *Client, vmi *v1.VirtualMachineInstance) ([]string return paths, nil } -func GenerateGatewayMAC(node *corev1.Node, joinSubnets []string) (string, error) { +func GenerateGatewayMAC(subnets []string) (string, error) { config.IPv4Mode = true - lrpJoinAddress, err := GetDefaultUDNGWRouterIPs(node, joinSubnets) + defaultGWIPs, err := GetLayer2UDNDefaultGWIPs(subnets) if err != nil { return "", err } - if len(lrpJoinAddress) == 0 { - return "", fmt.Errorf("missing lrp join ip at node %q", node.Name) + if len(defaultGWIPs) == 0 { + return "", fmt.Errorf("can't find default GW IP for subnets %v", subnets) } - return util.IPAddrToHWAddr(*lrpJoinAddress[0]).String(), nil + return util.IPAddrToHWAddr(*defaultGWIPs[0]).String(), nil } -func GenerateGatewayIPv6RouterLLA(node *corev1.Node, joinSubnets []string) (string, error) { +func GenerateGatewayIPv6RouterLLA(subnets []string) (string, error) { config.IPv4Mode = true - joinAddresses, err := GetDefaultUDNGWRouterIPs(node, joinSubnets) + defaultGWIPs, err := GetLayer2UDNDefaultGWIPs(subnets) if err != nil { return "", err } - if len(joinAddresses) == 0 { - return "", fmt.Errorf("missing join addresses at node %q", node.Name) + if len(defaultGWIPs) == 0 { + return "", fmt.Errorf("can't find default GW IP for subnets %v", subnets) } - return util.HWAddrToIPv6LLA(util.IPAddrToHWAddr(*joinAddresses[0])).String(), nil + return util.HWAddrToIPv6LLA(util.IPAddrToHWAddr(*defaultGWIPs[0])).String(), nil } -func GetDefaultUDNGWRouterIPs(node *corev1.Node, joinSubnets []string) ([]*net.IP, error) { - nodeID, err := util.GetNodeID(node) - if err != nil { 
- // Don't consider this node as cluster-manager has not allocated node id yet. - return nil, err - } +// GetLayer2UDNDefaultGWIPs returns the default gateway IPs (.1) for a Layer2 UDN subnet +func GetLayer2UDNDefaultGWIPs(subnets []string) ([]*net.IP, error) { var udnJoinNetv4, udnJoinNetv6 net.IP - for _, subnet := range joinSubnets { + for _, subnet := range subnets { ip, _, err := net.ParseCIDR(subnet) if err != nil { return nil, fmt.Errorf("failed to parse CIDR %q: %v", subnet, err) } if ip.To4() != nil { - udnJoinNetv4 = ip + udnJoinNetv4 = iputils.NextIP(ip) } else { - udnJoinNetv6 = ip + udnJoinNetv6 = iputils.NextIP(ip) } } res := []*net.IP{} if config.IPv4Mode { - for range nodeID { - udnJoinNetv4 = iputils.NextIP(udnJoinNetv4) - } res = append(res, &udnJoinNetv4) } if config.IPv6Mode { - for range nodeID { - udnJoinNetv6 = iputils.NextIP(udnJoinNetv6) - } res = append(res, &udnJoinNetv6) } return res, nil diff --git a/test/e2e/route_advertisements.go b/test/e2e/route_advertisements.go index 95dfc5e7c3..e5ac64155b 100644 --- a/test/e2e/route_advertisements.go +++ b/test/e2e/route_advertisements.go @@ -1109,7 +1109,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" }), ginkgo.Entry("UDN pod to the same node nodeport service in different UDN network should not work", // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 only) networks. + // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 and v6) networks. // Reason it doesn't work today is because UDN networks don't have MAC bindings for masqueradeIPs of other networks. 
// Traffic flow: UDN pod in network A -> samenode nodeIP:nodePort service of networkB // UDN pod in networkA -> ovn-switch -> ovn-cluster-router (SNAT to masqueradeIP of networkA) -> mpX interface -> @@ -1118,8 +1118,6 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" // On the GR we DNAT to backend pod and SNAT to joinIP. // Reply: Pod replies and now OVN in networkB tries to ARP for the masqueradeIP of networkA which is the source and simply // fails as it doesn't know how to reach this masqueradeIP. - // There is also inconsistency in behaviour within Layer2 networks for how IPv4 works and how IPv6 works where the traffic - // works on ipv6 because of the flows described below. func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { clientPod := podsNetA[0] node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) @@ -1130,23 +1128,8 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" nodeIP = nodeIPv6 } nodePort := svcNetB.Spec.Ports[0].NodePort - out := curlConnectionTimeoutCode - errBool := true - if ipFamily == utilnet.IPv6 && cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { - // For Layer2 networks, we have these flows we add on breth0: - // cookie=0xdeff105, duration=173.245s, table=1, n_packets=0, n_bytes=0, idle_age=173, priority=14,icmp6,icmp_type=134 actions=FLOOD - // cookie=0xdeff105, duration=173.245s, table=1, n_packets=8, n_bytes=640, idle_age=4, priority=14,icmp6,icmp_type=136 actions=FLOOD - // which floods the Router Advertisement (RA, type 134) and Neighbor Advertisement (NA, type 136) - // Given on Layer2 the GR has the SNATs for both masqueradeIPs this works perfectly well and - // the networks are able to NDP for the masqueradeIPs for the other networks. 
- // This doesn't work on Layer3 networks since masqueradeIP SNATs are present on the ovn-cluster-router in that case. - // See the tcpdump on the issue: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 for more details. - out = "" - errBool = false - } - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", out, errBool + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true }), ginkgo.Entry("UDN pod to a different node nodeport service in different UDN network should work", func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {