Skip to content

Commit d3bb075

Browse files
authored
Merge pull request #5291 from tssurya/fix-bgp-host-to-pod-isolation
[UDN,BGP] Fix the host drop rules to match on new state
2 parents eea781b + 21e4f0e commit d3bb075

File tree

2 files changed

+78
-5
lines changed

2 files changed

+78
-5
lines changed

go-controller/pkg/node/gateway_shared_intf.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3040,8 +3040,8 @@ func getIPv(ipnet *net.IPNet) string {
30403040
// chain udn-bgp-drop {
30413041
// comment "Drop traffic generated locally towards advertised UDN subnets"
30423042
// type filter hook output priority filter; policy accept;
3043-
// ip daddr @advertised-udn-subnets-v4 counter packets 0 bytes 0 drop
3044-
// ip6 daddr @advertised-udn-subnets-v6 counter packets 0 bytes 0 drop
3043+
// ct state new ip daddr @advertised-udn-subnets-v4 counter packets 0 bytes 0 drop
3044+
// ct state new ip6 daddr @advertised-udn-subnets-v6 counter packets 0 bytes 0 drop
30453045
// }
30463046
func configureAdvertisedUDNIsolationNFTables() error {
30473047
counterIfDebug := ""
@@ -3083,11 +3083,11 @@ func configureAdvertisedUDNIsolationNFTables() error {
30833083

30843084
tx.Add(&knftables.Rule{
30853085
Chain: nftablesUDNBGPOutputChain,
3086-
Rule: knftables.Concat(fmt.Sprintf("ip daddr @%s", nftablesAdvertisedUDNsSetV4), counterIfDebug, "drop"),
3086+
Rule: knftables.Concat("ct state new", fmt.Sprintf("ip daddr @%s", nftablesAdvertisedUDNsSetV4), counterIfDebug, "drop"),
30873087
})
30883088
tx.Add(&knftables.Rule{
30893089
Chain: nftablesUDNBGPOutputChain,
3090-
Rule: knftables.Concat(fmt.Sprintf("ip6 daddr @%s", nftablesAdvertisedUDNsSetV6), counterIfDebug, "drop"),
3090+
Rule: knftables.Concat("ct state new", fmt.Sprintf("ip6 daddr @%s", nftablesAdvertisedUDNsSetV6), counterIfDebug, "drop"),
30913091
})
30923092
return nft.Run(context.TODO(), tx)
30933093
}

test/e2e/route_advertisements.go

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package e2e
33
import (
44
"context"
55
"fmt"
6+
"math/rand"
67
"net"
78
"strings"
89

@@ -19,6 +20,7 @@ import (
1920
infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api"
2021

2122
corev1 "k8s.io/api/core/v1"
23+
v1 "k8s.io/api/core/v1"
2224
apierrors "k8s.io/apimachinery/pkg/api/errors"
2325
"k8s.io/apimachinery/pkg/api/meta"
2426
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -532,7 +534,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks"
532534
var svcNetA, svcNetB, svcNetDefault *corev1.Service
533535
var cudnA, cudnB *udnv1.ClusterUserDefinedNetwork
534536
var ra *rav1.RouteAdvertisements
535-
537+
var hostNetworkPort int
536538
ginkgo.BeforeEach(func() {
537539
if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 && isLocalGWModeEnabled() {
538540
e2eskipper.Skipf("Advertising Layer2 UDNs is not currently supported in LGW")
@@ -584,6 +586,30 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks"
584586
nodes, err = e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet)
585587
gomega.Expect(err).NotTo(gomega.HaveOccurred())
586588
gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2))
589+
// create host networked pod
590+
ginkgo.By("Creating host network pods on each node")
591+
// get random port in case the test retries and port is already in use on host node
592+
min := 25000
593+
max := 25999
594+
hostNetworkPort = rand.Intn(max-min+1) + min
595+
framework.Logf("Random host networked port chosen: %d", hostNetworkPort)
596+
for _, node := range nodes.Items {
597+
// this creates a udp / http netexec listener which is able to receive the "hostname"
598+
// command. We use this to validate that each endpoint is received at least once
599+
args := []string{
600+
"netexec",
601+
fmt.Sprintf("--http-port=%d", hostNetworkPort),
602+
fmt.Sprintf("--udp-port=%d", hostNetworkPort),
603+
}
604+
605+
// create host networked Pods
606+
_, err := createPod(f, node.Name+"-hostnet-ep", node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) {
607+
p.Spec.Containers[0].Args = args
608+
p.Spec.HostNetwork = true
609+
})
610+
611+
framework.ExpectNoError(err)
612+
}
587613

588614
ginkgo.By("Setting up pods and services")
589615
podsNetA = []*corev1.Pod{}
@@ -901,6 +927,53 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks"
901927
framework.ExpectNoError(err)
902928
return clientNode, "", net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", curlConnectionTimeoutCode, true
903929
}),
930+
ginkgo.Entry("UDN pod to local node should not work",
931+
func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
932+
clientPod := podsNetA[0]
933+
node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{})
934+
framework.ExpectNoError(err)
935+
nodeIP := node.Status.Addresses[ipFamilyIndex].Address
936+
// FIXME: add the host process socket to the VRF for this test to work.
937+
// This scenario is something that is not supported yet. So the test will continue to fail.
938+
// This works the same on both normal UDNs and advertised UDNs.
939+
// So because the process is not bound to the VRF, packet reaches the host but kernel sends a RESET. So its not code 28 but code7.
940+
// 10:59:55.351067 319594f193d4d_3 P ifindex 191 0a:58:5d:5d:01:05 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 64, id 57264,
941+
// offset 0, flags [DF], proto TCP (6), length 60)
942+
// 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0x0aa5 (incorrect -> 0xe0b7), seq 3879759281, win 65280,
943+
// options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0
944+
// 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264,
945+
// offset 0, flags [DF], proto TCP (6), length 60)
946+
// 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280,
947+
// options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0
948+
// 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0,
949+
// offset 0, flags [DF], proto TCP (6), length 40)
950+
// 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0
951+
// 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0,
952+
// offset 0, flags [DF], proto TCP (6), length 40)
953+
// 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0
954+
return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", "", true
955+
}),
956+
ginkgo.Entry("UDN pod to a different node should work",
957+
func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
958+
clientPod := podsNetA[0]
959+
// podsNetA[0] and podsNetA[2] are on different nodes so we can pick the node of podsNetA[2] as the different node destination
960+
node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{})
961+
framework.ExpectNoError(err)
962+
nodeIP := node.Status.Addresses[ipFamilyIndex].Address
963+
errBool := false
964+
out := ""
965+
if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 {
966+
// FIXME: fix assymmetry in L2 UDNs
967+
// bad behaviour: packet is coming from other node -> entering eth0 -> bretho and here kernel drops the packet since
968+
// rp_filter is set to 1 in breth0 and there is an iprule that sends the packet to mpX interface so kernel sees the packet
969+
// having return path different from the incoming interface.
970+
// The SNAT to nodeIP should fix this.
971+
// this causes curl timeout with code 28
972+
errBool = true
973+
out = curlConnectionTimeoutCode
974+
}
975+
return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", out, errBool
976+
}),
904977
)
905978

906979
},

0 commit comments

Comments
 (0)