Skip to content

Commit 21e4f0e

Browse files
committed
Fix the host drop rules to match on new state
When we added the NFT rules to block traffic going from the host to advertised UDN pod subnets, we did not mean to also block replies from the host to advertised UDN pod subnets for traffic initiated by UDN pods. Given the rules live in a chain hooked at output, they would match replies as well, so traffic such as a UDN pod reaching a kube-apiserver host-networked pod backend is broken because of this. Let's change the rules to only match on NEW conntrack state, which is what we wanted to do in the original change. The current rules unintentionally block traffic in the reverse direction. Signed-off-by: Surya Seetharaman <[email protected]>
1 parent eea781b commit 21e4f0e

File tree

2 files changed

+78
-5
lines changed

2 files changed

+78
-5
lines changed

go-controller/pkg/node/gateway_shared_intf.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3040,8 +3040,8 @@ func getIPv(ipnet *net.IPNet) string {
30403040
// chain udn-bgp-drop {
30413041
// comment "Drop traffic generated locally towards advertised UDN subnets"
30423042
// type filter hook output priority filter; policy accept;
3043-
// ip daddr @advertised-udn-subnets-v4 counter packets 0 bytes 0 drop
3044-
// ip6 daddr @advertised-udn-subnets-v6 counter packets 0 bytes 0 drop
3043+
// ct state new ip daddr @advertised-udn-subnets-v4 counter packets 0 bytes 0 drop
3044+
// ct state new ip6 daddr @advertised-udn-subnets-v6 counter packets 0 bytes 0 drop
30453045
// }
30463046
func configureAdvertisedUDNIsolationNFTables() error {
30473047
counterIfDebug := ""
@@ -3083,11 +3083,11 @@ func configureAdvertisedUDNIsolationNFTables() error {
30833083

30843084
tx.Add(&knftables.Rule{
30853085
Chain: nftablesUDNBGPOutputChain,
3086-
Rule: knftables.Concat(fmt.Sprintf("ip daddr @%s", nftablesAdvertisedUDNsSetV4), counterIfDebug, "drop"),
3086+
Rule: knftables.Concat("ct state new", fmt.Sprintf("ip daddr @%s", nftablesAdvertisedUDNsSetV4), counterIfDebug, "drop"),
30873087
})
30883088
tx.Add(&knftables.Rule{
30893089
Chain: nftablesUDNBGPOutputChain,
3090-
Rule: knftables.Concat(fmt.Sprintf("ip6 daddr @%s", nftablesAdvertisedUDNsSetV6), counterIfDebug, "drop"),
3090+
Rule: knftables.Concat("ct state new", fmt.Sprintf("ip6 daddr @%s", nftablesAdvertisedUDNsSetV6), counterIfDebug, "drop"),
30913091
})
30923092
return nft.Run(context.TODO(), tx)
30933093
}

test/e2e/route_advertisements.go

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package e2e
33
import (
44
"context"
55
"fmt"
6+
"math/rand"
67
"net"
78
"strings"
89

@@ -19,6 +20,7 @@ import (
1920
infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api"
2021

2122
corev1 "k8s.io/api/core/v1"
23+
v1 "k8s.io/api/core/v1"
2224
apierrors "k8s.io/apimachinery/pkg/api/errors"
2325
"k8s.io/apimachinery/pkg/api/meta"
2426
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -532,7 +534,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks"
532534
var svcNetA, svcNetB, svcNetDefault *corev1.Service
533535
var cudnA, cudnB *udnv1.ClusterUserDefinedNetwork
534536
var ra *rav1.RouteAdvertisements
535-
537+
var hostNetworkPort int
536538
ginkgo.BeforeEach(func() {
537539
if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 && isLocalGWModeEnabled() {
538540
e2eskipper.Skipf("Advertising Layer2 UDNs is not currently supported in LGW")
@@ -584,6 +586,30 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks"
584586
nodes, err = e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet)
585587
gomega.Expect(err).NotTo(gomega.HaveOccurred())
586588
gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2))
589+
// create host networked pod
590+
ginkgo.By("Creating host network pods on each node")
591+
// get random port in case the test retries and port is already in use on host node
592+
min := 25000
593+
max := 25999
594+
hostNetworkPort = rand.Intn(max-min+1) + min
595+
framework.Logf("Random host networked port chosen: %d", hostNetworkPort)
596+
for _, node := range nodes.Items {
597+
// this creates a udp / http netexec listener which is able to receive the "hostname"
598+
// command. We use this to validate that each endpoint is received at least once
599+
args := []string{
600+
"netexec",
601+
fmt.Sprintf("--http-port=%d", hostNetworkPort),
602+
fmt.Sprintf("--udp-port=%d", hostNetworkPort),
603+
}
604+
605+
// create host networked Pods
606+
_, err := createPod(f, node.Name+"-hostnet-ep", node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) {
607+
p.Spec.Containers[0].Args = args
608+
p.Spec.HostNetwork = true
609+
})
610+
611+
framework.ExpectNoError(err)
612+
}
587613

588614
ginkgo.By("Setting up pods and services")
589615
podsNetA = []*corev1.Pod{}
@@ -901,6 +927,53 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks"
901927
framework.ExpectNoError(err)
902928
return clientNode, "", net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", curlConnectionTimeoutCode, true
903929
}),
930+
ginkgo.Entry("UDN pod to local node should not work",
931+
func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
932+
clientPod := podsNetA[0]
933+
node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{})
934+
framework.ExpectNoError(err)
935+
nodeIP := node.Status.Addresses[ipFamilyIndex].Address
936+
// FIXME: add the host process socket to the VRF for this test to work.
937+
// This scenario is something that is not supported yet. So the test will continue to fail.
938+
// This works the same on both normal UDNs and advertised UDNs.
939+
// So because the process is not bound to the VRF, the packet reaches the host but the kernel sends a RESET. So it's not code 28 but code 7.
940+
// 10:59:55.351067 319594f193d4d_3 P ifindex 191 0a:58:5d:5d:01:05 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 64, id 57264,
941+
// offset 0, flags [DF], proto TCP (6), length 60)
942+
// 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0x0aa5 (incorrect -> 0xe0b7), seq 3879759281, win 65280,
943+
// options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0
944+
// 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264,
945+
// offset 0, flags [DF], proto TCP (6), length 60)
946+
// 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280,
947+
// options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0
948+
// 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0,
949+
// offset 0, flags [DF], proto TCP (6), length 40)
950+
// 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0
951+
// 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0,
952+
// offset 0, flags [DF], proto TCP (6), length 40)
953+
// 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0
954+
return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", "", true
955+
}),
956+
ginkgo.Entry("UDN pod to a different node should work",
957+
func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
958+
clientPod := podsNetA[0]
959+
// podsNetA[0] and podsNetA[2] are on different nodes so we can pick the node of podsNetA[2] as the different node destination
960+
node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{})
961+
framework.ExpectNoError(err)
962+
nodeIP := node.Status.Addresses[ipFamilyIndex].Address
963+
errBool := false
964+
out := ""
965+
if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 {
966+
// FIXME: fix asymmetry in L2 UDNs
967+
// bad behaviour: packet is coming from other node -> entering eth0 -> breth0 and here kernel drops the packet since
968+
// rp_filter is set to 1 in breth0 and there is an iprule that sends the packet to mpX interface so kernel sees the packet
969+
// having return path different from the incoming interface.
970+
// The SNAT to nodeIP should fix this.
971+
// this causes curl timeout with code 28
972+
errBool = true
973+
out = curlConnectionTimeoutCode
974+
}
975+
return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", out, errBool
976+
}),
904977
)
905978

906979
},

0 commit comments

Comments
 (0)