Skip to content

Commit b83ec12

Browse files
authored
Merge pull request kubernetes#126847 from aroradaman/conntrack-netlink
Remove conntrack binary dependency from kube-proxy
2 parents a105f36 + c34b20f commit b83ec12

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+11019
-1382
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ require (
5757
github.com/spf13/cobra v1.8.1
5858
github.com/spf13/pflag v1.0.5
5959
github.com/stretchr/testify v1.9.0
60-
github.com/vishvananda/netlink v1.1.0
60+
github.com/vishvananda/netlink v1.3.0
6161
github.com/vishvananda/netns v0.0.4
6262
go.etcd.io/etcd/api/v3 v3.5.15
6363
go.etcd.io/etcd/client/pkg/v3 v3.5.15

go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -572,9 +572,8 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7
572572
github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk=
573573
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
574574
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
575-
github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0=
576-
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
577-
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
575+
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
576+
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
578577
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
579578
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
580579
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
@@ -699,7 +698,6 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
699698
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
700699
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
701700
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
702-
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
703701
golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
704702
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
705703
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -715,6 +713,8 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc
715713
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
716714
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
717715
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
716+
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
717+
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
718718
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
719719
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
720720
golang.org/x/telemetry v0.0.0-20240521205824-bda55230c457/go.mod h1:pRgIJT+bRLFKnoM1ldnzKoxTIn14Yxz928LQRYYgIN0=

pkg/proxy/conntrack/cleanup.go

Lines changed: 93 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ limitations under the License.
2020
package conntrack
2121

2222
import (
23+
"github.com/vishvananda/netlink"
24+
"golang.org/x/sys/unix"
25+
2326
v1 "k8s.io/api/core/v1"
2427
"k8s.io/apimachinery/pkg/util/sets"
2528
"k8s.io/klog/v2"
@@ -29,20 +32,20 @@ import (
2932
)
3033

3134
// CleanStaleEntries takes care of flushing stale conntrack entries for services and endpoints.
32-
func CleanStaleEntries(ct Interface, svcPortMap proxy.ServicePortMap,
35+
func CleanStaleEntries(ct Interface, ipFamily v1.IPFamily, svcPortMap proxy.ServicePortMap,
3336
serviceUpdateResult proxy.UpdateServiceMapResult, endpointsUpdateResult proxy.UpdateEndpointsMapResult) {
34-
deleteStaleServiceConntrackEntries(ct, svcPortMap, serviceUpdateResult, endpointsUpdateResult)
35-
deleteStaleEndpointConntrackEntries(ct, svcPortMap, endpointsUpdateResult)
37+
deleteStaleServiceConntrackEntries(ct, ipFamily, svcPortMap, serviceUpdateResult, endpointsUpdateResult)
38+
deleteStaleEndpointConntrackEntries(ct, ipFamily, svcPortMap, endpointsUpdateResult)
3639
}
3740

3841
// deleteStaleServiceConntrackEntries takes care of flushing stale conntrack entries related
3942
// to UDP Service IPs. When a service has no endpoints and we drop traffic to it, conntrack
4043
// may create "black hole" entries for that IP+port. When the service gets endpoints we
4144
// need to delete those entries so further traffic doesn't get dropped.
42-
func deleteStaleServiceConntrackEntries(ct Interface, svcPortMap proxy.ServicePortMap, serviceUpdateResult proxy.UpdateServiceMapResult, endpointsUpdateResult proxy.UpdateEndpointsMapResult) {
45+
func deleteStaleServiceConntrackEntries(ct Interface, ipFamily v1.IPFamily, svcPortMap proxy.ServicePortMap, serviceUpdateResult proxy.UpdateServiceMapResult, endpointsUpdateResult proxy.UpdateEndpointsMapResult) {
46+
var filters []netlink.CustomConntrackFilter
4347
conntrackCleanupServiceIPs := serviceUpdateResult.DeletedUDPClusterIPs
4448
conntrackCleanupServiceNodePorts := sets.New[int]()
45-
isIPv6 := false
4649

4750
// merge newly active services gathered from endpointsUpdateResult
4851
// a UDP service that changes from 0 to non-0 endpoints is newly active.
@@ -59,57 +62,116 @@ func deleteStaleServiceConntrackEntries(ct Interface, svcPortMap proxy.ServicePo
5962
nodePort := svcInfo.NodePort()
6063
if svcInfo.Protocol() == v1.ProtocolUDP && nodePort != 0 {
6164
conntrackCleanupServiceNodePorts.Insert(nodePort)
62-
isIPv6 = netutils.IsIPv6(svcInfo.ClusterIP())
6365
}
6466
}
6567
}
6668

6769
klog.V(4).InfoS("Deleting conntrack stale entries for services", "IPs", conntrackCleanupServiceIPs.UnsortedList())
6870
for _, svcIP := range conntrackCleanupServiceIPs.UnsortedList() {
69-
if err := ct.ClearEntriesForIP(svcIP, v1.ProtocolUDP); err != nil {
70-
klog.ErrorS(err, "Failed to delete stale service connections", "IP", svcIP)
71-
}
71+
filters = append(filters, filterForIP(svcIP, v1.ProtocolUDP))
7272
}
7373
klog.V(4).InfoS("Deleting conntrack stale entries for services", "nodePorts", conntrackCleanupServiceNodePorts.UnsortedList())
7474
for _, nodePort := range conntrackCleanupServiceNodePorts.UnsortedList() {
75-
err := ct.ClearEntriesForPort(nodePort, isIPv6, v1.ProtocolUDP)
76-
if err != nil {
77-
klog.ErrorS(err, "Failed to clear udp conntrack", "nodePort", nodePort)
78-
}
75+
filters = append(filters, filterForPort(nodePort, v1.ProtocolUDP))
76+
}
77+
78+
if err := ct.ClearEntries(ipFamilyMap[ipFamily], filters...); err != nil {
79+
klog.ErrorS(err, "Failed to delete stale service connections")
7980
}
8081
}
8182

8283
// deleteStaleEndpointConntrackEntries takes care of flushing stale conntrack entries related
8384
// to UDP endpoints. After a UDP endpoint is removed we must flush any conntrack entries
8485
// for it so that if the same client keeps sending, the packets will get routed to a new endpoint.
85-
func deleteStaleEndpointConntrackEntries(ct Interface, svcPortMap proxy.ServicePortMap, endpointsUpdateResult proxy.UpdateEndpointsMapResult) {
86+
func deleteStaleEndpointConntrackEntries(ct Interface, ipFamily v1.IPFamily, svcPortMap proxy.ServicePortMap, endpointsUpdateResult proxy.UpdateEndpointsMapResult) {
87+
var filters []netlink.CustomConntrackFilter
8688
for _, epSvcPair := range endpointsUpdateResult.DeletedUDPEndpoints {
8789
if svcInfo, ok := svcPortMap[epSvcPair.ServicePortName]; ok {
8890
endpointIP := proxyutil.IPPart(epSvcPair.Endpoint)
8991
nodePort := svcInfo.NodePort()
90-
var err error
9192
if nodePort != 0 {
92-
err = ct.ClearEntriesForPortNAT(endpointIP, nodePort, v1.ProtocolUDP)
93-
if err != nil {
94-
klog.ErrorS(err, "Failed to delete nodeport-related endpoint connections", "servicePortName", epSvcPair.ServicePortName)
95-
}
96-
}
97-
err = ct.ClearEntriesForNAT(svcInfo.ClusterIP().String(), endpointIP, v1.ProtocolUDP)
98-
if err != nil {
99-
klog.ErrorS(err, "Failed to delete endpoint connections", "servicePortName", epSvcPair.ServicePortName)
93+
filters = append(filters, filterForPortNAT(endpointIP, nodePort, v1.ProtocolUDP))
94+
10095
}
96+
filters = append(filters, filterForNAT(svcInfo.ClusterIP().String(), endpointIP, v1.ProtocolUDP))
10197
for _, extIP := range svcInfo.ExternalIPs() {
102-
err := ct.ClearEntriesForNAT(extIP.String(), endpointIP, v1.ProtocolUDP)
103-
if err != nil {
104-
klog.ErrorS(err, "Failed to delete endpoint connections for externalIP", "servicePortName", epSvcPair.ServicePortName, "externalIP", extIP)
105-
}
98+
filters = append(filters, filterForNAT(extIP.String(), endpointIP, v1.ProtocolUDP))
10699
}
107100
for _, lbIP := range svcInfo.LoadBalancerVIPs() {
108-
err := ct.ClearEntriesForNAT(lbIP.String(), endpointIP, v1.ProtocolUDP)
109-
if err != nil {
110-
klog.ErrorS(err, "Failed to delete endpoint connections for LoadBalancerIP", "servicePortName", epSvcPair.ServicePortName, "loadBalancerIP", lbIP)
111-
}
101+
filters = append(filters, filterForNAT(lbIP.String(), endpointIP, v1.ProtocolUDP))
112102
}
113103
}
114104
}
105+
106+
if err := ct.ClearEntries(ipFamilyMap[ipFamily], filters...); err != nil {
107+
klog.ErrorS(err, "Failed to delete stale endpoint connections")
108+
}
109+
}
110+
111+
// ipFamilyMap maps a v1.IPFamily to the corresponding unix address-family
// constant (AF_INET for IPv4, AF_INET6 for IPv6). Looking up a family that is
// not listed here yields the uint8 zero value.
112+
var ipFamilyMap = map[v1.IPFamily]uint8{
113+
v1.IPv4Protocol: unix.AF_INET,
114+
v1.IPv6Protocol: unix.AF_INET6,
115+
}
116+
117+
// protocolMap maps a v1.Protocol (TCP, UDP, SCTP) to its Assigned Internet
// Protocol Number. Looking up a protocol that is not listed here yields 0.
118+
// https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml
119+
var protocolMap = map[v1.Protocol]uint8{
120+
v1.ProtocolTCP: unix.IPPROTO_TCP,
121+
v1.ProtocolUDP: unix.IPPROTO_UDP,
122+
v1.ProtocolSCTP: unix.IPPROTO_SCTP,
123+
}
124+
125+
// filterForIP returns a *conntrackFilter used to delete the conntrack entries of the
// given protocol for connections identified by their destination IP (original direction).
126+
// ip is parsed with netutils.ParseIPSloppy and the protocol number is looked up in
// protocolMap; the requested filter is logged at verbosity 4.
127+
func filterForIP(ip string, protocol v1.Protocol) *conntrackFilter {
128+
klog.V(4).InfoS("Adding conntrack filter for cleanup", "org-dst", ip, "protocol", protocol)
129+
return &conntrackFilter{
130+
protocol: protocolMap[protocol],
131+
original: &connectionTuple{
132+
dstIP: netutils.ParseIPSloppy(ip),
133+
},
134+
}
135+
}
136+
137+
// filterForPort returns a *conntrackFilter used to delete the conntrack entries of the
// given protocol for connections identified by their destination port (original direction).
138+
// The protocol number is looked up in protocolMap; the requested filter is logged at
// verbosity 4.
139+
func filterForPort(port int, protocol v1.Protocol) *conntrackFilter {
140+
klog.V(4).InfoS("Adding conntrack filter for cleanup", "org-port-dst", port, "protocol", protocol)
141+
return &conntrackFilter{
142+
protocol: protocolMap[protocol],
143+
original: &connectionTuple{
144+
dstPort: uint16(port), // int-to-uint16 conversion: a port outside the uint16 range wraps silently
145+
},
146+
}
147+
}
148+
149+
// filterForNAT returns a *conntrackFilter used to delete the conntrack entries of the
// given protocol for connections identified by both the destination IP in the original
150+
// direction (origin) and the source IP in the reply direction (dest). Both addresses are
// parsed with netutils.ParseIPSloppy; the requested filter is logged at verbosity 4.
151+
func filterForNAT(origin, dest string, protocol v1.Protocol) *conntrackFilter {
152+
klog.V(4).InfoS("Adding conntrack filter for cleanup", "org-dst", origin, "reply-src", dest, "protocol", protocol)
153+
return &conntrackFilter{
154+
protocol: protocolMap[protocol],
155+
original: &connectionTuple{
156+
dstIP: netutils.ParseIPSloppy(origin),
157+
},
158+
reply: &connectionTuple{
159+
srcIP: netutils.ParseIPSloppy(dest),
160+
},
161+
}
162+
}
163+
164+
// filterForPortNAT returns a *conntrackFilter used to delete the conntrack entries of the
// given protocol for connections identified by both the destination port in the original
165+
// direction (port) and the source IP in the reply direction (dest). dest is parsed with
// netutils.ParseIPSloppy; the requested filter is logged at verbosity 4.
166+
func filterForPortNAT(dest string, port int, protocol v1.Protocol) *conntrackFilter {
167+
klog.V(4).InfoS("Adding conntrack filter for cleanup", "org-port-dst", port, "reply-src", dest, "protocol", protocol)
168+
return &conntrackFilter{
169+
protocol: protocolMap[protocol],
170+
original: &connectionTuple{
171+
dstPort: uint16(port), // int-to-uint16 conversion: a port outside the uint16 range wraps silently
172+
},
173+
reply: &connectionTuple{
174+
srcIP: netutils.ParseIPSloppy(dest),
175+
},
176+
}
115177
}

pkg/proxy/conntrack/cleanup_test.go

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,19 @@ import (
2424
"reflect"
2525
"testing"
2626

27+
"github.com/stretchr/testify/require"
28+
"github.com/vishvananda/netlink"
29+
2730
v1 "k8s.io/api/core/v1"
2831
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2932
"k8s.io/apimachinery/pkg/types"
3033
"k8s.io/apimachinery/pkg/util/sets"
3134
"k8s.io/kubernetes/pkg/proxy"
35+
netutils "k8s.io/utils/net"
3236
)
3337

3438
const (
39+
testIPFamily = v1.IPv4Protocol
3540
testClusterIP = "172.30.1.1"
3641
testExternalIP = "192.168.99.100"
3742
testLoadBalancerIP = "1.2.3.4"
@@ -245,7 +250,7 @@ func TestCleanStaleEntries(t *testing.T) {
245250
for _, tc := range testCases {
246251
t.Run(tc.description, func(t *testing.T) {
247252
fake := NewFake()
248-
CleanStaleEntries(fake, svcPortMap, tc.serviceUpdates, tc.endpointsUpdates)
253+
CleanStaleEntries(fake, testIPFamily, svcPortMap, tc.serviceUpdates, tc.endpointsUpdates)
249254
if !fake.ClearedIPs.Equal(tc.result.ClearedIPs) {
250255
t.Errorf("Expected ClearedIPs=%v, got %v", tc.result.ClearedIPs, fake.ClearedIPs)
251256
}
@@ -261,3 +266,152 @@ func TestCleanStaleEntries(t *testing.T) {
261266
})
262267
}
263268
}
269+
270+
// TestFilterForIP checks that filterForIP builds the expected conntrackFilter
// (protocol number plus original-direction destination IP) for an IPv4/UDP and
// an IPv6/TCP input.
func TestFilterForIP(t *testing.T) {
271+
testCases := []struct {
272+
name string
273+
ip string
274+
protocol v1.Protocol
275+
expectedFamily netlink.InetFamily // NOTE(review): never set by any case nor asserted below
276+
expectedFilter *conntrackFilter
277+
}{
278+
{
279+
name: "ipv4 + UDP",
280+
ip: "10.96.0.10",
281+
protocol: v1.ProtocolUDP,
282+
expectedFilter: &conntrackFilter{
283+
protocol: 17,
284+
original: &connectionTuple{dstIP: netutils.ParseIPSloppy("10.96.0.10")},
285+
},
286+
},
287+
{
288+
name: "ipv6 + TCP",
289+
ip: "2001:db8:1::2",
290+
protocol: v1.ProtocolTCP,
291+
expectedFilter: &conntrackFilter{
292+
protocol: 6,
293+
original: &connectionTuple{dstIP: netutils.ParseIPSloppy("2001:db8:1::2")},
294+
},
295+
},
296+
}
297+
298+
for _, tc := range testCases {
299+
t.Run(tc.name, func(t *testing.T) {
300+
require.Equal(t, tc.expectedFilter, filterForIP(tc.ip, tc.protocol))
301+
})
302+
}
303+
}
304+
305+
// TestFilterForPort checks that filterForPort builds the expected conntrackFilter
// (protocol number plus original-direction destination port) for a UDP and an
// SCTP input.
func TestFilterForPort(t *testing.T) {
306+
testCases := []struct {
307+
name string
308+
port int
309+
protocol v1.Protocol
310+
expectedFilter *conntrackFilter
311+
}{
312+
{
313+
name: "UDP",
314+
port: 5000,
315+
protocol: v1.ProtocolUDP,
316+
317+
expectedFilter: &conntrackFilter{
318+
protocol: 17,
319+
original: &connectionTuple{dstPort: 5000},
320+
},
321+
},
322+
{
323+
name: "SCTP",
324+
port: 3000,
325+
protocol: v1.ProtocolSCTP,
326+
expectedFilter: &conntrackFilter{
327+
protocol: 132,
328+
original: &connectionTuple{dstPort: 3000},
329+
},
330+
},
331+
}
332+
333+
for _, tc := range testCases {
334+
t.Run(tc.name, func(t *testing.T) {
335+
require.Equal(t, tc.expectedFilter, filterForPort(tc.port, tc.protocol))
336+
})
337+
}
338+
}
339+
340+
// TestFilterForNAT checks that filterForNAT builds the expected conntrackFilter
// (protocol number, original-direction destination IP and reply-direction source
// IP) for an IPv4/SCTP and an IPv6/UDP input.
func TestFilterForNAT(t *testing.T) {
341+
testCases := []struct {
342+
name string
343+
orig string
344+
dest string
345+
protocol v1.Protocol
346+
expectedFilter *conntrackFilter
347+
}{
348+
{
349+
name: "ipv4 + SCTP",
350+
orig: "10.96.0.10",
351+
dest: "10.244.0.3",
352+
protocol: v1.ProtocolSCTP,
353+
expectedFilter: &conntrackFilter{
354+
protocol: 132,
355+
original: &connectionTuple{dstIP: netutils.ParseIPSloppy("10.96.0.10")},
356+
reply: &connectionTuple{srcIP: netutils.ParseIPSloppy("10.244.0.3")},
357+
},
358+
},
359+
{
360+
name: "ipv6 + UDP",
361+
orig: "2001:db8:1::2",
362+
dest: "4001:ab8::2",
363+
protocol: v1.ProtocolUDP,
364+
expectedFilter: &conntrackFilter{
365+
protocol: 17,
366+
original: &connectionTuple{dstIP: netutils.ParseIPSloppy("2001:db8:1::2")},
367+
reply: &connectionTuple{srcIP: netutils.ParseIPSloppy("4001:ab8::2")},
368+
},
369+
},
370+
}
371+
372+
for _, tc := range testCases {
373+
t.Run(tc.name, func(t *testing.T) {
374+
require.Equal(t, tc.expectedFilter, filterForNAT(tc.orig, tc.dest, tc.protocol))
375+
})
376+
}
377+
}
378+
379+
// TestFilterForPortNAT checks that filterForPortNAT builds the expected
// conntrackFilter (protocol number, original-direction destination port and
// reply-direction source IP) for an IPv4/TCP and an IPv6/UDP input.
func TestFilterForPortNAT(t *testing.T) {
380+
testCases := []struct {
381+
name string
382+
dest string
383+
port int
384+
protocol v1.Protocol
385+
expectedFamily netlink.InetFamily // NOTE(review): never set by any case nor asserted below
386+
expectedFilter *conntrackFilter
387+
}{
388+
{
389+
name: "ipv4 + TCP",
390+
dest: "10.96.0.10",
391+
port: 80,
392+
protocol: v1.ProtocolTCP,
393+
expectedFilter: &conntrackFilter{
394+
protocol: 6,
395+
original: &connectionTuple{dstPort: 80},
396+
reply: &connectionTuple{srcIP: netutils.ParseIPSloppy("10.96.0.10")},
397+
},
398+
},
399+
{
400+
name: "ipv6 + UDP",
401+
dest: "2001:db8:1::2",
402+
port: 8000,
403+
protocol: v1.ProtocolUDP,
404+
expectedFilter: &conntrackFilter{
405+
protocol: 17,
406+
original: &connectionTuple{dstPort: 8000},
407+
reply: &connectionTuple{srcIP: netutils.ParseIPSloppy("2001:db8:1::2")},
408+
},
409+
},
410+
}
411+
412+
for _, tc := range testCases {
413+
t.Run(tc.name, func(t *testing.T) {
414+
require.Equal(t, tc.expectedFilter, filterForPortNAT(tc.dest, tc.port, tc.protocol))
415+
})
416+
}
417+
}

0 commit comments

Comments
 (0)