Skip to content

Commit db1990f

Browse files
authored
Merge pull request kubernetes#86932 from spiffxp/revert-86708
Revert "fix flakiness on e2e test TCP CLOSE_WAIT timeout"
2 parents f8d6d85 + b8cac87 commit db1990f

File tree

1 file changed

+46
-98
lines changed

1 file changed

+46
-98
lines changed

test/e2e/network/kube_proxy.go

Lines changed: 46 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ import (
2626

2727
v1 "k8s.io/api/core/v1"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29-
"k8s.io/apimachinery/pkg/util/wait"
3029

3130
"k8s.io/kubernetes/test/e2e/framework"
3231
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
3332
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
33+
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
3434
"k8s.io/kubernetes/test/images/agnhost/net/nat"
3535
imageutils "k8s.io/kubernetes/test/utils/image"
3636

@@ -42,10 +42,10 @@ var kubeProxyE2eImage = imageutils.GetE2EImage(imageutils.Agnhost)
4242

4343
var _ = SIGDescribe("Network", func() {
4444
const (
45-
testDaemonHTTPPort = 11301
46-
testDaemonTCPPort = 11302
47-
deadlineTimeoutSeconds = 10
48-
postFinTimeoutSeconds = 30
45+
testDaemonHTTPPort = 11301
46+
testDaemonTCPPort = 11302
47+
timeoutSeconds = 10
48+
postFinTimeoutSeconds = 5
4949
)
5050

5151
fr := framework.NewDefaultFramework("network")
@@ -81,63 +81,16 @@ var _ = SIGDescribe("Network", func() {
8181

8282
zero := int64(0)
8383

84-
// Create a pod to check the conntrack entries on the host node
85-
// It mounts the host /proc/net folder to be able to access
86-
// the nf_conntrack file with the host conntrack entries
87-
privileged := true
88-
89-
hostExecPod := &v1.Pod{
90-
ObjectMeta: metav1.ObjectMeta{
91-
Name: "e2e-net-exec",
92-
Namespace: fr.Namespace.Name,
93-
Labels: map[string]string{"app": "e2e-net-exec"},
94-
},
95-
Spec: v1.PodSpec{
96-
HostNetwork: true,
97-
NodeName: clientNodeInfo.name,
98-
Containers: []v1.Container{
99-
{
100-
Name: "e2e-net-exec",
101-
Image: kubeProxyE2eImage,
102-
ImagePullPolicy: "Always",
103-
Args: []string{"pause"},
104-
VolumeMounts: []v1.VolumeMount{
105-
{
106-
Name: "proc-net",
107-
MountPath: "/rootfs/proc/net",
108-
ReadOnly: true,
109-
},
110-
},
111-
SecurityContext: &v1.SecurityContext{
112-
Privileged: &privileged,
113-
},
114-
},
115-
},
116-
Volumes: []v1.Volume{
117-
{
118-
Name: "proc-net",
119-
VolumeSource: v1.VolumeSource{
120-
HostPath: &v1.HostPathVolumeSource{
121-
Path: "/proc/net",
122-
},
123-
},
124-
},
125-
},
126-
TerminationGracePeriodSeconds: &zero,
127-
},
128-
}
129-
fr.PodClient().CreateSync(hostExecPod)
130-
13184
// Some distributions (Ubuntu 16.04 etc.) don't support the proc file.
132-
_, err = framework.RunHostCmd(fr.Namespace.Name, "e2e-net-exec",
133-
"ls /rootfs/proc/net/nf_conntrack")
85+
_, err = e2essh.IssueSSHCommandWithResult(
86+
"ls /proc/net/nf_conntrack",
87+
framework.TestContext.Provider,
88+
clientNodeInfo.node)
13489
if err != nil && strings.Contains(err.Error(), "No such file or directory") {
135-
framework.Skipf("The node %s does not support /proc/net/nf_conntrack",
136-
clientNodeInfo.name)
90+
framework.Skipf("The node %s does not support /proc/net/nf_conntrack", clientNodeInfo.name)
13791
}
13892
framework.ExpectNoError(err)
13993

140-
// Create the client and server pods
14194
clientPodSpec := &v1.Pod{
14295
ObjectMeta: metav1.ObjectMeta{
14396
Name: "e2e-net-client",
@@ -152,7 +105,7 @@ var _ = SIGDescribe("Network", func() {
152105
Image: kubeProxyE2eImage,
153106
ImagePullPolicy: "Always",
154107
Args: []string{
155-
"net", "--serve", fmt.Sprintf(":%d", testDaemonHTTPPort),
108+
"net", "--serve", fmt.Sprintf("0.0.0.0:%d", testDaemonHTTPPort),
156109
},
157110
},
158111
},
@@ -177,7 +130,7 @@ var _ = SIGDescribe("Network", func() {
177130
"net",
178131
"--runner", "nat-closewait-server",
179132
"--options",
180-
fmt.Sprintf(`{"LocalAddr":":%v", "PostFinTimeoutSeconds":%v}`,
133+
fmt.Sprintf(`{"LocalAddr":"0.0.0.0:%v", "PostFindTimeoutSeconds":%v}`,
181134
testDaemonTCPPort,
182135
postFinTimeoutSeconds),
183136
},
@@ -213,8 +166,8 @@ var _ = SIGDescribe("Network", func() {
213166
options := nat.CloseWaitClientOptions{
214167
RemoteAddr: fmt.Sprintf("%v:%v",
215168
serverNodeInfo.nodeIP, testDaemonTCPPort),
216-
TimeoutSeconds: deadlineTimeoutSeconds,
217-
PostFinTimeoutSeconds: postFinTimeoutSeconds,
169+
TimeoutSeconds: timeoutSeconds,
170+
PostFinTimeoutSeconds: 0,
218171
LeakConnection: true,
219172
}
220173

@@ -226,52 +179,47 @@ var _ = SIGDescribe("Network", func() {
226179
`'%v' 2>/dev/null`,
227180
testDaemonHTTPPort,
228181
string(jsonBytes))
229-
// Run the closewait command in a goroutine so it keeps waiting during postFinTimeoutSeconds
230-
// otherwise the pod is deleted and the connection is closed, losing the conntrack entry
231-
go func() {
232-
framework.RunHostCmdOrDie(fr.Namespace.Name, "e2e-net-client", cmd)
233-
}()
182+
framework.RunHostCmdOrDie(fr.Namespace.Name, "e2e-net-client", cmd)
234183

235184
<-time.After(time.Duration(1) * time.Second)
236185

237186
ginkgo.By("Checking /proc/net/nf_conntrack for the timeout")
187+
// If test flakes occur here, then this check should be performed
188+
// in a loop as there may be a race with the client connecting.
189+
e2essh.IssueSSHCommandWithResult(
190+
fmt.Sprintf("sudo cat /proc/net/nf_conntrack | grep 'dport=%v'",
191+
testDaemonTCPPort),
192+
framework.TestContext.Provider,
193+
clientNodeInfo.node)
194+
195+
// Timeout in seconds is available as the fifth column from
196+
// /proc/net/nf_conntrack.
197+
result, err := e2essh.IssueSSHCommandWithResult(
198+
fmt.Sprintf(
199+
"sudo cat /proc/net/nf_conntrack "+
200+
"| grep 'CLOSE_WAIT.*dst=%v.*dport=%v' "+
201+
"| tail -n 1"+
202+
"| awk '{print $5}' ",
203+
serverNodeInfo.nodeIP,
204+
testDaemonTCPPort),
205+
framework.TestContext.Provider,
206+
clientNodeInfo.node)
207+
framework.ExpectNoError(err)
208+
209+
timeoutSeconds, err := strconv.Atoi(strings.TrimSpace(result.Stdout))
210+
framework.ExpectNoError(err)
211+
238212
// These must be synchronized from the default values set in
239213
// pkg/apis/../defaults.go ConntrackTCPCloseWaitTimeout. The
240214
// current defaults are hidden in the initialization code.
241215
const epsilonSeconds = 60
242216
const expectedTimeoutSeconds = 60 * 60
243-
// Obtain the corresponding conntrack entry on the host checking
244-
// the nf_conntrack file from the pod e2e-net-exec.
245-
// It retries in a loop if the entry is not found.
246-
cmd = fmt.Sprintf("cat /rootfs/proc/net/nf_conntrack "+
247-
"| grep -m 1 'CLOSE_WAIT.*dst=%v.*dport=%v' ",
248-
serverNodeInfo.nodeIP,
249-
testDaemonTCPPort)
250-
if err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) {
251-
result, err := framework.RunHostCmd(fr.Namespace.Name, "e2e-net-exec", cmd)
252-
// retry if we can't obtain the conntrack entry
253-
if err != nil {
254-
framework.Logf("failed to obtain conntrack entry: %v %v", result, err)
255-
return false, nil
256-
}
257-
framework.Logf("conntrack entry for node %v and port %v: %v", serverNodeInfo.nodeIP, testDaemonTCPPort, result)
258-
// Timeout in seconds is available as the fifth column of
259-
// the matched entry in /proc/net/nf_conntrack.
260-
line := strings.Fields(result)
261-
if len(line) < 5 {
262-
return false, fmt.Errorf("conntrack entry does not have a timeout field: %v", line)
263-
}
264-
timeoutSeconds, err := strconv.Atoi(line[4])
265-
if err != nil {
266-
return false, fmt.Errorf("failed to convert matched timeout %s to integer: %v", line[4], err)
267-
}
268-
if math.Abs(float64(timeoutSeconds-expectedTimeoutSeconds)) < epsilonSeconds {
269-
return true, nil
270-
}
271-
return false, fmt.Errorf("wrong TCP CLOSE_WAIT timeout: %v expected: %v", timeoutSeconds, expectedTimeoutSeconds)
272-
}); err != nil {
273-
framework.Failf("no conntrack entry for port %d on node %s", testDaemonTCPPort, serverNodeInfo.nodeIP)
274-
}
217+
218+
framework.Logf("conntrack entry timeout was: %v, expected: %v",
219+
timeoutSeconds, expectedTimeoutSeconds)
220+
221+
gomega.Expect(math.Abs(float64(timeoutSeconds - expectedTimeoutSeconds))).Should(
222+
gomega.BeNumerically("<", (epsilonSeconds)))
275223
})
276224

277225
// Regression test for #74839, where:

0 commit comments

Comments
 (0)