Skip to content

Commit 140224c

Browse files
committed
e2e: attempt to fix GRPC health-check flakes
Some tests where interleaving ACCEPT and DROP iptables rules while others where just checking if the DROP rule was there. The check would pass if the DROP was in place but not necessaily hit if it had an ACCEPT rule over it. Change all tests to latter mechanism. Signed-off-by: Jaime Caamaño Ruiz <[email protected]>
1 parent 26220ee commit 140224c

File tree

2 files changed

+21
-49
lines changed

2 files changed

+21
-49
lines changed

test/e2e/egressip.go

Lines changed: 13 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -406,18 +406,12 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa
406406
waitForStatus(node, setReady)
407407
}
408408

409-
setNodeReachable := func(iptablesCmd, node string, setReachable bool) {
410-
if !setReachable {
411-
_, err := runCommand("docker", "exec", node, iptablesCmd, "-I", "INPUT", "-p", "tcp", "--dport", "9107", "-j", "DROP")
412-
if err != nil {
413-
framework.Failf("failed to block port 9107 on node: %s, err: %v", node, err)
414-
}
415-
} else {
416-
_, err := runCommand("docker", "exec", node, iptablesCmd, "-I", "INPUT", "-p", "tcp", "--dport", "9107", "-j", "ACCEPT")
417-
if err != nil {
418-
framework.Failf("failed to allow port 9107 on node: %s, err: %v", node, err)
419-
}
409+
setNodeReachable := func(node string, setReachable bool) {
410+
op := "Drop"
411+
if setReachable {
412+
op = "Allow"
420413
}
414+
allowOrDropNodeInputTrafficOnPort(op, node, "tcp", "9107")
421415
}
422416

423417
getSpecificEgressIPStatusItems := func(eipName string) []egressIPStatus {
@@ -687,10 +681,7 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa
687681
// ensure all nodes are ready and reachable
688682
for _, node := range nodes.Items {
689683
setNodeReady(node.Name, true)
690-
setNodeReachable("iptables", node.Name, true)
691-
if isV6 {
692-
setNodeReachable("ip6tables", node.Name, true)
693-
}
684+
setNodeReachable(node.Name, true)
694685
waitForNoTaint(node.Name, "node.kubernetes.io/unreachable")
695686
waitForNoTaint(node.Name, "node.kubernetes.io/not-ready")
696687
}
@@ -730,10 +721,7 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa
730721
// ensure all nodes are ready and reachable
731722
for _, node := range []string{egress1Node.name, egress2Node.name} {
732723
setNodeReady(node, true)
733-
setNodeReachable("iptables", node, true)
734-
if isIPv6TestRun {
735-
setNodeReachable("ip6tables", node, true)
736-
}
724+
setNodeReachable(node, true)
737725
waitForNoTaint(node, "node.kubernetes.io/unreachable")
738726
waitForNoTaint(node, "node.kubernetes.io/not-ready")
739727
}
@@ -1644,10 +1632,7 @@ spec:
16441632
createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel)
16451633

16461634
ginkgo.By(fmt.Sprintf("4. Make egress node: %s unreachable", node1))
1647-
setNodeReachable("iptables", node1, false)
1648-
if isIPv6TestRun {
1649-
setNodeReachable("ip6tables", node1, false)
1650-
}
1635+
setNodeReachable(node1, false)
16511636

16521637
otherNode := egress1Node.name
16531638
if node1 == egress1Node.name {
@@ -1674,10 +1659,7 @@ spec:
16741659
framework.Logf("Skipping API server reachability check because IP family does not equal IP family of the EgressIP")
16751660
}
16761661
ginkgo.By("8, Make node 2 unreachable")
1677-
setNodeReachable("iptables", node2, false)
1678-
if isIPv6TestRun {
1679-
setNodeReachable("ip6tables", node2, false)
1680-
}
1662+
setNodeReachable(node2, false)
16811663

16821664
ginkgo.By("9. Check that egress IP is un-assigned (empty status)")
16831665
verifyEgressIPStatusLengthEquals(0, nil)
@@ -1687,10 +1669,7 @@ spec:
16871669
framework.ExpectNoError(err, "10. Check connectivity from pod to an external \"node\" and verify that the IP is the node IP, failed, err: %v", err)
16881670

16891671
ginkgo.By("11. Make node 1 reachable again")
1690-
setNodeReachable("iptables", node1, true)
1691-
if isIPv6TestRun {
1692-
setNodeReachable("ip6tables", node1, true)
1693-
}
1672+
setNodeReachable(node1, true)
16941673

16951674
ginkgo.By("12. Check that egress IP is assigned to node 1 again")
16961675
statuses = verifyEgressIPStatusLengthEquals(1, func(statuses []egressIPStatus) bool {
@@ -1703,10 +1682,7 @@ spec:
17031682
framework.ExpectNoError(err, "13. Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP, failed, err: %v", err)
17041683

17051684
ginkgo.By("14. Make node 2 reachable again")
1706-
setNodeReachable("iptables", node2, true)
1707-
if isIPv6TestRun {
1708-
setNodeReachable("ip6tables", node2, true)
1709-
}
1685+
setNodeReachable(node2, true)
17101686

17111687
ginkgo.By("15. Check that egress IP remains assigned to node 1. We should not be moving the egress IP to node 2 if the node 1 works fine, as to reduce cluster entropy - read: changes.")
17121688
statuses = verifyEgressIPStatusLengthEquals(1, func(statuses []egressIPStatus) bool {
@@ -1728,10 +1704,7 @@ spec:
17281704
framework.ExpectNoError(err, "19. Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP, failed, err: %v", err)
17291705

17301706
ginkgo.By("20. Make node 1 not reachable")
1731-
setNodeReachable("iptables", node1, false)
1732-
if isIPv6TestRun {
1733-
setNodeReachable("ip6tables", node1, false)
1734-
}
1707+
setNodeReachable(node1, false)
17351708

17361709
ginkgo.By("21. Unlabel node 2")
17371710
e2enode.RemoveLabelOffNode(f.ClientSet, node2, "k8s.ovn.org/egress-assignable")
@@ -1746,10 +1719,7 @@ spec:
17461719
verifyEgressIPStatusLengthEquals(0, nil)
17471720

17481721
ginkgo.By("25. Make node 1 reachable again")
1749-
setNodeReachable("iptables", node1, true)
1750-
if isIPv6TestRun {
1751-
setNodeReachable("ip6tables", node1, true)
1752-
}
1722+
setNodeReachable(node1, true)
17531723

17541724
ginkgo.By("26. Check that egress IP is assigned to node 1 again")
17551725
statuses = verifyEgressIPStatusLengthEquals(1, func(statuses []egressIPStatus) bool {

test/e2e/util.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,15 +1163,13 @@ func allowOrDropNodeInputTrafficOnPort(op, nodeName, protocol, port string) {
11631163
}
11641164

11651165
func updateIPTablesRulesForNode(op, nodeName string, ipTablesArgs []string, ipv6 bool) {
1166-
args := []string{"get", "pods", "--selector=app=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", nodeName), "-o", "jsonpath={.items..metadata.name}"}
1167-
ovnKubePodName := e2ekubectl.RunKubectlOrDie(ovnNamespace, args...)
11681166
iptables := "iptables"
11691167
if ipv6 {
11701168
iptables = "ip6tables"
11711169
}
11721170

1173-
args = []string{"exec", ovnKubePodName, "-c", getNodeContainerName(), "--", iptables, "--check"}
1174-
_, err := e2ekubectl.RunKubectl(ovnNamespace, append(args, ipTablesArgs...)...)
1171+
args := []string{"docker", "exec", nodeName, iptables, "-v", "--check"}
1172+
_, err := runCommand(append(args, ipTablesArgs...)...)
11751173
// errors known to be equivalent to not found
11761174
notFound1 := "No chain/target/match by that name"
11771175
notFound2 := "does a matching rule exist in that chain?"
@@ -1186,9 +1184,13 @@ func updateIPTablesRulesForNode(op, nodeName string, ipTablesArgs []string, ipv6
11861184
// rule is already there
11871185
return
11881186
}
1189-
args = []string{"exec", ovnKubePodName, "-c", getNodeContainerName(), "--", iptables, "--" + op}
1187+
1188+
args = []string{"docker", "exec", nodeName, iptables, "--" + op}
11901189
framework.Logf("%s %s rule: %q on node %s", op, iptables, strings.Join(ipTablesArgs, ","), nodeName)
1191-
e2ekubectl.RunKubectlOrDie(ovnNamespace, append(args, ipTablesArgs...)...)
1190+
_, err = runCommand(append(args, ipTablesArgs...)...)
1191+
if err != nil {
1192+
framework.Failf("failed to update %s rule on node %s: %v", iptables, nodeName, err)
1193+
}
11921194
}
11931195

11941196
func randStr(n int) string {

0 commit comments

Comments
 (0)