Skip to content

Commit c48774b

Browse files
committed
fix: [NPM] [LINUX] iptables-nft segfault at high scale
Signed-off-by: Hunter Gregory <[email protected]>
1 parent 287cce8 commit c48774b

File tree

2 files changed

+154
-53
lines changed

2 files changed

+154
-53
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: azure-npm
5+
namespace: kube-system
6+
labels:
7+
app: azure-npm
8+
addonmanager.kubernetes.io/mode: EnsureExists
9+
spec:
10+
replicas: 1
11+
selector:
12+
matchLabels:
13+
k8s-app: azure-npm
14+
template:
15+
metadata:
16+
labels:
17+
k8s-app: azure-npm
18+
annotations:
19+
scheduler.alpha.kubernetes.io/critical-pod: ""
20+
azure.npm/scrapeable: ""
21+
spec:
22+
priorityClassName: system-node-critical
23+
tolerations:
24+
- operator: "Exists"
25+
effect: NoExecute
26+
- operator: "Exists"
27+
effect: NoSchedule
28+
- key: CriticalAddonsOnly
29+
operator: Exists
30+
containers:
31+
- name: azure-npm
32+
image: acnpublic.azurecr.io/azure-npm:11-14-2024-test-nft-commands-only
33+
resources:
34+
limits:
35+
cpu: 250m
36+
memory: 400Mi
37+
requests:
38+
cpu: 50m
39+
memory: 300Mi
40+
securityContext:
41+
privileged: false
42+
capabilities:
43+
add:
44+
- NET_ADMIN
45+
readOnlyRootFilesystem: true
46+
env:
47+
- name: HOSTNAME
48+
valueFrom:
49+
fieldRef:
50+
apiVersion: v1
51+
fieldPath: spec.nodeName
52+
- name: NPM_CONFIG
53+
value: /etc/azure-npm/azure-npm.json
54+
volumeMounts:
55+
- name: log
56+
mountPath: /var/log
57+
- name: xtables-lock
58+
mountPath: /run/xtables.lock
59+
- name: protocols
60+
mountPath: /etc/protocols
61+
- name: azure-npm-config
62+
mountPath: /etc/azure-npm
63+
- name: tmp
64+
mountPath: /tmp
65+
hostNetwork: true
66+
hostUsers: false
67+
nodeSelector:
68+
kubernetes.io/os: linux
69+
azure-npm-debug: "true"
70+
volumes:
71+
- name: log
72+
hostPath:
73+
path: /var/log
74+
type: Directory
75+
- name: xtables-lock
76+
hostPath:
77+
path: /run/xtables.lock
78+
type: File
79+
- name: protocols
80+
hostPath:
81+
path: /etc/protocols
82+
type: File
83+
- name: azure-npm-config
84+
configMap:
85+
name: azure-npm-config
86+
- name: tmp
87+
emptyDir: {}
88+
serviceAccountName: azure-npm

npm/util/const.go

Lines changed: 66 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@ package util
44

55
import (
66
"bytes"
7-
"fmt"
8-
"strings"
7+
"os"
8+
"time"
99

1010
"github.com/Azure/azure-container-networking/common"
11+
12+
"k8s.io/klog"
13+
utilexec "k8s.io/utils/exec"
1114
)
1215

1316
// kubernetes related constants.
@@ -273,61 +276,42 @@ const (
273276
)
274277

275278
func DetectIptablesVersion(ioShim *common.IOShim) {
276-
cmd := ioShim.Exec.Command(IptablesSaveNft, "-t", "mangle")
277-
278-
output, err := cmd.CombinedOutput()
279-
if err != nil {
280-
fmt.Printf("Error running iptables-nft-save: %s", err)
279+
listChainArgs := []string{"-w", "60", "-t", "mangle", "-n", "-L"}
280+
hintArgs := make([]string, 0, len(listChainArgs)+1)
281+
hintArgs = append(hintArgs, listChainArgs...)
282+
hintArgs = append(hintArgs, "KUBE-IPTABLES-HINT")
283+
canaryArgs := make([]string, 0, len(listChainArgs)+1)
284+
canaryArgs = append(canaryArgs, listChainArgs...)
285+
canaryArgs = append(canaryArgs, "KUBE-KUBELET-CANARY")
286+
287+
cmdHint := ioShim.Exec.Command(IptablesNft, hintArgs...)
288+
cmdCanary := ioShim.Exec.Command(IptablesNft, canaryArgs...)
289+
290+
hintOutput, hintErr := cmdHint.CombinedOutput()
291+
canaryOutput, canaryErr := cmdCanary.CombinedOutput()
292+
if hintErr != nil && canaryErr != nil {
293+
klog.Infof("DEBUGME: iptables-nft hint failed. hintErr: %s. canaryErr: %s", hintErr.Error(), canaryErr.Error())
281294
return
282295
}
283296

284-
if strings.Contains(string(output), "KUBE-IPTABLES-HINT") || strings.Contains(string(output), "KUBE-KUBELET-CANARY") {
285-
Iptables = IptablesNft
286-
IptablesSave = IptablesSaveNft
287-
IptablesRestore = IptablesRestoreNft
288-
} else {
289-
lCmd := ioShim.Exec.Command(IptablesSaveLegacy, "-t", "mangle")
290-
291-
loutput, err := lCmd.CombinedOutput()
292-
if err != nil {
293-
fmt.Printf("Error running iptables-legacy-save: %s", err)
294-
return
295-
}
296-
297-
if strings.Contains(string(loutput), "KUBE-IPTABLES-HINT") || strings.Contains(string(loutput), "KUBE-KUBELET-CANARY") {
298-
Iptables = IptablesLegacy
299-
IptablesSave = IptablesSaveLegacy
300-
IptablesRestore = IptablesRestoreLegacy
301-
} else {
302-
lsavecmd := ioShim.Exec.Command(IptablesSaveNft)
303-
lsaveoutput, err := lsavecmd.CombinedOutput()
304-
if err != nil {
305-
fmt.Printf("Error running iptables-nft-save: %s", err)
306-
return
307-
}
308-
309-
lcount := countLines(lsaveoutput)
310-
311-
savecmd := ioShim.Exec.Command(IptablesSaveLegacy)
312-
saveoutput, err := savecmd.CombinedOutput()
313-
if err != nil {
314-
fmt.Printf("Error running iptables-legacy-save: %s", err)
315-
return
316-
}
317-
318-
count := countLines(saveoutput)
319-
320-
if lcount > count {
321-
Iptables = IptablesLegacy
322-
IptablesSave = IptablesSaveLegacy
323-
IptablesRestore = IptablesRestoreLegacy
324-
} else {
325-
Iptables = IptablesNft
326-
IptablesSave = IptablesSaveNft
327-
IptablesRestore = IptablesRestoreNft
328-
}
329-
}
297+
if hintErr != nil {
298+
klog.Infof("DEBUGME: iptables-nft hint failed: %v", hintErr)
299+
} else if canaryErr != nil {
300+
klog.Infof("DEBUGME: iptables-nft canary failed: %v", canaryErr)
330301
}
302+
303+
klog.Info("DEBUGME: found matches")
304+
305+
klog.Info("DEBUGME: line count (hint): %d", len(bytes.Split(hintOutput, []byte("\n"))))
306+
klog.Info("DEBUGME: line count (canary): %d", len(bytes.Split(canaryOutput, []byte("\n"))))
307+
308+
Iptables = IptablesNft
309+
IptablesSave = IptablesSaveNft
310+
IptablesRestore = IptablesRestoreNft
311+
312+
time.Sleep(30 * time.Second)
313+
klog.Info("DEBUGME: crashing for test image after running iptables-nft commands")
314+
os.Exit(1)
331315
}
332316

333317
func countLines(output []byte) int {
@@ -339,3 +323,32 @@ func countLines(output []byte) int {
339323
}
340324
return count
341325
}
326+
327+
func pipeCommandToGrep(command, grepCommand utilexec.Cmd) (searchResults []byte, gotMatches bool, commandError error) {
328+
pipe, commandError := command.StdoutPipe()
329+
if commandError != nil {
330+
return
331+
}
332+
closePipe := func() { _ = pipe.Close() } // appease go lint
333+
defer closePipe()
334+
335+
grepCommand.SetStdin(pipe)
336+
commandError = command.Start()
337+
if commandError != nil {
338+
return
339+
}
340+
341+
// Without this wait, defunct iptable child process are created
342+
wait := func() { _ = command.Wait() } // appease go lint
343+
defer wait()
344+
345+
output, err := grepCommand.CombinedOutput()
346+
if err != nil {
347+
// grep returns err status 1 if nothing is found
348+
// but the other command's exit status gets propagated through this CombinedOutput, so we might have errors undetected
349+
return
350+
}
351+
searchResults = output
352+
gotMatches = true
353+
return
354+
}

0 commit comments

Comments
 (0)