Skip to content

Commit cf9b6cf

Browse files
authored
Merge pull request #213 from hchenxa/hchenxa
update the deployment file and use configmap
2 parents 5e56f2b + 16933f8 commit cf9b6cf

File tree

3 files changed

+91
-4
lines changed

3 files changed

+91
-4
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ to another registry.
103103

104104
## Start DaemonSet
105105
* Edit [node-problem-detector.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector.yaml) to fit your environment: set the `log` volume to your system log directory (used by SystemLogMonitor). For **Kubernetes <1.9**, use [node-problem-detector-old.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector-old.yaml).
106-
* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`
107106
* If needed, you can use [ConfigMap](https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/)
108-
to overwrite the `config/`.
107+
to overwrite the `config/`. Edit [node-problem-detector-config.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector-config.yaml) to fit your environment, and create the ConfigMap with `kubectl create -f node-problem-detector-config.yaml`.
108+
* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`.
109109

110110
## Start Standalone
111111
To run node-problem-detector standalone, you should set `inClusterConfig` to `false` and
deployment/node-problem-detector-config.yaml

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
apiVersion: v1
2+
data:
3+
kernel-monitor.json: |
4+
{
5+
"plugin": "journald",
6+
"pluginConfig": {
7+
"source": "kernel"
8+
},
9+
"logPath": "/var/log/journal",
10+
"lookback": "5m",
11+
"bufferSize": 10,
12+
"source": "kernel-monitor",
13+
"conditions": [
14+
{
15+
"type": "KernelDeadlock",
16+
"reason": "KernelHasNoDeadlock",
17+
"message": "kernel has no deadlock"
18+
}
19+
],
20+
"rules": [
21+
{
22+
"type": "temporary",
23+
"reason": "OOMKilling",
24+
"pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB.*"
25+
},
26+
{
27+
"type": "temporary",
28+
"reason": "TaskHung",
29+
"pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
30+
},
31+
{
32+
"type": "temporary",
33+
"reason": "UnregisterNetDevice",
34+
"pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
35+
},
36+
{
37+
"type": "temporary",
38+
"reason": "KernelOops",
39+
"pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
40+
},
41+
{
42+
"type": "temporary",
43+
"reason": "KernelOops",
44+
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
45+
},
46+
{
47+
"type": "permanent",
48+
"condition": "KernelDeadlock",
49+
"reason": "AUFSUmountHung",
50+
"pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\."
51+
},
52+
{
53+
"type": "permanent",
54+
"condition": "KernelDeadlock",
55+
"reason": "DockerHung",
56+
"pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
57+
}
58+
]
59+
}
60+
docker-monitor.json: |
61+
{
62+
"plugin": "journald",
63+
"pluginConfig": {
64+
"source": "docker"
65+
},
66+
"logPath": "/var/log/journal",
67+
"lookback": "5m",
68+
"bufferSize": 10,
69+
"source": "docker-monitor",
70+
"conditions": [],
71+
"rules": [
72+
{
73+
"type": "temporary",
74+
"reason": "CorruptDockerImage",
75+
"pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*"
76+
}
77+
]
78+
}
79+
kind: ConfigMap
80+
metadata:
81+
name: node-problem-detector-config
82+
namespace: default

deployment/node-problem-detector.yaml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ spec:
1616
command:
1717
- /node-problem-detector
1818
- --logtostderr
19-
- --kernel-monitor=/config/kernel-monitor.json
20-
image: k8s.gcr.io/node-problem-detector:v0.2
19+
- --system-log-monitors=/config/kernel-monitor.json,/config/docker-monitor.json
20+
image: k8s.gcr.io/node-problem-detector:v0.5.0
2121
imagePullPolicy: Always
2222
securityContext:
2323
privileged: true
@@ -55,3 +55,8 @@ spec:
5555
- name: config
5656
configMap:
5757
name: node-problem-detector-config
58+
items:
59+
- key: kernel-monitor.json
60+
path: kernel-monitor.json
61+
- key: docker-monitor.json
62+
path: docker-monitor.json

0 commit comments

Comments
 (0)