Skip to content

Commit d874dbf

Browse files
committed
Bump NPD version to v0.7 for GCI
1 parent 07e0cce commit d874dbf

File tree

3 files changed

+30
-5
lines changed

3 files changed

+30
-5
lines changed

cluster/gce/gci/configure-helper.sh

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1396,13 +1396,17 @@ function start-node-problem-detector {
13961396
local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
13971397
# TODO(random-liu): Handle this for alternative container runtime.
13981398
local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json"
1399+
local -r sm_config="${KUBE_HOME}/node-problem-detector/config/systemd-monitor.json"
1400+
local -r ssm_config="${KUBE_HOME}/node-problem-detector/config/system-stats-monitor.json"
1401+
13991402
local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json"
1400-
local -r custom_dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json"
14011403
local -r custom_sm_config="${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json"
1404+
14021405
flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
14031406
flags+=" --logtostderr"
1404-
flags+=" --system-log-monitors=${km_config},${dm_config}"
1405-
flags+=" --custom-plugin-monitors=${custom_km_config},${custom_dm_config},${custom_sm_config}"
1407+
flags+=" --config.system-log-monitor=${km_config},${dm_config},${sm_config}"
1408+
flags+=" --config.system-stats-monitor=${ssm_config}"
1409+
flags+=" --config.custom-plugin-monitor=${custom_km_config},${custom_sm_config}"
14061410
local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256}
14071411
flags+=" --port=${npd_port}"
14081412
if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then

cluster/gce/gci/configure.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ set -o pipefail
2626
### Hardcoded constants
2727
DEFAULT_CNI_VERSION="v0.7.5"
2828
DEFAULT_CNI_SHA1="52e9d2de8a5f927307d9397308735658ee44ab8d"
29-
DEFAULT_NPD_VERSION="v0.6.3"
30-
DEFAULT_NPD_SHA1="3a6ac56be6c121f1b94450bfd1a81ad28d532369"
29+
DEFAULT_NPD_VERSION="v0.7.1"
30+
DEFAULT_NPD_SHA1="a9cae965973d586bf5206ad4fe5aae07e6bfd154"
3131
DEFAULT_CRICTL_VERSION="v1.14.0"
3232
DEFAULT_CRICTL_SHA1="1f93c6183d0a4e186708efe7899da7a7bce9c736"
3333
DEFAULT_MOUNTER_TAR_SHA="8003b798cf33c7f91320cd6ee5cec4fa22244571"

test/e2e/node/node_problem_detector.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,14 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
131131
gomega.Eventually(func() error {
132132
return verifyEvents(f, eventListOptions, 1, "AUFSUmountHung", node.Name)
133133
}, pollTimeout, pollInterval).Should(gomega.Succeed())
134+
135+
// Node problem detector reports kubelet start events automatically starting from NPD v0.7.0+.
136+
// Since Kubelet may be restarted a few times after the node is booted, we just check that the event
137+
// is detected, but do not check how many times Kubelet is started.
138+
ginkgo.By(fmt.Sprintf("Check node-problem-detector posted KubeletStart event on node %q", node.Name))
139+
gomega.Eventually(func() error {
140+
return verifyEventExists(f, eventListOptions, "KubeletStart", node.Name)
141+
}, pollTimeout, pollInterval).Should(gomega.Succeed())
134142
}
135143

136144
ginkgo.By("Gather node-problem-detector cpu and memory stats")
@@ -202,6 +210,19 @@ func verifyEvents(f *framework.Framework, options metav1.ListOptions, num int, r
202210
return nil
203211
}
204212

213+
func verifyEventExists(f *framework.Framework, options metav1.ListOptions, reason, nodeName string) error {
214+
events, err := f.ClientSet.CoreV1().Events(metav1.NamespaceDefault).List(options)
215+
if err != nil {
216+
return err
217+
}
218+
for _, event := range events.Items {
219+
if event.Reason == reason && event.Source.Host == nodeName && event.Count > 0 {
220+
return nil
221+
}
222+
}
223+
return fmt.Errorf("Event %s does not exist: %v", reason, events.Items)
224+
}
225+
205226
func verifyNodeCondition(f *framework.Framework, condition v1.NodeConditionType, status v1.ConditionStatus, reason, nodeName string) error {
206227
node, err := f.ClientSet.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
207228
if err != nil {

0 commit comments

Comments
 (0)