@@ -21,12 +21,15 @@ import (
21
21
"os"
22
22
"path"
23
23
"strings"
24
+ "time"
24
25
25
26
"k8s.io/node-problem-detector/pkg/util/metrics"
26
27
"k8s.io/node-problem-detector/test/e2e/lib/gce"
27
28
"k8s.io/node-problem-detector/test/e2e/lib/npd"
28
29
29
30
"github.com/onsi/ginkgo"
31
+ "github.com/onsi/ginkgo/config"
32
+ . "github.com/onsi/gomega"
30
33
"github.com/pborman/uuid"
31
34
)
32
35
@@ -57,42 +60,77 @@ var _ = ginkgo.Describe("NPD should export Prometheus metrics.", func() {
57
60
},
58
61
* image ,
59
62
* imageProject )
60
- if err != nil {
61
- ginkgo .Fail (fmt .Sprintf ("Unable to create test instance: %v" , err ))
62
- }
63
+ Expect (err ).NotTo (HaveOccurred (), fmt .Sprintf ("Unable to create test instance: %v" , err ))
63
64
64
65
err = npd .SetupNPD (instance , * npdBuildTar )
65
- if err != nil {
66
- ginkgo .Fail (fmt .Sprintf ("Unable to setup NPD: %v" , err ))
67
- }
66
+ Expect (err ).NotTo (HaveOccurred (), fmt .Sprintf ("Unable to setup NPD: %v" , err ))
68
67
})
69
68
70
69
ginkgo .Context ("On a clean node" , func () {
71
70
72
71
ginkgo .It ("NPD should export host_uptime metric" , func () {
73
72
err := npd .WaitForNPD (instance , []string {"host_uptime" }, 120 )
74
- if err != nil {
75
- ginkgo .Fail (fmt .Sprintf ("Expect NPD to become ready in 120s, but hit error: %v" , err ))
76
- }
73
+ Expect (err ).NotTo (HaveOccurred (), fmt .Sprintf ("Expect NPD to become ready in 120s, but hit error: %v" , err ))
77
74
78
75
gotMetrics , err := npd .FetchNPDMetrics (instance )
79
- if err != nil {
80
- ginkgo .Fail (fmt .Sprintf ("Error fetching NPD metrics: %v" , err ))
81
- }
76
+ Expect (err ).NotTo (HaveOccurred (), fmt .Sprintf ("Error fetching NPD metrics: %v" , err ))
77
+
82
78
_ , err = metrics .GetFloat64Metric (gotMetrics , "host_uptime" , map [string ]string {}, false )
83
- if err != nil {
84
- ginkgo .Fail (fmt .Sprintf ("Failed to find uptime metric: %v.\n Here is all NPD exported metrics: %v" ,
85
- err , gotMetrics ))
86
- }
79
+ Expect (err ).NotTo (HaveOccurred (), fmt .Sprintf ("Failed to find uptime metric: %v.\n Here is all NPD exported metrics: %v" , err , gotMetrics ))
80
+ })
81
+
82
+ ginkgo .It ("NPD should not report any problem" , func () {
83
+ err := npd .WaitForNPD (instance , []string {"problem_gauge" }, 120 )
84
+ Expect (err ).NotTo (HaveOccurred (), fmt .Sprintf ("Expect NPD to become ready in 120s, but hit error: %v" , err ))
85
+
86
+ assertMetricValueInBound (instance ,
87
+ "problem_gauge" , map [string ]string {"reason" : "DockerHung" , "type" : "KernelDeadlock" },
88
+ 0.0 , 0.0 )
89
+ assertMetricValueInBound (instance ,
90
+ "problem_counter" , map [string ]string {"reason" : "DockerHung" },
91
+ 0.0 , 0.0 )
92
+ assertMetricValueInBound (instance ,
93
+ "problem_counter" , map [string ]string {"reason" : "FilesystemIsReadOnly" },
94
+ 0.0 , 0.0 )
95
+ assertMetricValueInBound (instance ,
96
+ "problem_counter" , map [string ]string {"reason" : "KernelOops" },
97
+ 0.0 , 0.0 )
98
+ assertMetricValueInBound (instance ,
99
+ "problem_counter" , map [string ]string {"reason" : "OOMKilling" },
100
+ 0.0 , 0.0 )
101
+ })
102
+ })
103
+
104
+ ginkgo .Context ("When ext4 filesystem error happens" , func () {
105
+
106
+ ginkgo .BeforeEach (func () {
107
+ err := npd .WaitForNPD (instance , []string {"problem_gauge" }, 120 )
108
+ Expect (err ).NotTo (HaveOccurred (), fmt .Sprintf ("Expect NPD to become ready in 120s, but hit error: %v" , err ))
109
+ // This triggers an ext4 error on the boot disk, causing the boot disk to be mounted read-only and systemd-journald to crash.
110
+ instance .RunCommandOrFail ("sudo /home/kubernetes/bin/problem-maker --problem Ext4FilesystemError" )
111
+ })
112
+
113
+ ginkgo .It ("NPD should update problem_counter{reason:Ext4Error} and problem_gauge{type:ReadonlyFilesystem}" , func () {
114
+ time .Sleep (5 * time .Second )
115
+ assertMetricValueInBound (instance ,
116
+ "problem_counter" , map [string ]string {"reason" : "Ext4Error" },
117
+ 1.0 , 2.0 )
118
+ assertMetricValueInBound (instance ,
119
+ "problem_gauge" , map [string ]string {"reason" : "FilesystemIsReadOnly" , "type" : "ReadonlyFilesystem" },
120
+ 1.0 , 1.0 )
121
+ })
122
+
123
+ ginkgo .It ("NPD should remain healthy" , func () {
124
+ npdStates := instance .RunCommandOrFail ("sudo systemctl show node-problem-detector -p ActiveState -p SubState" )
125
+ Expect (npdStates .Stdout ).To (ContainSubstring ("ActiveState=active" ), "NPD is no longer active: %v" , npdStates )
126
+ Expect (npdStates .Stdout ).To (ContainSubstring ("SubState=running" ), "NPD is no longer running: %v" , npdStates )
87
127
})
88
128
})
89
129
90
130
ginkgo .AfterEach (func () {
91
131
defer func () {
92
132
err := instance .DeleteInstance ()
93
- if err != nil {
94
- ginkgo .Fail (fmt .Sprintf ("Failed to clean up the test VM: %v" , err ))
95
- }
133
+ Expect (err ).NotTo (HaveOccurred (), fmt .Sprintf ("Failed to clean up the test VM: %v" , err ))
96
134
}()
97
135
98
136
artifactSubDir := ""
@@ -109,9 +147,20 @@ var _ = ginkgo.Describe("NPD should export Prometheus metrics.", func() {
109
147
}
110
148
}
111
149
112
- errs := npd .SaveTestArtifacts (instance , artifactSubDir )
150
+ errs := npd .SaveTestArtifacts (instance , artifactSubDir , config . GinkgoConfig . ParallelNode )
113
151
if len (errs ) != 0 {
114
152
fmt .Printf ("Error storing debugging data to test artifacts: %v" , errs )
115
153
}
116
154
})
117
155
})
156
+
157
+ func assertMetricValueInBound (instance gce.Instance , metricName string , labels map [string ]string , lowBound float64 , highBound float64 ) {
158
+ value , err := npd .FetchNPDMetric (instance , metricName , labels )
159
+ if err != nil {
160
+ ginkgo .Fail (fmt .Sprintf ("Failed to find %s metric with label %v: %v" , metricName , labels , err ))
161
+ }
162
+ Expect (value ).Should (BeNumerically (">=" , lowBound ),
163
+ "Got value for metric %s with label %v: %v, expect at least %v." , metricName , labels , value , lowBound )
164
+ Expect (value ).Should (BeNumerically ("<=" , highBound ),
165
+ "Got value for metric %s with label %v: %v, expect at most %v." , metricName , labels , value , highBound )
166
+ }
0 commit comments