Commit a26c815

upgrade status CLI monitortest: test health check
1 parent 53dc37a commit a26c815

1 file changed: 269 additions, 0 deletions
@@ -0,0 +1,269 @@
package admupgradestatus

import (
	"errors"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
)

var healthExampleOutput = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session
= Control Plane =
Assessment: Progressing
Target Version: 4.20.0-0.ci-2025-08-13-121604-test-ci-op-njttt0ww-latest (from 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial)
Updating: kube-apiserver
Completion: 6% (2 operators updated, 1 updating, 31 waiting)
Duration: 8m57s (Est. Time Remaining: 1h9m)
Operator Health: 34 Healthy

Updating Cluster Operators
NAME SINCE REASON MESSAGE
kube-apiserver 7m27s NodeInstaller NodeInstallerProgressing: 1 node is at revision 7; 2 nodes are at revision 8

Control Plane Nodes
NAME ASSESSMENT PHASE VERSION EST MESSAGE
ip-10-0-111-19.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ?
ip-10-0-53-218.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ?
ip-10-0-99-189.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ?

= Worker Upgrade =

WORKER POOL ASSESSMENT COMPLETION STATUS
worker Pending 0% (0/3) 3 Available, 0 Progressing, 0 Draining

Worker Pool Nodes: worker
NAME ASSESSMENT PHASE VERSION EST MESSAGE
ip-10-0-0-72.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ?
ip-10-0-100-255.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ?
ip-10-0-106-212.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ?

= Update Health =
SINCE LEVEL IMPACT MESSAGE
8m57s Info None Update is proceeding well`

var healthBadOutput = `= Control Plane =
Assessment: Progressing
Target Version: 4.20.0-0.ci-2025-08-13-121604-test-ci-op-njttt0ww-latest (from 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial)
Completion: 6% (2 operators updated, 1 updating, 31 waiting)
Duration: 8m57s (Est. Time Remaining: 1h9m)
Operator Health: 34 Healthy

Control Plane Nodes
NAME ASSESSMENT PHASE VERSION EST MESSAGE
ip-10-0-111-19.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ?

SOMETHING UNEXPECTED HERE

= Update Health =
SINCE LEVEL IMPACT MESSAGE
8m57s Info None Update is proceeding well`

var healthTableOutput = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session
= Control Plane =
Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m)

All control plane nodes successfully updated to 4.16.0-ec.3

= Update Health =
SINCE LEVEL IMPACT MESSAGE
58m18s Error API Availability Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady)
now Warning Update Stalled Cluster Version version is failing to proceed with the update (ClusterOperatorsDegraded)`

var healthDetailedOutputSingle = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session
= Control Plane =
Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m)

All control plane nodes successfully updated to 4.16.0-ec.3

= Update Health =
Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady)
Since: 58m18s
Level: Error
Impact: API Availability
Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md
Resources:
clusteroperators.config.openshift.io: kube-apiserver
Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?)`

var healthDetailedOutputMultiple = `
Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session
= Control Plane =
Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m)

All control plane nodes successfully updated to 4.16.0-ec.3

= Update Health =
Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady)
Since: 58m18s
Level: Error
Impact: API Availability
Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md
Resources:
clusteroperators.config.openshift.io: kube-apiserver
Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?)

Message: Cluster Version version is failing to proceed with the update (ClusterOperatorsDegraded)
Since: now
Level: Warning
Impact: Update Stalled
Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md
Resources:
clusterversions.config.openshift.io: version
Description: Cluster operators etcd, kube-apiserver are degraded`

var healthMissingField = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session
= Control Plane =
Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m)

All control plane nodes successfully updated to 4.16.0-ec.3

= Update Health =
Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady)
Since: 58m18s
Impact: API Availability
Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md
Resources:
clusteroperators.config.openshift.io: kube-apiserver
Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?)`

var healthEmptyField = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session
= Control Plane =
Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m)

All control plane nodes successfully updated to 4.16.0-ec.3

= Update Health =
Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady)
Since:
Level: Warning
Impact: API Availability
Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md
Resources:
clusteroperators.config.openshift.io: kube-apiserver
Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?)`

func TestMonitor_Health(t *testing.T) {
	t.Parallel()

	testCases := []struct {
		name      string
		snapshots []snapshot
		expected  *junitapi.JUnitTestCase
	}{
		{
			name: "no snapshots -> test skipped",
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
				SkipMessage: &junitapi.SkipMessage{
					Message: "Test skipped because no oc adm upgrade status output was successfully collected",
				},
			},
		},
		{
			name: "good snapshots",
			snapshots: []snapshot{
				{when: time.Now(), out: healthExampleOutput},
				{when: time.Now(), out: healthExampleOutput},
				{when: time.Now(), out: healthExampleOutput},
			},
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
			},
		},
		{
			name: "errored snapshots are skipped",
			snapshots: []snapshot{
				{when: time.Now(), out: healthExampleOutput},
				{when: time.Now(), out: healthBadOutput, err: errors.New("some error")},
				{when: time.Now(), out: healthExampleOutput},
			},
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
			},
		},
		{
			name: "unparseable snapshots are skipped",
			snapshots: []snapshot{
				{when: time.Now(), out: healthExampleOutput},
				{when: time.Now(), out: "unparseable output"},
				{when: time.Now(), out: healthExampleOutput},
			},
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
			},
		},
		{
			name: "multiple table lines",
			snapshots: []snapshot{
				{when: time.Now(), out: healthTableOutput},
			},
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
			},
		},
		{
			name: "detailed output single item",
			snapshots: []snapshot{
				{when: time.Now(), out: healthDetailedOutputSingle},
			},
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
			},
		},
		{
			name: "detailed output multiple items",
			snapshots: []snapshot{
				{when: time.Now(), out: healthDetailedOutputMultiple},
			},
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
			},
		},
		{
			name: "missing item from detailed output",
			snapshots: []snapshot{
				{when: time.Now(), out: healthMissingField},
			},
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
				FailureOutput: &junitapi.FailureOutput{
					Message: "observed unexpected outputs in oc adm upgrade status health section",
				},
			},
		},
		{
			name: "empty item from detailed output",
			snapshots: []snapshot{
				{when: time.Now(), out: healthEmptyField},
			},
			expected: &junitapi.JUnitTestCase{
				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent",
				FailureOutput: &junitapi.FailureOutput{
					Message: "observed unexpected outputs in oc adm upgrade status health section",
				},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			m := NewOcAdmUpgradeStatusChecker().(*monitor)
			m.ocAdmUpgradeStatus = append(m.ocAdmUpgradeStatus, tc.snapshots...)

			ignoreOutput := cmpopts.IgnoreFields(junitapi.FailureOutput{}, "Output")

			// Process snapshots into models for the health check to work with
			_ = m.expectedLayout()

			result := m.health()
			if diff := cmp.Diff(tc.expected, result, ignoreOutput); diff != "" {
				t.Errorf("unexpected result (-want +got):\n%s", diff)
			}
		})
	}
}
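
Note: the snapshot type, the monitor struct, and the NewOcAdmUpgradeStatusChecker constructor used above are defined elsewhere in the admupgradestatus package and are not part of this diff. As a rough, hypothetical sketch inferred only from how this test builds its fixtures (the when, out, and err fields), a compatible shape could look like the following; the real definition may differ.

package admupgradestatus

import "time"

// snapshot captures one invocation of `oc adm upgrade status`:
// when it ran, the raw output it produced, and whether the
// invocation itself failed (errored snapshots are skipped by the checks).
// This is a sketch inferred from the test above, not the actual definition.
type snapshot struct {
	when time.Time
	out  string
	err  error
}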
