|
| 1 | +package admupgradestatus |
| 2 | + |
| 3 | +import ( |
| 4 | + "errors" |
| 5 | + "testing" |
| 6 | + "time" |
| 7 | + |
| 8 | + "github.com/google/go-cmp/cmp" |
| 9 | + "github.com/google/go-cmp/cmp/cmpopts" |
| 10 | + "github.com/openshift/origin/pkg/test/ginkgo/junitapi" |
| 11 | +) |
| 12 | + |
// healthExampleOutput is a status output fixture with Control Plane and Worker
// Upgrade sections followed by an "Update Health" section in table form holding
// a single Info row. TestMonitor_Health uses it as the well-formed snapshot and
// expects no failure to be reported for it.
| 13 | +var healthExampleOutput = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session |
| 14 | += Control Plane = |
| 15 | +Assessment: Progressing |
| 16 | +Target Version: 4.20.0-0.ci-2025-08-13-121604-test-ci-op-njttt0ww-latest (from 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial) |
| 17 | +Updating: kube-apiserver |
| 18 | +Completion: 6% (2 operators updated, 1 updating, 31 waiting) |
| 19 | +Duration: 8m57s (Est. Time Remaining: 1h9m) |
| 20 | +Operator Health: 34 Healthy |
| 21 | +
|
| 22 | +Updating Cluster Operators |
| 23 | +NAME SINCE REASON MESSAGE |
| 24 | +kube-apiserver 7m27s NodeInstaller NodeInstallerProgressing: 1 node is at revision 7; 2 nodes are at revision 8 |
| 25 | +
|
| 26 | +Control Plane Nodes |
| 27 | +NAME ASSESSMENT PHASE VERSION EST MESSAGE |
| 28 | +ip-10-0-111-19.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ? |
| 29 | +ip-10-0-53-218.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ? |
| 30 | +ip-10-0-99-189.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ? |
| 31 | +
|
| 32 | += Worker Upgrade = |
| 33 | +
|
| 34 | +WORKER POOL ASSESSMENT COMPLETION STATUS |
| 35 | +worker Pending 0% (0/3) 3 Available, 0 Progressing, 0 Draining |
| 36 | +
|
| 37 | +Worker Pool Nodes: worker |
| 38 | +NAME ASSESSMENT PHASE VERSION EST MESSAGE |
| 39 | +ip-10-0-0-72.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ? |
| 40 | +ip-10-0-100-255.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ? |
| 41 | +ip-10-0-106-212.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ? |
| 42 | +
|
| 43 | += Update Health = |
| 44 | +SINCE LEVEL IMPACT MESSAGE |
| 45 | +8m57s Info None Update is proceeding well` |
| 46 | + |
// healthBadOutput is a status output fixture that contains a stray
// "SOMETHING UNEXPECTED HERE" line between the Control Plane Nodes table and
// the "Update Health" section. TestMonitor_Health only places it in a snapshot
// that also carries an error, in the "errored snapshots are skipped" case.
| 47 | +var healthBadOutput = `= Control Plane = |
| 48 | +Assessment: Progressing |
| 49 | +Target Version: 4.20.0-0.ci-2025-08-13-121604-test-ci-op-njttt0ww-latest (from 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial) |
| 50 | +Completion: 6% (2 operators updated, 1 updating, 31 waiting) |
| 51 | +Duration: 8m57s (Est. Time Remaining: 1h9m) |
| 52 | +Operator Health: 34 Healthy |
| 53 | +
|
| 54 | +Control Plane Nodes |
| 55 | +NAME ASSESSMENT PHASE VERSION EST MESSAGE |
| 56 | +ip-10-0-111-19.us-west-1.compute.internal Outdated Pending 4.20.0-0.ci-2025-08-13-114210-test-ci-op-njttt0ww-initial ? |
| 57 | +
|
| 58 | +SOMETHING UNEXPECTED HERE |
| 59 | +
|
| 60 | += Update Health = |
| 61 | +SINCE LEVEL IMPACT MESSAGE |
| 62 | +8m57s Info None Update is proceeding well` |
| 63 | + |
// healthTableOutput is a status output fixture whose "Update Health" section is
// in table form with two rows (one Error, one Warning). TestMonitor_Health's
// "multiple table lines" case expects no failure to be reported for it.
| 64 | +var healthTableOutput = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session |
| 65 | += Control Plane = |
| 66 | +Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m) |
| 67 | +
|
| 68 | +All control plane nodes successfully updated to 4.16.0-ec.3 |
| 69 | +
|
| 70 | += Update Health = |
| 71 | +SINCE LEVEL IMPACT MESSAGE |
| 72 | +58m18s Error API Availability Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady) |
| 73 | +now Warning Update Stalled Cluster Version version is failing to proceed with the update (ClusterOperatorsDegraded)` |
| 74 | + |
// healthDetailedOutputSingle is a status output fixture whose "Update Health"
// section uses the detailed form (Message, Since, Level, Impact, Reference,
// Resources, Description) and holds exactly one item. TestMonitor_Health's
// "detailed output single item" case expects no failure for it.
| 75 | +var healthDetailedOutputSingle = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session |
| 76 | += Control Plane = |
| 77 | +Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m) |
| 78 | +
|
| 79 | +All control plane nodes successfully updated to 4.16.0-ec.3 |
| 80 | +
|
| 81 | += Update Health = |
| 82 | +Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady) |
| 83 | + Since: 58m18s |
| 84 | + Level: Error |
| 85 | + Impact: API Availability |
| 86 | + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md |
| 87 | + Resources: |
| 88 | + clusteroperators.config.openshift.io: kube-apiserver |
| 89 | + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?)` |
| 90 | + |
// healthDetailedOutputMultiple is a status output fixture whose "Update Health"
// section uses the detailed form and holds two items separated by a blank line.
// Unlike the other fixtures, the literal begins with a newline.
// TestMonitor_Health's "detailed output multiple items" case expects no failure
// for it.
| 91 | +var healthDetailedOutputMultiple = ` |
| 92 | +Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session |
| 93 | += Control Plane = |
| 94 | +Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m) |
| 95 | +
|
| 96 | +All control plane nodes successfully updated to 4.16.0-ec.3 |
| 97 | +
|
| 98 | += Update Health = |
| 99 | +Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady) |
| 100 | + Since: 58m18s |
| 101 | + Level: Error |
| 102 | + Impact: API Availability |
| 103 | + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md |
| 104 | + Resources: |
| 105 | + clusteroperators.config.openshift.io: kube-apiserver |
| 106 | + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?) |
| 107 | +
|
| 108 | +Message: Cluster Version version is failing to proceed with the update (ClusterOperatorsDegraded) |
| 109 | + Since: now |
| 110 | + Level: Warning |
| 111 | + Impact: Update Stalled |
| 112 | + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md |
| 113 | + Resources: |
| 114 | + clusterversions.config.openshift.io: version |
| 115 | + Description: Cluster operators etcd, kube-apiserver are degraded` |
| 116 | + |
// healthMissingField is a status output fixture whose single detailed
// "Update Health" item has no "Level:" line. TestMonitor_Health's
// "missing item from detailed output" case expects a failure for it.
| 117 | +var healthMissingField = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session |
| 118 | += Control Plane = |
| 119 | +Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m) |
| 120 | +
|
| 121 | +All control plane nodes successfully updated to 4.16.0-ec.3 |
| 122 | +
|
| 123 | += Update Health = |
| 124 | +Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady) |
| 125 | + Since: 58m18s |
| 126 | + Impact: API Availability |
| 127 | + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md |
| 128 | + Resources: |
| 129 | + clusteroperators.config.openshift.io: kube-apiserver |
| 130 | + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?)` |
| 131 | + |
// healthEmptyField is a status output fixture whose single detailed
// "Update Health" item has a "Since:" line with no value. TestMonitor_Health's
// "empty item from detailed output" case expects a failure for it.
| 132 | +var healthEmptyField = `Unable to fetch alerts, ignoring alerts in 'Update Health': failed to get alerts from Thanos: no token is currently in use for this session |
| 133 | += Control Plane = |
| 134 | +Update to 4.16.0-ec.3 successfully completed at 2024-02-27T15:42:58Z (duration: 3h31m) |
| 135 | +
|
| 136 | +All control plane nodes successfully updated to 4.16.0-ec.3 |
| 137 | +
|
| 138 | += Update Health = |
| 139 | +Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady) |
| 140 | + Since: |
| 141 | + Level: Warning |
| 142 | + Impact: API Availability |
| 143 | + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md |
| 144 | + Resources: |
| 145 | + clusteroperators.config.openshift.io: kube-apiserver |
| 146 | + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?)` |
| 147 | + |
| 148 | +func TestMonitor_Health(t *testing.T) { |
| 149 | + t.Parallel() |
| 150 | + |
| 151 | + testCases := []struct { |
| 152 | + name string |
| 153 | + snapshots []snapshot |
| 154 | + expected *junitapi.JUnitTestCase |
| 155 | + }{ |
| 156 | + { |
| 157 | + name: "no snapshots -> test skipped", |
| 158 | + expected: &junitapi.JUnitTestCase{ |
| 159 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 160 | + SkipMessage: &junitapi.SkipMessage{ |
| 161 | + Message: "Test skipped because no oc adm upgrade status output was successfully collected", |
| 162 | + }, |
| 163 | + }, |
| 164 | + }, |
| 165 | + { |
| 166 | + name: "good snapshots", |
| 167 | + snapshots: []snapshot{ |
| 168 | + {when: time.Now(), out: healthExampleOutput}, |
| 169 | + {when: time.Now(), out: healthExampleOutput}, |
| 170 | + {when: time.Now(), out: healthExampleOutput}, |
| 171 | + }, |
| 172 | + expected: &junitapi.JUnitTestCase{ |
| 173 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 174 | + }, |
| 175 | + }, |
| 176 | + { |
| 177 | + name: "errored snapshots are skipped", |
| 178 | + snapshots: []snapshot{ |
| 179 | + {when: time.Now(), out: healthExampleOutput}, |
| 180 | + {when: time.Now(), out: healthBadOutput, err: errors.New("some error")}, |
| 181 | + {when: time.Now(), out: healthExampleOutput}, |
| 182 | + }, |
| 183 | + expected: &junitapi.JUnitTestCase{ |
| 184 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 185 | + }, |
| 186 | + }, |
| 187 | + { |
| 188 | + name: "unparseable snapshots are skipped", |
| 189 | + snapshots: []snapshot{ |
| 190 | + {when: time.Now(), out: healthExampleOutput}, |
| 191 | + {when: time.Now(), out: "unparseable output"}, |
| 192 | + {when: time.Now(), out: healthExampleOutput}, |
| 193 | + }, |
| 194 | + expected: &junitapi.JUnitTestCase{ |
| 195 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 196 | + }, |
| 197 | + }, |
| 198 | + { |
| 199 | + name: "multiple table lines", |
| 200 | + snapshots: []snapshot{ |
| 201 | + {when: time.Now(), out: healthTableOutput}, |
| 202 | + }, |
| 203 | + expected: &junitapi.JUnitTestCase{ |
| 204 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 205 | + }, |
| 206 | + }, |
| 207 | + { |
| 208 | + name: "detailed output single item", |
| 209 | + snapshots: []snapshot{ |
| 210 | + {when: time.Now(), out: healthDetailedOutputSingle}, |
| 211 | + }, |
| 212 | + expected: &junitapi.JUnitTestCase{ |
| 213 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 214 | + }, |
| 215 | + }, |
| 216 | + { |
| 217 | + name: "detailed output multiple items", |
| 218 | + snapshots: []snapshot{ |
| 219 | + {when: time.Now(), out: healthDetailedOutputMultiple}, |
| 220 | + }, |
| 221 | + expected: &junitapi.JUnitTestCase{ |
| 222 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 223 | + }, |
| 224 | + }, |
| 225 | + { |
| 226 | + name: "missing item from detailed output", |
| 227 | + snapshots: []snapshot{ |
| 228 | + {when: time.Now(), out: healthMissingField}, |
| 229 | + }, |
| 230 | + expected: &junitapi.JUnitTestCase{ |
| 231 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 232 | + FailureOutput: &junitapi.FailureOutput{ |
| 233 | + Message: "observed unexpected outputs in oc adm upgrade status health section", |
| 234 | + }, |
| 235 | + }, |
| 236 | + }, |
| 237 | + { |
| 238 | + name: "empty item from detailed output", |
| 239 | + snapshots: []snapshot{ |
| 240 | + {when: time.Now(), out: healthEmptyField}, |
| 241 | + }, |
| 242 | + expected: &junitapi.JUnitTestCase{ |
| 243 | + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", |
| 244 | + FailureOutput: &junitapi.FailureOutput{ |
| 245 | + Message: "observed unexpected outputs in oc adm upgrade status health section", |
| 246 | + }, |
| 247 | + }, |
| 248 | + }, |
| 249 | + } |
| 250 | + |
| 251 | + for _, tc := range testCases { |
| 252 | + t.Run(tc.name, func(t *testing.T) { |
| 253 | + t.Parallel() |
| 254 | + |
| 255 | + m := NewOcAdmUpgradeStatusChecker().(*monitor) |
| 256 | + m.ocAdmUpgradeStatus = append(m.ocAdmUpgradeStatus, tc.snapshots...) |
| 257 | + |
| 258 | + ignoreOutput := cmpopts.IgnoreFields(junitapi.FailureOutput{}, "Output") |
| 259 | + |
| 260 | + // Process snapshots into models for the health check to work with |
| 261 | + _ = m.expectedLayout() |
| 262 | + |
| 263 | + result := m.health() |
| 264 | + if diff := cmp.Diff(tc.expected, result, ignoreOutput); diff != "" { |
| 265 | + t.Errorf("unexpected result (-want +got):\n%s", diff) |
| 266 | + } |
| 267 | + }) |
| 268 | + } |
| 269 | +} |
0 commit comments