You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
returnfmt.Errorf("could not initialize ocm client: %w", err)
116
107
}
117
108
118
-
cluster, err:=ocmClient.GetClusterInfo(clusterID)
119
-
iferr!=nil {
120
-
ifstrings.Contains(err.Error(), "no cluster found") {
121
-
logging.Warnf("No cluster found with ID '%s'. Exiting.", clusterID)
122
-
returnpdClient.EscalateIncidentWithNote("CAD was unable to find the incident cluster in OCM. An alert for a non-existing cluster is unexpected. Please investigate manually.")
109
+
builder:=&investigation.ResourceBuilderT{}
110
+
deferfunc() {
111
+
iferr!=nil {
112
+
handleCADFailure(err, builder, pdClient)
123
113
}
124
-
returnfmt.Errorf("could not retrieve cluster info for %s: %w", clusterID, err)
125
-
}
126
-
127
-
// From this point on, we normalize to internal ID, as this ID always exists.
128
-
// For installing clusters, externalID can be empty.
129
-
internalClusterID:=cluster.ID()
114
+
}()
130
115
131
-
// re-initialize logger for the internal-cluster-id context
iferr!=nil&&strings.Contains(err.Error(), "no cluster found") {
122
+
logging.Warnf("No cluster found with ID '%s'. Escalating and exiting.", clusterID)
123
+
returnpdClient.EscalateIncidentWithNote("CAD was unable to find the incident cluster in OCM. An alert for a non-existing cluster is unexpected. Please investigate manually.")
returnresult, fmt.Errorf("could not post limited support reason for %s: %w", cluster.Name(), err)
52
+
}
53
+
54
+
returnresult, pdClient.SilenceIncidentWithNote(fmt.Sprintf("Added the following Limited Support reason to cluster: %#v. Silencing alert.\n", ccamLimitedSupport))
55
+
casecmv1.ClusterStateUninstalling:
56
+
// A cluster in uninstalling state should not alert primary - we just skip this
57
+
returnresult, pdClient.SilenceIncidentWithNote(fmt.Sprintf("Skipped adding limited support reason '%s': cluster is already uninstalling.", ccamLimitedSupport.Summary))
58
+
default:
59
+
// Anything else is an unknown state to us and/or requires investigation.
60
+
// E.g. we land here if we run into a CPD alert where credentials were removed (installing state) and don't want to put it in LS yet.
61
+
returnresult, pdClient.EscalateIncidentWithNote(fmt.Sprintf("Cluster has invalid cloud credentials (support role/policy is missing) and the cluster is in state '%s'. Please investigate.", cluster.State()))
54
62
}
55
-
56
-
returnresult, pdClient.SilenceIncidentWithNote(fmt.Sprintf("Added the following Limited Support reason to cluster: %#v. Silencing alert.\n", ccamLimitedSupport))
57
-
casecmv1.ClusterStateUninstalling:
58
-
// A cluster in uninstalling state should not alert primary - we just skip this
59
-
returnresult, pdClient.SilenceIncidentWithNote(fmt.Sprintf("Skipped adding limited support reason '%s': cluster is already uninstalling.", ccamLimitedSupport.Summary))
60
-
default:
61
-
// Anything else is an unknown state to us and/or requires investigation.
62
-
// E.g. we land here if we run into a CPD alert where credentials were removed (installing state) and don't want to put it in LS yet.
63
-
returnresult, pdClient.EscalateIncidentWithNote(fmt.Sprintf("Cluster has invalid cloud credentials (support role/policy is missing) and the cluster is in state '%s'. Please investigate.", cluster.State()))
0 commit comments