|
| 1 | +package updatestatus |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + "time" |
| 7 | + |
| 8 | + "gopkg.in/yaml.v3" |
| 9 | + "k8s.io/apimachinery/pkg/api/errors" |
| 10 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 11 | + "k8s.io/apimachinery/pkg/runtime" |
| 12 | + "k8s.io/klog/v2" |
| 13 | + |
| 14 | + configv1 "github.com/openshift/api/config/v1" |
| 15 | + configinformers "github.com/openshift/client-go/config/informers/externalversions" |
| 16 | + configv1listers "github.com/openshift/client-go/config/listers/config/v1" |
| 17 | + "github.com/openshift/library-go/pkg/controller/factory" |
| 18 | + "github.com/openshift/library-go/pkg/operator/events" |
| 19 | + |
| 20 | + "github.com/openshift/cluster-version-operator/lib/resourcemerge" |
| 21 | +) |
| 22 | + |
| 23 | +// controlPlaneInformerController is the controller that monitors health of the control plane-related resources (initially, |
| 24 | +// just ClusterVersion but will need to handle ClusterOperators too) and produces insights for control plane update. |
| 25 | +type controlPlaneInformerController struct { |
| 26 | + clusterVersions configv1listers.ClusterVersionLister |
| 27 | + recorder events.Recorder |
| 28 | + |
| 29 | + // sendInsight should be called to send produced insights to the update status controller |
| 30 | + sendInsight sendInsightFn |
| 31 | + |
| 32 | + // now is a function that returns the current time, used for testing |
| 33 | + now func() metav1.Time |
| 34 | +} |
| 35 | + |
| 36 | +func newControlPlaneInformerController( |
| 37 | + configInformers configinformers.SharedInformerFactory, |
| 38 | + recorder events.Recorder, |
| 39 | + sendInsight sendInsightFn, |
| 40 | +) factory.Controller { |
| 41 | + cpiRecorder := recorder.WithComponentSuffix("control-plane-informer") |
| 42 | + |
| 43 | + c := &controlPlaneInformerController{ |
| 44 | + clusterVersions: configInformers.Config().V1().ClusterVersions().Lister(), |
| 45 | + recorder: cpiRecorder, |
| 46 | + sendInsight: sendInsight, |
| 47 | + |
| 48 | + now: metav1.Now, |
| 49 | + } |
| 50 | + |
| 51 | + cvInformer := configInformers.Config().V1().ClusterVersions().Informer() |
| 52 | + |
| 53 | + controller := factory.New(). |
| 54 | + // call sync on ClusterVersion changes |
| 55 | + WithInformersQueueKeysFunc(configApiQueueKeys, cvInformer). |
| 56 | + WithSync(c.sync). |
| 57 | + ToController("ControlPlaneInformer", c.recorder) |
| 58 | + |
| 59 | + return controller |
| 60 | +} |
| 61 | + |
| 62 | +// sync is called for any controller event. It will assess the state and health of the control plane, indicated by |
| 63 | +// the changed resource (ClusterVersion), produce insights, and send them to the update status controller. Status |
| 64 | +// insights are not stored between calls, so every call produces a fresh insight. This means some fields do not follow |
| 65 | +// conventions, like LastTransitionTime in the Updating condition. Proper continuous insight maintenance will need to |
| 66 | +// be added later (not yet sure whether on consumer or producer side). |
| 67 | +func (c *controlPlaneInformerController) sync(_ context.Context, syncCtx factory.SyncContext) error { |
| 68 | + queueKey := syncCtx.QueueKey() |
| 69 | + |
| 70 | + clusterVersion, err := c.clusterVersions.Get(queueKey) |
| 71 | + if err != nil { |
| 72 | + if errors.IsNotFound(err) { |
| 73 | + // TODO: Handle deletes by deleting the status insight |
| 74 | + return nil |
| 75 | + } |
| 76 | + return err |
| 77 | + } |
| 78 | + |
| 79 | + now := c.now() |
| 80 | + insight := assessClusterVersion(clusterVersion, now) |
| 81 | + msg := makeInsightMsgForClusterVersion(insight, now) |
| 82 | + var msgForLog string |
| 83 | + if klog.V(4).Enabled() { |
| 84 | + msgForLog = fmt.Sprintf(" | msg=%s", string(msg.insight)) |
| 85 | + } |
| 86 | + klog.V(2).Infof("CPI :: Syncing ClusterVersion %s%s", clusterVersion.Name, msgForLog) |
| 87 | + c.sendInsight(msg) |
| 88 | + |
| 89 | + return nil |
| 90 | +} |
| 91 | + |
| 92 | +// makeInsightMsgForClusterVersion creates an informerMsg for the given ClusterVersionStatusInsight. It defines an uid |
| 93 | +// name and serializes the insight as YAML. Serialization is convenient because it prevents any data sharing issues |
| 94 | +// between controllers. |
| 95 | +func makeInsightMsgForClusterVersion(cvInsight *ClusterVersionStatusInsight, acquiredAt metav1.Time) informerMsg { |
| 96 | + uid := fmt.Sprintf("usc-cv-%s", cvInsight.Resource.Name) |
| 97 | + insight := Insight{ |
| 98 | + UID: uid, |
| 99 | + AcquiredAt: acquiredAt, |
| 100 | + InsightUnion: InsightUnion{ |
| 101 | + Type: ClusterVersionStatusInsightType, |
| 102 | + ClusterVersionStatusInsight: cvInsight, |
| 103 | + }, |
| 104 | + } |
| 105 | + // Should handle errors, but ultimately we will have a proper API and won’t need to serialize ourselves |
| 106 | + rawInsight, _ := yaml.Marshal(insight) |
| 107 | + return informerMsg{ |
| 108 | + uid: uid, |
| 109 | + insight: rawInsight, |
| 110 | + } |
| 111 | +} |
| 112 | + |
| 113 | +// assessClusterVersion produces a ClusterVersion status insight from the current state of the ClusterVersion resource. |
| 114 | +// It does not take previous status insight into account. Many fields of the status insights (such as completion) cannot |
| 115 | +// be properly calculated without also watching and processing ClusterOperators, so that functionality will need to be |
| 116 | +// added later. |
| 117 | +func assessClusterVersion(cv *configv1.ClusterVersion, now metav1.Time) *ClusterVersionStatusInsight { |
| 118 | + |
| 119 | + var lastHistoryItem *configv1.UpdateHistory |
| 120 | + if len(cv.Status.History) > 0 { |
| 121 | + lastHistoryItem = &cv.Status.History[0] |
| 122 | + } |
| 123 | + cvProgressing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.OperatorProgressing) |
| 124 | + |
| 125 | + updating, startedAt, completedAt := isControlPlaneUpdating(cvProgressing, lastHistoryItem) |
| 126 | + updating.LastTransitionTime = now |
| 127 | + |
| 128 | + klog.V(2).Infof("CPI :: CV/%s :: Updating=%s Started=%s Completed=%s", cv.Name, updating.Status, startedAt, completedAt) |
| 129 | + |
| 130 | + var assessment ControlPlaneAssessment |
| 131 | + var completion int32 |
| 132 | + switch updating.Status { |
| 133 | + case metav1.ConditionTrue: |
| 134 | + assessment = ControlPlaneAssessmentProgressing |
| 135 | + case metav1.ConditionFalse: |
| 136 | + assessment = ControlPlaneAssessmentCompleted |
| 137 | + completion = 100 |
| 138 | + case metav1.ConditionUnknown: |
| 139 | + assessment = ControlPlaneAssessmentUnknown |
| 140 | + default: |
| 141 | + assessment = ControlPlaneAssessmentUnknown |
| 142 | + } |
| 143 | + |
| 144 | + klog.V(2).Infof("CPI :: CV/%s :: Assessment=%s", cv.Name, assessment) |
| 145 | + |
| 146 | + insight := &ClusterVersionStatusInsight{ |
| 147 | + Resource: ResourceRef{ |
| 148 | + Resource: "clusterversions", |
| 149 | + Group: configv1.GroupName, |
| 150 | + Name: cv.Name, |
| 151 | + }, |
| 152 | + Assessment: assessment, |
| 153 | + Versions: versionsFromHistory(cv.Status.History), |
| 154 | + Completion: completion, |
| 155 | + StartedAt: startedAt, |
| 156 | + Conditions: []metav1.Condition{updating}, |
| 157 | + } |
| 158 | + |
| 159 | + if !completedAt.IsZero() { |
| 160 | + insight.CompletedAt = &completedAt |
| 161 | + } |
| 162 | + |
| 163 | + if est := estimateCompletion(startedAt.Time); !est.IsZero() { |
| 164 | + insight.EstimatedCompletedAt = &metav1.Time{Time: est} |
| 165 | + } |
| 166 | + |
| 167 | + return insight |
| 168 | +} |
| 169 | + |
// estimateCompletion returns a time.Time that is 60 minutes after the given start time. When the start time is the
// zero value (the update start is unknown) it returns the zero time.Time, so that callers checking IsZero on the
// result do not publish a meaningless estimate. Proper estimation needs to be added once the controller starts
// handling ClusterOperators.
func estimateCompletion(started time.Time) time.Time {
	if started.IsZero() {
		// Without a start time there is nothing to base an estimate on.
		return time.Time{}
	}
	return started.Add(60 * time.Minute)
}
| 175 | + |
| 176 | +// isControlPlaneUpdating determines whether the control plane is updating based on the ClusterVersion's Progressing |
| 177 | +// condition and the last history item. It returns an updating condition, the time the update started, and the time the |
| 178 | +// update completed. If the updating condition cannot be determined, the condition will have Status=Unknown and the |
| 179 | +// Reason and Message fields will explain why. |
| 180 | +func isControlPlaneUpdating(cvProgressing *configv1.ClusterOperatorStatusCondition, lastHistoryItem *configv1.UpdateHistory) (metav1.Condition, metav1.Time, metav1.Time) { |
| 181 | + updating := metav1.Condition{ |
| 182 | + Type: string(ClusterVersionStatusInsightUpdating), |
| 183 | + } |
| 184 | + |
| 185 | + if cvProgressing == nil { |
| 186 | + setCannotDetermineUpdating(&updating, "No Progressing condition in ClusterVersion") |
| 187 | + return updating, metav1.Time{}, metav1.Time{} |
| 188 | + } |
| 189 | + if lastHistoryItem == nil { |
| 190 | + setCannotDetermineUpdating(&updating, "Empty history in ClusterVersion") |
| 191 | + return updating, metav1.Time{}, metav1.Time{} |
| 192 | + } |
| 193 | + |
| 194 | + updating.Status, updating.Reason, updating.Message = cvProgressingToUpdating(*cvProgressing) |
| 195 | + |
| 196 | + var started metav1.Time |
| 197 | + // Looks like we are updating |
| 198 | + if cvProgressing.Status == configv1.ConditionTrue { |
| 199 | + if lastHistoryItem.State != configv1.PartialUpdate { |
| 200 | + setCannotDetermineUpdating(&updating, "Progressing=True in ClusterVersion but last history item is not Partial") |
| 201 | + } else if lastHistoryItem.CompletionTime != nil { |
| 202 | + setCannotDetermineUpdating(&updating, "Progressing=True in ClusterVersion but last history item has completion time") |
| 203 | + } else { |
| 204 | + started = lastHistoryItem.StartedTime |
| 205 | + } |
| 206 | + } |
| 207 | + |
| 208 | + var completed metav1.Time |
| 209 | + // Looks like we are not updating |
| 210 | + if cvProgressing.Status == configv1.ConditionFalse { |
| 211 | + if lastHistoryItem.State != configv1.CompletedUpdate { |
| 212 | + setCannotDetermineUpdating(&updating, "Progressing=False in ClusterVersion but last history item is not completed") |
| 213 | + } else if lastHistoryItem.CompletionTime == nil { |
| 214 | + setCannotDetermineUpdating(&updating, "Progressing=False in ClusterVersion but not no completion in last history item") |
| 215 | + } else { |
| 216 | + started = lastHistoryItem.StartedTime |
| 217 | + completed = *lastHistoryItem.CompletionTime |
| 218 | + } |
| 219 | + } |
| 220 | + |
| 221 | + return updating, started, completed |
| 222 | +} |
| 223 | + |
| 224 | +func setCannotDetermineUpdating(cond *metav1.Condition, message string) { |
| 225 | + cond.Status = metav1.ConditionUnknown |
| 226 | + cond.Reason = string(ClusterVersionCannotDetermineUpdating) |
| 227 | + cond.Message = message |
| 228 | +} |
| 229 | + |
| 230 | +// cvProgressingToUpdating returns a status, reason and message for the updating condition based on the cvProgressing |
| 231 | +// condition. |
| 232 | +func cvProgressingToUpdating(cvProgressing configv1.ClusterOperatorStatusCondition) (metav1.ConditionStatus, string, string) { |
| 233 | + status := metav1.ConditionStatus(cvProgressing.Status) |
| 234 | + var reason string |
| 235 | + switch status { |
| 236 | + case metav1.ConditionTrue: |
| 237 | + reason = string(ClusterVersionProgressing) |
| 238 | + case metav1.ConditionFalse: |
| 239 | + reason = string(ClusterVersionNotProgressing) |
| 240 | + case metav1.ConditionUnknown: |
| 241 | + reason = string(ClusterVersionCannotDetermineUpdating) |
| 242 | + default: |
| 243 | + reason = string(ClusterVersionCannotDetermineUpdating) |
| 244 | + } |
| 245 | + |
| 246 | + message := fmt.Sprintf("ClusterVersion has Progressing=%s(Reason=%s) | Message='%s'", cvProgressing.Status, cvProgressing.Reason, cvProgressing.Message) |
| 247 | + return status, reason, message |
| 248 | +} |
| 249 | + |
| 250 | +// versionsFromHistory returns a ControlPlaneUpdateVersions struct with the target version and metadata from the given |
| 251 | +// history. |
| 252 | +func versionsFromHistory(history []configv1.UpdateHistory) ControlPlaneUpdateVersions { |
| 253 | + var versions ControlPlaneUpdateVersions |
| 254 | + |
| 255 | + if len(history) == 0 { |
| 256 | + return versions |
| 257 | + } |
| 258 | + |
| 259 | + versions.Target.Version = history[0].Version |
| 260 | + |
| 261 | + if len(history) == 1 { |
| 262 | + versions.Target.Metadata = []VersionMetadata{{Key: InstallationMetadata}} |
| 263 | + } |
| 264 | + if len(history) > 1 { |
| 265 | + versions.Previous.Version = history[1].Version |
| 266 | + if history[1].State == configv1.PartialUpdate { |
| 267 | + versions.Previous.Metadata = []VersionMetadata{{Key: PartialMetadata}} |
| 268 | + } |
| 269 | + } |
| 270 | + return versions |
| 271 | +} |
| 272 | + |
| 273 | +func configApiQueueKeys(object runtime.Object) []string { |
| 274 | + if object == nil { |
| 275 | + return nil |
| 276 | + } |
| 277 | + |
| 278 | + switch o := object.(type) { |
| 279 | + case *configv1.ClusterVersion: |
| 280 | + return []string{o.Name} |
| 281 | + } |
| 282 | + |
| 283 | + klog.Fatalf("USC :: Unknown object type: %T", object) |
| 284 | + return nil |
| 285 | +} |
0 commit comments