@@ -2,13 +2,16 @@ package updatestatus
2
2
3
3
import (
4
4
"context"
5
+ "errors"
5
6
"fmt"
7
+ "strings"
6
8
"time"
7
9
8
10
"gopkg.in/yaml.v3"
9
- "k8s.io/apimachinery/pkg/api/errors"
11
+ kerrors "k8s.io/apimachinery/pkg/api/errors"
10
12
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11
13
"k8s.io/apimachinery/pkg/runtime"
14
+ appsv1client "k8s.io/client-go/kubernetes/typed/apps/v1"
12
15
"k8s.io/klog/v2"
13
16
14
17
configv1 "github.com/openshift/api/config/v1"
@@ -20,75 +23,272 @@ import (
20
23
"github.com/openshift/cluster-version-operator/lib/resourcemerge"
21
24
)
22
25
23
- // controlPlaneInformerController is the controller that monitors health of the control plane-related resources (initially,
24
- // just ClusterVersion but will need to handle ClusterOperators too) and produces insights for control plane update.
26
+ // controlPlaneInformerController is the controller that monitors health of the control plane-related resources
27
+ // and produces insights for control plane update.
25
28
type controlPlaneInformerController struct {
26
- clusterVersions configv1listers.ClusterVersionLister
27
- recorder events.Recorder
29
+ clusterVersions configv1listers.ClusterVersionLister
30
+ clusterOperators configv1listers.ClusterOperatorLister
31
+ recorder events.Recorder
28
32
29
33
// sendInsight should be called to send produced insights to the update status controller
30
34
sendInsight sendInsightFn
31
35
36
+ appsClient appsv1client.AppsV1Interface
37
+
32
38
// now is a function that returns the current time, used for testing
33
39
now func () metav1.Time
34
40
}
35
41
36
42
func newControlPlaneInformerController (
43
+ appsClient appsv1client.AppsV1Interface ,
37
44
configInformers configinformers.SharedInformerFactory ,
38
45
recorder events.Recorder ,
39
46
sendInsight sendInsightFn ,
40
47
) factory.Controller {
41
48
cpiRecorder := recorder .WithComponentSuffix ("control-plane-informer" )
42
49
43
50
c := & controlPlaneInformerController {
44
- clusterVersions : configInformers .Config ().V1 ().ClusterVersions ().Lister (),
45
- recorder : cpiRecorder ,
46
- sendInsight : sendInsight ,
51
+ clusterVersions : configInformers .Config ().V1 ().ClusterVersions ().Lister (),
52
+ clusterOperators : configInformers .Config ().V1 ().ClusterOperators ().Lister (),
53
+ recorder : cpiRecorder ,
54
+ sendInsight : sendInsight ,
55
+ appsClient : appsClient ,
47
56
48
57
now : metav1 .Now ,
49
58
}
50
59
51
60
cvInformer := configInformers .Config ().V1 ().ClusterVersions ().Informer ()
61
+ coInformer := configInformers .Config ().V1 ().ClusterOperators ().Informer ()
52
62
53
63
controller := factory .New ().
54
64
// call sync on ClusterVersion changes
55
65
WithInformersQueueKeysFunc (configApiQueueKeys , cvInformer ).
66
+ // call sync on ClusterOperator changes with a filter
67
+ WithFilteredEventsInformersQueueKeysFunc (configApiQueueKeys , clusterOperatorEventFilterFunc , coInformer ).
56
68
WithSync (c .sync ).
57
69
ToController ("ControlPlaneInformer" , c .recorder )
58
70
59
71
return controller
60
72
}
61
73
74
+ func clusterOperatorEventFilterFunc (obj interface {}) bool {
75
+ co , ok := obj .(* configv1.ClusterOperator )
76
+ if ok {
77
+ for annotation := range co .Annotations {
78
+ if strings .HasPrefix (annotation , "exclude.release.openshift.io/" ) ||
79
+ strings .HasPrefix (annotation , "include.release.openshift.io/" ) {
80
+ return true
81
+ }
82
+ }
83
+ }
84
+ return false
85
+ }
86
+
87
// Kind names used as the first component of the "<kind>/<name>" queue keys built by
// configApiQueueKeys and dispatched on in sync.
const (
	// clusterVersionKindName prefixes queue keys of ClusterVersion resources
	clusterVersionKindName = "ClusterVersion"
	// clusterOperatorKindName prefixes queue keys of ClusterOperator resources
	clusterOperatorKindName = "ClusterOperator"
)
91
+
62
92
// sync is called for any controller event. It will assess the state and health of the control plane, indicated by
63
93
// the changed resource (ClusterVersion), produce insights, and send them to the update status controller. Status
64
94
// insights are not stored between calls, so every call produces a fresh insight. This means some fields do not follow
65
95
// conventions, like LastTransitionTime in the Updating condition. Proper continuous insight maintenance will need to
66
96
// be added later (not yet sure whether on consumer or producer side).
67
- func (c * controlPlaneInformerController ) sync (_ context.Context , syncCtx factory.SyncContext ) error {
97
+ func (c * controlPlaneInformerController ) sync (ctx context.Context , syncCtx factory.SyncContext ) error {
68
98
queueKey := syncCtx .QueueKey ()
69
99
70
- clusterVersion , err := c . clusterVersions . Get (queueKey )
100
+ t , name , err := parseQueueKey (queueKey )
71
101
if err != nil {
72
- if errors .IsNotFound (err ) {
73
- // TODO: Handle deletes by deleting the status insight
74
- return nil
75
- }
76
- return err
102
+ return fmt .Errorf ("failed to parse queue key: %w" , err )
77
103
}
78
104
79
- now := c .now ()
80
- insight := assessClusterVersion (clusterVersion , now )
81
- msg := makeInsightMsgForClusterVersion (insight , now )
105
+ var msg informerMsg
106
+ switch t {
107
+ case clusterVersionKindName :
108
+ clusterVersion , err := c .clusterVersions .Get (name )
109
+ if err != nil {
110
+ if kerrors .IsNotFound (err ) {
111
+ // TODO: Handle deletes by deleting the status insight
112
+ return nil
113
+ }
114
+ return err
115
+ }
116
+
117
+ now := c .now ()
118
+ insight := assessClusterVersion (clusterVersion , now )
119
+ msg = makeInsightMsgForClusterVersion (insight , now )
120
+
121
+ case clusterOperatorKindName :
122
+ clusterVersion , err := c .clusterVersions .Get ("version" )
123
+ if err != nil {
124
+ return err
125
+ }
126
+ targetVersion := clusterVersion .Status .Desired .Version
127
+
128
+ clusterOperator , err := c .clusterOperators .Get (name )
129
+ if err != nil {
130
+ if kerrors .IsNotFound (err ) {
131
+ // TODO: Handle deletes by deleting the status insight
132
+ return nil
133
+ }
134
+ return err
135
+ }
136
+
137
+ now := c .now ()
138
+ insight , err := assessClusterOperator (ctx , clusterOperator , targetVersion , c .appsClient , now )
139
+ if err != nil {
140
+ return fmt .Errorf ("failed to assess cluster operator %s: %w" , name , err )
141
+ }
142
+ msg = makeInsightMsgForClusterOperator (insight , now )
143
+ default :
144
+ return fmt .Errorf ("invalid queue key %s with unexpected type %s" , queueKey , t )
145
+ }
82
146
var msgForLog string
83
147
if klog .V (4 ).Enabled () {
84
148
msgForLog = fmt .Sprintf (" | msg=%s" , string (msg .insight ))
85
149
}
86
- klog .V (2 ).Infof ("CPI :: Syncing ClusterVersion %s%s" , clusterVersion . Name , msgForLog )
150
+ klog .V (2 ).Infof ("CPI :: Syncing %s %s%s" , t , name , msgForLog )
87
151
c .sendInsight (msg )
88
152
89
153
return nil
90
154
}
91
155
156
+ func makeInsightMsgForClusterOperator (coInsight * ClusterOperatorStatusInsight , acquiredAt metav1.Time ) informerMsg {
157
+ uid := fmt .Sprintf ("usc-co-%s" , coInsight .Name )
158
+ insight := Insight {
159
+ UID : uid ,
160
+ AcquiredAt : acquiredAt ,
161
+ InsightUnion : InsightUnion {
162
+ Type : ClusterOperatorStatusInsightType ,
163
+ ClusterOperatorStatusInsight : coInsight ,
164
+ },
165
+ }
166
+ // Should handle errors, but ultimately we will have a proper API and won’t need to serialize ourselves
167
+ rawInsight , _ := yaml .Marshal (insight )
168
+ return informerMsg {
169
+ uid : uid ,
170
+ insight : rawInsight ,
171
+ }
172
+ }
173
+
174
+ func assessClusterOperator (ctx context.Context , operator * configv1.ClusterOperator , targetVersion string , appsClient appsv1client.AppsV1Interface , now metav1.Time ) (* ClusterOperatorStatusInsight , error ) {
175
+ updating := metav1.Condition {
176
+ Type : string (ClusterOperatorStatusInsightUpdating ),
177
+ Status : metav1 .ConditionUnknown ,
178
+ Reason : string (ClusterOperatorUpdatingCannotDetermine ),
179
+ LastTransitionTime : now ,
180
+ }
181
+
182
+ imagePullSpec , err := getImagePullSpec (ctx , operator .Name , appsClient )
183
+ if err != nil && ! errors .Is (err , operatorImageNotImplemented ) {
184
+ return nil , err
185
+ }
186
+
187
+ noOperatorImageVersion := true
188
+ var operatorImageUpdated , versionUpdated bool
189
+ for _ , version := range operator .Status .Versions {
190
+ if version .Name == "operator-image" {
191
+ noOperatorImageVersion = false
192
+ if imagePullSpec != "" && imagePullSpec == version .Version {
193
+ operatorImageUpdated = true
194
+ }
195
+ }
196
+ if version .Name == "operator" && version .Version == targetVersion {
197
+ versionUpdated = true
198
+ }
199
+ }
200
+
201
+ // "operator-image" might not be implemented by every cluster operator
202
+ updated := (noOperatorImageVersion || operatorImageUpdated ) && versionUpdated
203
+ if updated {
204
+ updating .Status = metav1 .ConditionFalse
205
+ updating .Reason = string (ClusterOperatorUpdatingReasonUpdated )
206
+ }
207
+
208
+ var available * configv1.ClusterOperatorStatusCondition
209
+ var degraded * configv1.ClusterOperatorStatusCondition
210
+ var progressing * configv1.ClusterOperatorStatusCondition
211
+
212
+ for _ , condition := range operator .Status .Conditions {
213
+ condition := condition
214
+ switch {
215
+ case condition .Type == configv1 .OperatorAvailable :
216
+ available = & condition
217
+ case condition .Type == configv1 .OperatorDegraded :
218
+ degraded = & condition
219
+ case condition .Type == configv1 .OperatorProgressing :
220
+ progressing = & condition
221
+ }
222
+ }
223
+
224
+ if ! updated && progressing != nil {
225
+ if progressing .Status == configv1 .ConditionTrue {
226
+ updating .Status = metav1 .ConditionTrue
227
+ updating .Reason = string (ClusterOperatorUpdatingReasonProgressing )
228
+ updating .Message = progressing .Message
229
+ }
230
+ if progressing .Status == configv1 .ConditionFalse {
231
+ updating .Status = metav1 .ConditionFalse
232
+ updating .Reason = string (ClusterOperatorUpdatingReasonPending )
233
+ updating .Message = progressing .Message
234
+ }
235
+ }
236
+
237
+ health := metav1.Condition {
238
+ Type : string (ClusterOperatorStatusInsightHealthy ),
239
+ Status : metav1 .ConditionTrue ,
240
+ Reason : string (ClusterOperatorHealthyReasonAsExpected ),
241
+ LastTransitionTime : now ,
242
+ }
243
+
244
+ if available == nil {
245
+ health .Status = metav1 .ConditionUnknown
246
+ health .Reason = string (ClusterOperatorHealthyReasonUnavailable )
247
+ health .Message = "The cluster operator is unavailable because the available condition is not found in the cluster operator's status"
248
+ } else if available .Status != configv1 .ConditionTrue {
249
+ health .Status = metav1 .ConditionFalse
250
+ health .Reason = string (ClusterOperatorHealthyReasonUnavailable )
251
+ health .Message = available .Message
252
+ } else if degraded != nil && degraded .Status == configv1 .ConditionTrue {
253
+ health .Status = metav1 .ConditionFalse
254
+ health .Reason = string (ClusterOperatorHealthyReasonDegraded )
255
+ health .Message = degraded .Message
256
+ }
257
+
258
+ return & ClusterOperatorStatusInsight {
259
+ Name : operator .Name ,
260
+ Resource : ResourceRef {
261
+ Resource : "clusteroperators" ,
262
+ Group : configv1 .GroupName ,
263
+ Name : operator .Name ,
264
+ },
265
+ Conditions : []metav1.Condition {updating , health },
266
+ }, nil
267
+ }
268
+
269
+ var operatorImageNotImplemented = errors .New ("operator-image not implemented in the versions from cluster operator's status" )
270
+
271
+ func getImagePullSpec (ctx context.Context , name string , appsClient appsv1client.AppsV1Interface ) (string , error ) {
272
+ // It is known that the image pull spec for co/machine-config can be accessed from the deployment
273
+ if name == "machine-config" {
274
+ if appsClient == nil {
275
+ return "" , errors .New ("apps client is nil" )
276
+ }
277
+ mcoDeployment , err := appsClient .Deployments ("openshift-machine-config-operator" ).Get (ctx , "machine-config-operator" , metav1.GetOptions {})
278
+ if err != nil {
279
+ return "" , err
280
+ }
281
+ for _ , c := range mcoDeployment .Spec .Template .Spec .Containers {
282
+ if c .Name == "machine-config-operator" {
283
+ return c .Image , nil
284
+ }
285
+ }
286
+ return "" , errors .New ("machine-config-operator container not found" )
287
+ }
288
+ // We may add here retrieval of the image pull spec for other COs when they implement "operator-image" in the status.versions
289
+ return "" , operatorImageNotImplemented
290
+ }
291
+
92
292
// makeInsightMsgForClusterVersion creates an informerMsg for the given ClusterVersionStatusInsight. It defines an uid
93
293
// name and serializes the insight as YAML. Serialization is convenient because it prevents any data sharing issues
94
294
// between controllers.
@@ -270,16 +470,27 @@ func versionsFromHistory(history []configv1.UpdateHistory) ControlPlaneUpdateVer
270
470
return versions
271
471
}
272
472
473
// parseQueueKey splits a queue key of the form "<kind>/<name>" into its kind and name. The key must contain
// exactly one "/" separator; otherwise an error is returned together with empty strings. Empty components
// are not rejected here.
func parseQueueKey(queueKey string) (string, string, error) {
	kind, name, found := strings.Cut(queueKey, "/")
	// A second separator would end up in name, so checking it enforces "exactly one separator"
	if !found || strings.Contains(name, "/") {
		return "", "", fmt.Errorf("invalid queue key: %s", queueKey)
	}
	return kind, name, nil
}
480
+
273
481
func configApiQueueKeys (object runtime.Object ) []string {
274
482
if object == nil {
275
483
return nil
276
484
}
277
485
278
486
switch o := object .(type ) {
279
487
case * configv1.ClusterVersion :
280
- return []string {o .Name }
488
+ return []string {fmt .Sprintf ("%s/%s" , clusterVersionKindName , o .Name )}
489
+ case * configv1.ClusterOperator :
490
+ return []string {fmt .Sprintf ("%s/%s" , clusterOperatorKindName , o .Name )}
281
491
}
282
492
283
- klog .Fatalf ("USC :: Unknown object type: %T" , object )
284
- return nil
493
+ msg := fmt .Sprintf ("USC :: Unknown object type: %T" , object )
494
+ klog .Error (msg )
495
+ panic (msg )
285
496
}
0 commit comments