Skip to content

Commit 94a626d

Browse files
committed
OTA-1269: Add scaffolding of the Update Status Controller
As agreed in OTA-1269, the controller uses ConfigMap as a pseudo-api until we have a real API committed to openshift/api. The Update Status Controller is a new Cobra subcommand of the CVO binary. The new command launches two controllers for now: 1. **Control Plane Informer** (CPI) monitors ClusterVersion resources (for now) and produces the respective ClusterVersion status insights. The functionality is limited, because part of the logic necessary to populate all fields on a CV insight also requires monitoring and processing ClusterOperators, which is not present yet. 1. **Update Status Controller** (USC) receives insights from the CPI and maintains the ConfigMap that mimics the eventual Status API. Squashed review-feedback commits: USC: Treat version specifically in USC library-go code wants a different notion of version (k8s `version.Info` struct) than CVO uses. To avoid messing with CVO at this stage of USC, make USC use a minimal necessary version code, isolated from existing CVO. Mark this as tech debt to be solved before we graduate to TechPreview USC: Log messages sent by CPI at V(4) USC: Explain not waiting for informer cache sync USC: Start insight receiver as PostStartHook Previously the code spawned the receiver goroutine in the constructor, which was surprising and poorly structured. Spawn the receiver in a `PostStartHook` that is called by the library code after the controller is started. This also has an advantage of having access to controller queue, which means the receiver can trigger standard controller sync instead of calling controller internals. Additionally, clearly document the communication mechanism as a scaffold code to be replaced or improved if needed
1 parent 5a49cb9 commit 94a626d

File tree

7 files changed

+1750
-0
lines changed

7 files changed

+1750
-0
lines changed

cmd/updatestatuscontroller.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package main
2+
3+
import (
4+
"context"
5+
6+
k8sversion "k8s.io/apimachinery/pkg/version"
7+
8+
"github.com/openshift/library-go/pkg/controller/controllercmd"
9+
10+
"github.com/openshift/cluster-version-operator/pkg/updatestatus"
11+
cvoversion "github.com/openshift/cluster-version-operator/pkg/version"
12+
)
13+
14+
func init() {
15+
uscCommand := controllercmd.NewControllerCommandConfig(
16+
"update-status-controller",
17+
// TODO(USC: TechPreview): Unify version handling, potentially modernize CVO pkg/version
18+
// to use k8sversion.Info too.
19+
// https://github.com/openshift/cluster-version-operator/pull/1091#discussion_r1810601697
20+
k8sversion.Info{GitVersion: cvoversion.Raw},
21+
updatestatus.Run,
22+
).NewCommandWithContext(context.Background())
23+
24+
uscCommand.Short = "The Update Status Controller watches cluster state/health during the update process and exposes it through the UpdateStatus API."
25+
uscCommand.Use = "update-status-controller"
26+
27+
rootCmd.AddCommand(uscCommand)
28+
}

go.mod

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,31 @@ require (
3434
)
3535

3636
require (
37+
github.com/NYTimes/gziphandler v1.1.1 // indirect
38+
github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df // indirect
39+
github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535 // indirect
3740
github.com/beorn7/perks v1.0.1 // indirect
41+
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
3842
github.com/cespare/xxhash/v2 v2.2.0 // indirect
43+
github.com/coreos/go-semver v0.3.1 // indirect
44+
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
3945
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
4046
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
47+
github.com/felixge/httpsnoop v1.0.3 // indirect
48+
github.com/fsnotify/fsnotify v1.7.0 // indirect
4149
github.com/go-logr/logr v1.4.1 // indirect
50+
github.com/go-logr/stdr v1.2.2 // indirect
4251
github.com/go-openapi/jsonpointer v0.19.6 // indirect
4352
github.com/go-openapi/jsonreference v0.20.2 // indirect
4453
github.com/go-openapi/swag v0.22.3 // indirect
4554
github.com/gogo/protobuf v1.3.2 // indirect
4655
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
4756
github.com/golang/protobuf v1.5.4 // indirect
57+
github.com/google/cel-go v0.17.8 // indirect
4858
github.com/google/gnostic-models v0.6.8 // indirect
4959
github.com/google/gofuzz v1.2.0 // indirect
60+
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
61+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect
5062
github.com/imdario/mergo v0.3.12 // indirect
5163
github.com/inconshreveable/mousetrap v1.1.0 // indirect
5264
github.com/josharian/intern v1.0.0 // indirect
@@ -58,20 +70,47 @@ require (
5870
github.com/modern-go/reflect2 v1.0.2 // indirect
5971
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
6072
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
73+
github.com/pkg/profile v1.3.0 // indirect
6174
github.com/prometheus/procfs v0.10.1 // indirect
75+
github.com/robfig/cron v1.2.0 // indirect
6276
github.com/sirupsen/logrus v1.9.0 // indirect
6377
github.com/spf13/pflag v1.0.5 // indirect
78+
github.com/stoewer/go-strcase v1.2.0 // indirect
79+
go.etcd.io/etcd/api/v3 v3.5.10 // indirect
80+
go.etcd.io/etcd/client/pkg/v3 v3.5.10 // indirect
81+
go.etcd.io/etcd/client/v3 v3.5.10 // indirect
82+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.42.0 // indirect
83+
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 // indirect
84+
go.opentelemetry.io/otel v1.19.0 // indirect
85+
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect
86+
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
87+
go.opentelemetry.io/otel/metric v1.19.0 // indirect
88+
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
89+
go.opentelemetry.io/otel/trace v1.19.0 // indirect
90+
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
91+
go.uber.org/multierr v1.11.0 // indirect
92+
go.uber.org/zap v1.26.0 // indirect
93+
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
6494
golang.org/x/oauth2 v0.10.0 // indirect
95+
golang.org/x/sync v0.6.0 // indirect
6596
golang.org/x/sys v0.18.0 // indirect
6697
golang.org/x/term v0.18.0 // indirect
6798
golang.org/x/text v0.14.0 // indirect
6899
google.golang.org/appengine v1.6.7 // indirect
100+
google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 // indirect
101+
google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e // indirect
102+
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
103+
google.golang.org/grpc v1.58.3 // indirect
69104
google.golang.org/protobuf v1.33.0 // indirect
70105
gopkg.in/inf.v0 v0.9.1 // indirect
106+
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
71107
gopkg.in/yaml.v2 v2.4.0 // indirect
72108
gopkg.in/yaml.v3 v3.0.1 // indirect
73109
k8s.io/apiserver v0.30.1 // indirect
110+
k8s.io/component-base v0.30.1 // indirect
111+
k8s.io/kms v0.30.1 // indirect
74112
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
113+
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 // indirect
75114
sigs.k8s.io/controller-runtime v0.12.1 // indirect
76115
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
77116
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
package updatestatus
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"time"
7+
8+
"gopkg.in/yaml.v3"
9+
"k8s.io/apimachinery/pkg/api/errors"
10+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
"k8s.io/apimachinery/pkg/runtime"
12+
"k8s.io/klog/v2"
13+
14+
configv1 "github.com/openshift/api/config/v1"
15+
configinformers "github.com/openshift/client-go/config/informers/externalversions"
16+
configv1listers "github.com/openshift/client-go/config/listers/config/v1"
17+
"github.com/openshift/library-go/pkg/controller/factory"
18+
"github.com/openshift/library-go/pkg/operator/events"
19+
20+
"github.com/openshift/cluster-version-operator/lib/resourcemerge"
21+
)
22+
23+
// controlPlaneInformerController is the controller that monitors health of the control plane-related resources (initially,
24+
// just ClusterVersion but will need to handle ClusterOperators too) and produces insights for control plane update.
25+
type controlPlaneInformerController struct {
26+
clusterVersions configv1listers.ClusterVersionLister
27+
recorder events.Recorder
28+
29+
// sendInsight should be called to send produced insights to the update status controller
30+
sendInsight sendInsightFn
31+
32+
// now is a function that returns the current time, used for testing
33+
now func() metav1.Time
34+
}
35+
36+
func newControlPlaneInformerController(
37+
configInformers configinformers.SharedInformerFactory,
38+
recorder events.Recorder,
39+
sendInsight sendInsightFn,
40+
) factory.Controller {
41+
cpiRecorder := recorder.WithComponentSuffix("control-plane-informer")
42+
43+
c := &controlPlaneInformerController{
44+
clusterVersions: configInformers.Config().V1().ClusterVersions().Lister(),
45+
recorder: cpiRecorder,
46+
sendInsight: sendInsight,
47+
48+
now: metav1.Now,
49+
}
50+
51+
cvInformer := configInformers.Config().V1().ClusterVersions().Informer()
52+
53+
controller := factory.New().
54+
// call sync on ClusterVersion changes
55+
WithInformersQueueKeysFunc(configApiQueueKeys, cvInformer).
56+
WithSync(c.sync).
57+
ToController("ControlPlaneInformer", c.recorder)
58+
59+
return controller
60+
}
61+
62+
// sync is called for any controller event. It will assess the state and health of the control plane, indicated by
63+
// the changed resource (ClusterVersion), produce insights, and send them to the update status controller. Status
64+
// insights are not stored between calls, so every call produces a fresh insight. This means some fields do not follow
65+
// conventions, like LastTransitionTime in the Updating condition. Proper continuous insight maintenance will need to
66+
// be added later (not yet sure whether on consumer or producer side).
67+
func (c *controlPlaneInformerController) sync(_ context.Context, syncCtx factory.SyncContext) error {
68+
queueKey := syncCtx.QueueKey()
69+
70+
clusterVersion, err := c.clusterVersions.Get(queueKey)
71+
if err != nil {
72+
if errors.IsNotFound(err) {
73+
// TODO: Handle deletes by deleting the status insight
74+
return nil
75+
}
76+
return err
77+
}
78+
79+
now := c.now()
80+
insight := assessClusterVersion(clusterVersion, now)
81+
msg := makeInsightMsgForClusterVersion(insight, now)
82+
var msgForLog string
83+
if klog.V(4).Enabled() {
84+
msgForLog = fmt.Sprintf(" | msg=%s", string(msg.insight))
85+
}
86+
klog.V(2).Infof("CPI :: Syncing ClusterVersion %s%s", clusterVersion.Name, msgForLog)
87+
c.sendInsight(msg)
88+
89+
return nil
90+
}
91+
92+
// makeInsightMsgForClusterVersion creates an informerMsg for the given ClusterVersionStatusInsight. It defines an uid
93+
// name and serializes the insight as YAML. Serialization is convenient because it prevents any data sharing issues
94+
// between controllers.
95+
func makeInsightMsgForClusterVersion(cvInsight *ClusterVersionStatusInsight, acquiredAt metav1.Time) informerMsg {
96+
uid := fmt.Sprintf("usc-cv-%s", cvInsight.Resource.Name)
97+
insight := Insight{
98+
UID: uid,
99+
AcquiredAt: acquiredAt,
100+
InsightUnion: InsightUnion{
101+
Type: ClusterVersionStatusInsightType,
102+
ClusterVersionStatusInsight: cvInsight,
103+
},
104+
}
105+
// Should handle errors, but ultimately we will have a proper API and won’t need to serialize ourselves
106+
rawInsight, _ := yaml.Marshal(insight)
107+
return informerMsg{
108+
uid: uid,
109+
insight: rawInsight,
110+
}
111+
}
112+
113+
// assessClusterVersion produces a ClusterVersion status insight from the current state of the ClusterVersion resource.
114+
// It does not take previous status insight into account. Many fields of the status insights (such as completion) cannot
115+
// be properly calculated without also watching and processing ClusterOperators, so that functionality will need to be
116+
// added later.
117+
func assessClusterVersion(cv *configv1.ClusterVersion, now metav1.Time) *ClusterVersionStatusInsight {
118+
119+
var lastHistoryItem *configv1.UpdateHistory
120+
if len(cv.Status.History) > 0 {
121+
lastHistoryItem = &cv.Status.History[0]
122+
}
123+
cvProgressing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.OperatorProgressing)
124+
125+
updating, startedAt, completedAt := isControlPlaneUpdating(cvProgressing, lastHistoryItem)
126+
updating.LastTransitionTime = now
127+
128+
klog.V(2).Infof("CPI :: CV/%s :: Updating=%s Started=%s Completed=%s", cv.Name, updating.Status, startedAt, completedAt)
129+
130+
var assessment ControlPlaneAssessment
131+
var completion int32
132+
switch updating.Status {
133+
case metav1.ConditionTrue:
134+
assessment = ControlPlaneAssessmentProgressing
135+
case metav1.ConditionFalse:
136+
assessment = ControlPlaneAssessmentCompleted
137+
completion = 100
138+
case metav1.ConditionUnknown:
139+
assessment = ControlPlaneAssessmentUnknown
140+
default:
141+
assessment = ControlPlaneAssessmentUnknown
142+
}
143+
144+
klog.V(2).Infof("CPI :: CV/%s :: Assessment=%s", cv.Name, assessment)
145+
146+
insight := &ClusterVersionStatusInsight{
147+
Resource: ResourceRef{
148+
Resource: "clusterversions",
149+
Group: configv1.GroupName,
150+
Name: cv.Name,
151+
},
152+
Assessment: assessment,
153+
Versions: versionsFromHistory(cv.Status.History),
154+
Completion: completion,
155+
StartedAt: startedAt,
156+
Conditions: []metav1.Condition{updating},
157+
}
158+
159+
if !completedAt.IsZero() {
160+
insight.CompletedAt = &completedAt
161+
}
162+
163+
if est := estimateCompletion(startedAt.Time); !est.IsZero() {
164+
insight.EstimatedCompletedAt = &metav1.Time{Time: est}
165+
}
166+
167+
return insight
168+
}
169+
170+
// estimateCompletion returns a placeholder estimate of when the update finishes: 60 minutes after the given start
// time. When the start time is not known (zero value), no estimate can be made and the zero time.Time is returned, so
// that callers checking IsZero() on the result do not publish a meaningless estimate anchored at the zero time.
// Proper estimation needs to be added once the controller starts handling ClusterOperators.
func estimateCompletion(started time.Time) time.Time {
	if started.IsZero() {
		return time.Time{}
	}
	return started.Add(60 * time.Minute)
}
175+
176+
// isControlPlaneUpdating determines whether the control plane is updating based on the ClusterVersion's Progressing
177+
// condition and the last history item. It returns an updating condition, the time the update started, and the time the
178+
// update completed. If the updating condition cannot be determined, the condition will have Status=Unknown and the
179+
// Reason and Message fields will explain why.
180+
func isControlPlaneUpdating(cvProgressing *configv1.ClusterOperatorStatusCondition, lastHistoryItem *configv1.UpdateHistory) (metav1.Condition, metav1.Time, metav1.Time) {
181+
updating := metav1.Condition{
182+
Type: string(ClusterVersionStatusInsightUpdating),
183+
}
184+
185+
if cvProgressing == nil {
186+
setCannotDetermineUpdating(&updating, "No Progressing condition in ClusterVersion")
187+
return updating, metav1.Time{}, metav1.Time{}
188+
}
189+
if lastHistoryItem == nil {
190+
setCannotDetermineUpdating(&updating, "Empty history in ClusterVersion")
191+
return updating, metav1.Time{}, metav1.Time{}
192+
}
193+
194+
updating.Status, updating.Reason, updating.Message = cvProgressingToUpdating(*cvProgressing)
195+
196+
var started metav1.Time
197+
// Looks like we are updating
198+
if cvProgressing.Status == configv1.ConditionTrue {
199+
if lastHistoryItem.State != configv1.PartialUpdate {
200+
setCannotDetermineUpdating(&updating, "Progressing=True in ClusterVersion but last history item is not Partial")
201+
} else if lastHistoryItem.CompletionTime != nil {
202+
setCannotDetermineUpdating(&updating, "Progressing=True in ClusterVersion but last history item has completion time")
203+
} else {
204+
started = lastHistoryItem.StartedTime
205+
}
206+
}
207+
208+
var completed metav1.Time
209+
// Looks like we are not updating
210+
if cvProgressing.Status == configv1.ConditionFalse {
211+
if lastHistoryItem.State != configv1.CompletedUpdate {
212+
setCannotDetermineUpdating(&updating, "Progressing=False in ClusterVersion but last history item is not completed")
213+
} else if lastHistoryItem.CompletionTime == nil {
214+
setCannotDetermineUpdating(&updating, "Progressing=False in ClusterVersion but not no completion in last history item")
215+
} else {
216+
started = lastHistoryItem.StartedTime
217+
completed = *lastHistoryItem.CompletionTime
218+
}
219+
}
220+
221+
return updating, started, completed
222+
}
223+
224+
func setCannotDetermineUpdating(cond *metav1.Condition, message string) {
225+
cond.Status = metav1.ConditionUnknown
226+
cond.Reason = string(ClusterVersionCannotDetermineUpdating)
227+
cond.Message = message
228+
}
229+
230+
// cvProgressingToUpdating returns a status, reason and message for the updating condition based on the cvProgressing
231+
// condition.
232+
func cvProgressingToUpdating(cvProgressing configv1.ClusterOperatorStatusCondition) (metav1.ConditionStatus, string, string) {
233+
status := metav1.ConditionStatus(cvProgressing.Status)
234+
var reason string
235+
switch status {
236+
case metav1.ConditionTrue:
237+
reason = string(ClusterVersionProgressing)
238+
case metav1.ConditionFalse:
239+
reason = string(ClusterVersionNotProgressing)
240+
case metav1.ConditionUnknown:
241+
reason = string(ClusterVersionCannotDetermineUpdating)
242+
default:
243+
reason = string(ClusterVersionCannotDetermineUpdating)
244+
}
245+
246+
message := fmt.Sprintf("ClusterVersion has Progressing=%s(Reason=%s) | Message='%s'", cvProgressing.Status, cvProgressing.Reason, cvProgressing.Message)
247+
return status, reason, message
248+
}
249+
250+
// versionsFromHistory returns a ControlPlaneUpdateVersions struct with the target version and metadata from the given
251+
// history.
252+
func versionsFromHistory(history []configv1.UpdateHistory) ControlPlaneUpdateVersions {
253+
var versions ControlPlaneUpdateVersions
254+
255+
if len(history) == 0 {
256+
return versions
257+
}
258+
259+
versions.Target.Version = history[0].Version
260+
261+
if len(history) == 1 {
262+
versions.Target.Metadata = []VersionMetadata{{Key: InstallationMetadata}}
263+
}
264+
if len(history) > 1 {
265+
versions.Previous.Version = history[1].Version
266+
if history[1].State == configv1.PartialUpdate {
267+
versions.Previous.Metadata = []VersionMetadata{{Key: PartialMetadata}}
268+
}
269+
}
270+
return versions
271+
}
272+
273+
func configApiQueueKeys(object runtime.Object) []string {
274+
if object == nil {
275+
return nil
276+
}
277+
278+
switch o := object.(type) {
279+
case *configv1.ClusterVersion:
280+
return []string{o.Name}
281+
}
282+
283+
klog.Fatalf("USC :: Unknown object type: %T", object)
284+
return nil
285+
}

0 commit comments

Comments
 (0)