Skip to content

Commit 61d7e04

Browse files
author
Per Goncalves da Silva
committed
Add more logging around catalog source sync
Signed-off-by: Per Goncalves da Silva <[email protected]>
1 parent b7aa493 commit 61d7e04

File tree

2 files changed

+50
-43
lines changed

2 files changed

+50
-43
lines changed

pkg/controller/operators/catalog/operator.go

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -912,6 +912,7 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
912912
err = fmt.Errorf("unknown sourcetype: %s", sourceType)
913913
}
914914
if err != nil {
915+
logger.WithError(err).Error("error validating catalog source type")
915916
out.SetError(v1alpha1.CatalogSourceSpecInvalidError, err)
916917
return
917918
}
@@ -923,7 +924,6 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
923924
}
924925
}
925926
continueSync = true
926-
927927
return
928928
}
929929

@@ -936,27 +936,32 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
936936

937937
out = in.DeepCopy()
938938

939+
logger = logger.WithField("step", "syncConfigMap")
939940
logger = logger.WithFields(logrus.Fields{
940941
"configmap.namespace": in.Namespace,
941942
"configmap.name": in.Spec.ConfigMap,
942943
})
943-
logger.Info("checking catsrc configmap state")
944+
945+
defer func() {
946+
logger.WithError(syncError).WithField("continueSync", continueSync).Info("config map sync'ed")
947+
}()
944948

945949
var updateLabel bool
946950
// Get the catalog source's config map
947951
configMap, err := o.lister.CoreV1().ConfigMapLister().ConfigMaps(in.GetNamespace()).Get(in.Spec.ConfigMap)
948952
// Attempt to look up the CM via api call if there is a cache miss
949953
if apierrors.IsNotFound(err) {
954+
// TODO: Don't reach out via live client if its not found in the cache (https://github.com/operator-framework/operator-lifecycle-manager/issues/3415)
950955
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(in.GetNamespace()).Get(context.TODO(), in.Spec.ConfigMap, metav1.GetOptions{})
951956
// Found cm in the cluster, add managed label to configmap
952957
if err == nil {
953-
labels := configMap.GetLabels()
954-
if labels == nil {
955-
labels = make(map[string]string)
958+
cmLabels := configMap.GetLabels()
959+
if cmLabels == nil {
960+
cmLabels = make(map[string]string)
956961
}
957962

958-
labels[install.OLMManagedLabelKey] = "false"
959-
configMap.SetLabels(labels)
963+
cmLabels[install.OLMManagedLabelKey] = "false"
964+
configMap.SetLabels(cmLabels)
960965
updateLabel = true
961966
}
962967
}
@@ -973,12 +978,9 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
973978
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
974979
return
975980
}
976-
977-
logger.Info("adopted configmap")
978981
}
979982

980983
if in.Status.ConfigMapResource == nil || !in.Status.ConfigMapResource.IsAMatch(&configMap.ObjectMeta) {
981-
logger.Info("updating catsrc configmap state")
982984
// configmap ref nonexistent or updated, write out the new configmap ref to status and exit
983985
out.Status.ConfigMapResource = &v1alpha1.ConfigMapResourceReference{
984986
Name: configMap.GetName(),
@@ -998,11 +1000,11 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
9981000
func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
9991001
out = in.DeepCopy()
10001002

1001-
logger.Info("synchronizing registry server")
10021003
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
1004+
10031005
srcReconciler := o.reconciler.ReconcilerForSource(in)
10041006
if srcReconciler == nil {
1005-
// TODO: Add failure status on catalogsource and remove from sources
1007+
// TODO: Add failure status on catalogsource and remove from sources )
10061008
syncError = fmt.Errorf("no reconciler for source type %s", in.Spec.SourceType)
10071009
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
10081010
return
@@ -1015,21 +1017,18 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10151017
return
10161018
}
10171019

1018-
logger.WithField("health", healthy).Infof("checked registry server health")
1020+
logger = logger.WithField("health", healthy)
1021+
logger.Info("checked registry server health")
10191022

10201023
if healthy && in.Status.RegistryServiceStatus != nil {
1021-
logger.Info("registry state good")
10221024
continueSync = true
10231025
// return here if catalog does not have polling enabled
10241026
if !out.Poll() {
1025-
logger.Info("polling not enabled, nothing more to do")
10261027
return
10271028
}
10281029
}
10291030

10301031
// Registry pod hasn't been created or hasn't been updated since the last configmap update, recreate it
1031-
logger.Info("ensuring registry server")
1032-
10331032
err = srcReconciler.EnsureRegistryServer(logger, out)
10341033
if err != nil {
10351034
if _, ok := err.(reconciler.UpdateNotReadyErr); ok {
@@ -1042,8 +1041,6 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10421041
return
10431042
}
10441043

1045-
logger.Info("ensured registry server")
1046-
10471044
// requeue the catalog sync based on the polling interval, for accurate syncs of catalogs with polling enabled
10481045
if out.Spec.UpdateStrategy != nil && out.Spec.UpdateStrategy.RegistryPoll != nil {
10491046
if out.Spec.UpdateStrategy.Interval == nil {
@@ -1052,16 +1049,17 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10521049
return
10531050
}
10541051
if out.Spec.UpdateStrategy.RegistryPoll.ParsingError != "" && out.Status.Reason != v1alpha1.CatalogSourceIntervalInvalidError {
1055-
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError))
1052+
err := errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError)
1053+
logger.WithError(err).Error("registry server sync error: failed to parse registry poll interval")
1054+
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, err)
10561055
}
1057-
logger.Infof("requeuing registry server sync based on polling interval %s", out.Spec.UpdateStrategy.Interval.Duration.String())
10581056
resyncPeriod := reconciler.SyncRegistryUpdateInterval(out, time.Now())
10591057
o.catsrcQueueSet.RequeueAfter(out.GetNamespace(), out.GetName(), queueinformer.ResyncWithJitter(resyncPeriod, 0.1)())
10601058
return
10611059
}
10621060

10631061
if err := o.sources.Remove(sourceKey); err != nil {
1064-
o.logger.WithError(err).Debug("error closing client connection")
1062+
o.logger.WithError(err).Error("registry server sync error: error closing client connection")
10651063
}
10661064

10671065
return
@@ -1152,7 +1150,6 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
11521150
"catalogsource.name": catsrc.Name,
11531151
"id": queueinformer.NewLoopID(),
11541152
})
1155-
logger.Info("syncing catalog source")
11561153

11571154
syncFunc := func(in *v1alpha1.CatalogSource, chain []CatalogSourceSyncFunc) (out *v1alpha1.CatalogSource, syncErr error) {
11581155
out = in

pkg/controller/registry/reconciler/grpc.go

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"strings"
77
"time"
88

9-
"github.com/google/go-cmp/cmp"
109
"github.com/operator-framework/api/pkg/operators/v1alpha1"
1110

1211
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
@@ -201,10 +200,9 @@ func (c *GrpcRegistryReconciler) currentUpdatePods(logger *logrus.Entry, source
201200
}
202201

203202
func (c *GrpcRegistryReconciler) currentPodsWithCorrectImageAndSpec(logger *logrus.Entry, source grpcCatalogSourceDecorator, serviceAccount *corev1.ServiceAccount, defaultPodSecurityConfig v1alpha1.SecurityConfig) ([]*corev1.Pod, error) {
204-
logger.Info("searching for current pods")
205203
pods, err := c.Lister.CoreV1().PodLister().Pods(source.GetNamespace()).List(labels.SelectorFromValidatedSet(source.Labels()))
206204
if err != nil {
207-
logger.WithError(err).Warn("couldn't find pod in cache")
205+
logger.WithError(err).Warn("error searching for catalog source pods: couldn't find pod in cache")
208206
return nil, nil
209207
}
210208
found := []*corev1.Pod{}
@@ -213,20 +211,10 @@ func (c *GrpcRegistryReconciler) currentPodsWithCorrectImageAndSpec(logger *logr
213211
return nil, err
214212
}
215213
for _, p := range pods {
216-
images, hash := correctImages(source, p), podHashMatch(p, newPod)
217-
logger = logger.WithFields(logrus.Fields{
218-
"current-pod.namespace": p.Namespace, "current-pod.name": p.Name,
219-
"correctImages": images, "correctHash": hash,
220-
})
221-
logger.Info("evaluating current pod")
222-
if !hash {
223-
logger.Infof("pod spec diff: %s", cmp.Diff(p.Spec, newPod.Spec))
224-
}
225214
if correctImages(source, p) && podHashMatch(p, newPod) {
226215
found = append(found, p)
227216
}
228217
}
229-
logger.Infof("of %d pods matching label selector, %d have the correct images and matching hash", len(pods), len(found))
230218
return found, nil
231219
}
232220

@@ -252,64 +240,71 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
252240
// if service status is nil, we force create every object to ensure they're created the first time
253241
valid, err := isRegistryServiceStatusValid(&source)
254242
if err != nil {
243+
logger.WithError(err).Error("error ensuring registry server: could not validate registry service status")
255244
return err
256245
}
257246
overwrite := !valid
258-
if overwrite {
259-
logger.Info("registry service status invalid, need to overwrite")
260-
}
261247

262248
//TODO: if any of these error out, we should write a status back (possibly set RegistryServiceStatus to nil so they get recreated)
263249
sa, err := c.ensureSA(source)
264250
if err != nil && !apierrors.IsAlreadyExists(err) {
251+
logger.WithError(err).Error("error ensuring registry server: could not ensure registry service account")
265252
return pkgerrors.Wrapf(err, "error ensuring service account: %s", source.GetName())
266253
}
267254

268255
sa, err = c.OpClient.GetServiceAccount(sa.GetNamespace(), sa.GetName())
269256
if err != nil {
257+
logger.WithError(err).Error("error ensuring registry server: could not get registry service account")
270258
return err
271259
}
272260

273261
defaultPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
274262
if err != nil {
263+
logger.WithError(err).Error("error ensuring registry server: could not get default pod security config")
275264
return err
276265
}
277266

278267
// recreate the pod if no existing pod is serving the latest image or correct spec
279268
current, err := c.currentPodsWithCorrectImageAndSpec(logger, source, sa, defaultPodSecurityConfig)
280269
if err != nil {
270+
logger.WithError(err).Error("error ensuring registry server: could not get current pods with correct image and spec")
281271
return err
282272
}
283273
overwritePod := overwrite || len(current) == 0
284-
if overwritePod {
285-
logger.Info("registry pods invalid, need to overwrite")
286-
}
287274

288275
pod, err := source.Pod(sa, defaultPodSecurityConfig)
289276
if err != nil {
277+
logger.WithError(err).Error("error ensuring registry server: could not create registry pod")
290278
return err
291279
}
292280
if err := c.ensurePod(logger, source, sa, defaultPodSecurityConfig, overwritePod); err != nil {
281+
logger.WithError(err).Error("error ensuring registry server: could not ensure registry pod")
293282
return pkgerrors.Wrapf(err, "error ensuring pod: %s", pod.GetName())
294283
}
295284
if err := c.ensureUpdatePod(logger, sa, defaultPodSecurityConfig, source); err != nil {
285+
logger.WithError(err).Error("error ensuring registry server: could not ensure update pod")
296286
if _, ok := err.(UpdateNotReadyErr); ok {
287+
logger.WithError(err).Error("error ensuring registry server: ensure update pod error is not of type UpdateNotReadyErr")
297288
return err
298289
}
299290
return pkgerrors.Wrapf(err, "error ensuring updated catalog source pod: %s", pod.GetName())
300291
}
292+
301293
service, err := source.Service()
302294
if err != nil {
295+
logger.WithError(err).Error("couldn't get service")
303296
return err
304297
}
305298
if err := c.ensureService(source, overwrite); err != nil {
299+
logger.WithError(err).Error("error ensuring registry server: could not ensure service")
306300
return pkgerrors.Wrapf(err, "error ensuring service: %s", service.GetName())
307301
}
308302

309303
if overwritePod {
310304
now := c.now()
311305
service, err := source.Service()
312306
if err != nil {
307+
logger.WithError(err).Error("error ensuring registry server: could not get service")
313308
return err
314309
}
315310
catalogSource.Status.RegistryServiceStatus = &v1alpha1.RegistryServiceStatus{
@@ -603,6 +598,7 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
603598
serviceAccount := source.ServiceAccount()
604599
serviceAccount, err := c.OpClient.GetServiceAccount(serviceAccount.GetNamespace(), serviceAccount.GetName())
605600
if err != nil {
601+
logger.WithError(err).Error("registry service not healthy: could not get service account")
606602
if !apierrors.IsNotFound(err) {
607603
return false, err
608604
}
@@ -611,27 +607,41 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
611607

612608
registryPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
613609
if err != nil {
610+
logger.WithError(err).Error("registry service not healthy: could not get registry pod security config")
614611
return false, err
615612
}
616613

617614
// Check on registry resources
618615
// TODO: add gRPC health check
619616
service, err := c.currentService(source)
620617
if err != nil {
618+
logger.WithError(err).Error("registry service not healthy: could not get current service")
621619
return false, err
622620
}
621+
623622
currentPods, err := c.currentPodsWithCorrectImageAndSpec(logger, source, serviceAccount, registryPodSecurityConfig)
624623
if err != nil {
624+
logger.WithError(err).Error("registry service not healthy: could not get current pods")
625625
return false, err
626626
}
627+
628+
currentServiceAccount := c.currentServiceAccount(source)
627629
if len(currentPods) < 1 ||
628-
service == nil || c.currentServiceAccount(source) == nil {
630+
service == nil || currentServiceAccount == nil {
631+
logger.WithFields(logrus.Fields{
632+
"numCurrentPods": len(currentPods),
633+
"isServiceNil": service == nil,
634+
"isCurrentServiceAccountNil": currentServiceAccount == nil,
635+
}).Error("registry service not healthy: one or more required resources are missing")
629636
return false, nil
630637
}
638+
631639
podsAreLive, e := detectAndDeleteDeadPods(logger, c.OpClient, currentPods, source.GetNamespace())
632640
if e != nil {
641+
logger.WithError(e).Error("registry service not healthy: could not detect and delete dead pods")
633642
return false, fmt.Errorf("error deleting dead pods: %v", e)
634643
}
644+
635645
return podsAreLive, nil
636646
}
637647

0 commit comments

Comments
 (0)