Skip to content

Commit 6390a9c

Browse files
author
Per Goncalves da Silva
committed
Add more logging around catalog source sync
Signed-off-by: Per Goncalves da Silva <[email protected]>
1 parent b7aa493 commit 6390a9c

File tree

2 files changed

+40
-48
lines changed

2 files changed

+40
-48
lines changed

pkg/controller/operators/catalog/operator.go

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -912,6 +912,7 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
912912
err = fmt.Errorf("unknown sourcetype: %s", sourceType)
913913
}
914914
if err != nil {
915+
logger.WithError(err).Error("error validating catalog source type")
915916
out.SetError(v1alpha1.CatalogSourceSpecInvalidError, err)
916917
return
917918
}
@@ -923,7 +924,6 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
923924
}
924925
}
925926
continueSync = true
926-
927927
return
928928
}
929929

@@ -936,27 +936,22 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
936936

937937
out = in.DeepCopy()
938938

939-
logger = logger.WithFields(logrus.Fields{
940-
"configmap.namespace": in.Namespace,
941-
"configmap.name": in.Spec.ConfigMap,
942-
})
943-
logger.Info("checking catsrc configmap state")
944-
945939
var updateLabel bool
946940
// Get the catalog source's config map
947941
configMap, err := o.lister.CoreV1().ConfigMapLister().ConfigMaps(in.GetNamespace()).Get(in.Spec.ConfigMap)
948942
// Attempt to look up the CM via api call if there is a cache miss
949943
if apierrors.IsNotFound(err) {
944+
// TODO: Don't reach out via live client if its not found in the cache (https://github.com/operator-framework/operator-lifecycle-manager/issues/3415)
950945
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(in.GetNamespace()).Get(context.TODO(), in.Spec.ConfigMap, metav1.GetOptions{})
951946
// Found cm in the cluster, add managed label to configmap
952947
if err == nil {
953-
labels := configMap.GetLabels()
954-
if labels == nil {
955-
labels = make(map[string]string)
948+
cmLabels := configMap.GetLabels()
949+
if cmLabels == nil {
950+
cmLabels = make(map[string]string)
956951
}
957952

958-
labels[install.OLMManagedLabelKey] = "false"
959-
configMap.SetLabels(labels)
953+
cmLabels[install.OLMManagedLabelKey] = "false"
954+
configMap.SetLabels(cmLabels)
960955
updateLabel = true
961956
}
962957
}
@@ -973,12 +968,9 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
973968
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
974969
return
975970
}
976-
977-
logger.Info("adopted configmap")
978971
}
979972

980973
if in.Status.ConfigMapResource == nil || !in.Status.ConfigMapResource.IsAMatch(&configMap.ObjectMeta) {
981-
logger.Info("updating catsrc configmap state")
982974
// configmap ref nonexistent or updated, write out the new configmap ref to status and exit
983975
out.Status.ConfigMapResource = &v1alpha1.ConfigMapResourceReference{
984976
Name: configMap.GetName(),
@@ -998,7 +990,6 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
998990
func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
999991
out = in.DeepCopy()
1000992

1001-
logger.Info("synchronizing registry server")
1002993
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
1003994
srcReconciler := o.reconciler.ReconcilerForSource(in)
1004995
if srcReconciler == nil {
@@ -1015,21 +1006,15 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10151006
return
10161007
}
10171008

1018-
logger.WithField("health", healthy).Infof("checked registry server health")
1019-
10201009
if healthy && in.Status.RegistryServiceStatus != nil {
1021-
logger.Info("registry state good")
10221010
continueSync = true
10231011
// return here if catalog does not have polling enabled
10241012
if !out.Poll() {
1025-
logger.Info("polling not enabled, nothing more to do")
10261013
return
10271014
}
10281015
}
10291016

10301017
// Registry pod hasn't been created or hasn't been updated since the last configmap update, recreate it
1031-
logger.Info("ensuring registry server")
1032-
10331018
err = srcReconciler.EnsureRegistryServer(logger, out)
10341019
if err != nil {
10351020
if _, ok := err.(reconciler.UpdateNotReadyErr); ok {
@@ -1042,8 +1027,6 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10421027
return
10431028
}
10441029

1045-
logger.Info("ensured registry server")
1046-
10471030
// requeue the catalog sync based on the polling interval, for accurate syncs of catalogs with polling enabled
10481031
if out.Spec.UpdateStrategy != nil && out.Spec.UpdateStrategy.RegistryPoll != nil {
10491032
if out.Spec.UpdateStrategy.Interval == nil {
@@ -1052,16 +1035,17 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10521035
return
10531036
}
10541037
if out.Spec.UpdateStrategy.RegistryPoll.ParsingError != "" && out.Status.Reason != v1alpha1.CatalogSourceIntervalInvalidError {
1055-
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError))
1038+
err := errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError)
1039+
logger.WithError(err).Error("registry server sync error: failed to parse registry poll interval")
1040+
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, err)
10561041
}
1057-
logger.Infof("requeuing registry server sync based on polling interval %s", out.Spec.UpdateStrategy.Interval.Duration.String())
10581042
resyncPeriod := reconciler.SyncRegistryUpdateInterval(out, time.Now())
10591043
o.catsrcQueueSet.RequeueAfter(out.GetNamespace(), out.GetName(), queueinformer.ResyncWithJitter(resyncPeriod, 0.1)())
10601044
return
10611045
}
10621046

10631047
if err := o.sources.Remove(sourceKey); err != nil {
1064-
o.logger.WithError(err).Debug("error closing client connection")
1048+
o.logger.WithError(err).Error("registry server sync error: error closing client connection")
10651049
}
10661050

10671051
return
@@ -1152,7 +1136,6 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
11521136
"catalogsource.name": catsrc.Name,
11531137
"id": queueinformer.NewLoopID(),
11541138
})
1155-
logger.Info("syncing catalog source")
11561139

11571140
syncFunc := func(in *v1alpha1.CatalogSource, chain []CatalogSourceSyncFunc) (out *v1alpha1.CatalogSource, syncErr error) {
11581141
out = in

pkg/controller/registry/reconciler/grpc.go

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"strings"
77
"time"
88

9-
"github.com/google/go-cmp/cmp"
109
"github.com/operator-framework/api/pkg/operators/v1alpha1"
1110

1211
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
@@ -201,10 +200,9 @@ func (c *GrpcRegistryReconciler) currentUpdatePods(logger *logrus.Entry, source
201200
}
202201

203202
func (c *GrpcRegistryReconciler) currentPodsWithCorrectImageAndSpec(logger *logrus.Entry, source grpcCatalogSourceDecorator, serviceAccount *corev1.ServiceAccount, defaultPodSecurityConfig v1alpha1.SecurityConfig) ([]*corev1.Pod, error) {
204-
logger.Info("searching for current pods")
205203
pods, err := c.Lister.CoreV1().PodLister().Pods(source.GetNamespace()).List(labels.SelectorFromValidatedSet(source.Labels()))
206204
if err != nil {
207-
logger.WithError(err).Warn("couldn't find pod in cache")
205+
logger.WithError(err).Warn("error searching for catalog source pods: couldn't find pod in cache")
208206
return nil, nil
209207
}
210208
found := []*corev1.Pod{}
@@ -213,20 +211,10 @@ func (c *GrpcRegistryReconciler) currentPodsWithCorrectImageAndSpec(logger *logr
213211
return nil, err
214212
}
215213
for _, p := range pods {
216-
images, hash := correctImages(source, p), podHashMatch(p, newPod)
217-
logger = logger.WithFields(logrus.Fields{
218-
"current-pod.namespace": p.Namespace, "current-pod.name": p.Name,
219-
"correctImages": images, "correctHash": hash,
220-
})
221-
logger.Info("evaluating current pod")
222-
if !hash {
223-
logger.Infof("pod spec diff: %s", cmp.Diff(p.Spec, newPod.Spec))
224-
}
225214
if correctImages(source, p) && podHashMatch(p, newPod) {
226215
found = append(found, p)
227216
}
228217
}
229-
logger.Infof("of %d pods matching label selector, %d have the correct images and matching hash", len(pods), len(found))
230218
return found, nil
231219
}
232220

@@ -252,64 +240,71 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
252240
// if service status is nil, we force create every object to ensure they're created the first time
253241
valid, err := isRegistryServiceStatusValid(&source)
254242
if err != nil {
243+
logger.WithError(err).Error("error ensuring registry server: could not validate registry service status")
255244
return err
256245
}
257246
overwrite := !valid
258-
if overwrite {
259-
logger.Info("registry service status invalid, need to overwrite")
260-
}
261247

262248
//TODO: if any of these error out, we should write a status back (possibly set RegistryServiceStatus to nil so they get recreated)
263249
sa, err := c.ensureSA(source)
264250
if err != nil && !apierrors.IsAlreadyExists(err) {
251+
logger.WithError(err).Error("error ensuring registry server: could not ensure registry service account")
265252
return pkgerrors.Wrapf(err, "error ensuring service account: %s", source.GetName())
266253
}
267254

268255
sa, err = c.OpClient.GetServiceAccount(sa.GetNamespace(), sa.GetName())
269256
if err != nil {
257+
logger.WithError(err).Error("error ensuring registry server: could not get registry service account")
270258
return err
271259
}
272260

273261
defaultPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
274262
if err != nil {
263+
logger.WithError(err).Error("error ensuring registry server: could not get default pod security config")
275264
return err
276265
}
277266

278267
// recreate the pod if no existing pod is serving the latest image or correct spec
279268
current, err := c.currentPodsWithCorrectImageAndSpec(logger, source, sa, defaultPodSecurityConfig)
280269
if err != nil {
270+
logger.WithError(err).Error("error ensuring registry server: could not get current pods with correct image and spec")
281271
return err
282272
}
283273
overwritePod := overwrite || len(current) == 0
284-
if overwritePod {
285-
logger.Info("registry pods invalid, need to overwrite")
286-
}
287274

288275
pod, err := source.Pod(sa, defaultPodSecurityConfig)
289276
if err != nil {
277+
logger.WithError(err).Error("error ensuring registry server: could not create registry pod")
290278
return err
291279
}
292280
if err := c.ensurePod(logger, source, sa, defaultPodSecurityConfig, overwritePod); err != nil {
281+
logger.WithError(err).Error("error ensuring registry server: could not ensure registry pod")
293282
return pkgerrors.Wrapf(err, "error ensuring pod: %s", pod.GetName())
294283
}
295284
if err := c.ensureUpdatePod(logger, sa, defaultPodSecurityConfig, source); err != nil {
285+
logger.WithError(err).Error("error ensuring registry server: could not ensure update pod")
296286
if _, ok := err.(UpdateNotReadyErr); ok {
287+
logger.WithError(err).Error("error ensuring registry server: ensure update pod error is not of type UpdateNotReadyErr")
297288
return err
298289
}
299290
return pkgerrors.Wrapf(err, "error ensuring updated catalog source pod: %s", pod.GetName())
300291
}
292+
301293
service, err := source.Service()
302294
if err != nil {
295+
logger.WithError(err).Error("couldn't get service")
303296
return err
304297
}
305298
if err := c.ensureService(source, overwrite); err != nil {
299+
logger.WithError(err).Error("error ensuring registry server: could not ensure service")
306300
return pkgerrors.Wrapf(err, "error ensuring service: %s", service.GetName())
307301
}
308302

309303
if overwritePod {
310304
now := c.now()
311305
service, err := source.Service()
312306
if err != nil {
307+
logger.WithError(err).Error("error ensuring registry server: could not get service")
313308
return err
314309
}
315310
catalogSource.Status.RegistryServiceStatus = &v1alpha1.RegistryServiceStatus{
@@ -603,6 +598,7 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
603598
serviceAccount := source.ServiceAccount()
604599
serviceAccount, err := c.OpClient.GetServiceAccount(serviceAccount.GetNamespace(), serviceAccount.GetName())
605600
if err != nil {
601+
logger.WithError(err).Error("registry service not healthy: could not get service account")
606602
if !apierrors.IsNotFound(err) {
607603
return false, err
608604
}
@@ -611,25 +607,38 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
611607

612608
registryPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
613609
if err != nil {
610+
logger.WithError(err).Error("registry service not healthy: could not get registry pod security config")
614611
return false, err
615612
}
616613

617614
// Check on registry resources
618615
// TODO: add gRPC health check
619616
service, err := c.currentService(source)
620617
if err != nil {
618+
logger.WithError(err).Error("registry service not healthy: could not get current service")
621619
return false, err
622620
}
621+
623622
currentPods, err := c.currentPodsWithCorrectImageAndSpec(logger, source, serviceAccount, registryPodSecurityConfig)
624623
if err != nil {
624+
logger.WithError(err).Error("registry service not healthy: could not get current pods")
625625
return false, err
626626
}
627+
628+
currentServiceAccount := c.currentServiceAccount(source)
627629
if len(currentPods) < 1 ||
628-
service == nil || c.currentServiceAccount(source) == nil {
630+
service == nil || currentServiceAccount == nil {
631+
logger.WithFields(logrus.Fields{
632+
"numCurrentPods": len(currentPods),
633+
"isServiceNil": service == nil,
634+
"isCurrentServiceAccountNil": currentServiceAccount == nil,
635+
}).Error("registry service not healthy: one or more required resources are missing")
629636
return false, nil
630637
}
638+
631639
podsAreLive, e := detectAndDeleteDeadPods(logger, c.OpClient, currentPods, source.GetNamespace())
632640
if e != nil {
641+
logger.WithError(e).Error("registry service not healthy: could not detect and delete dead pods")
633642
return false, fmt.Errorf("error deleting dead pods: %v", e)
634643
}
635644
return podsAreLive, nil

0 commit comments

Comments
 (0)