diff --git a/pkg/controller/operators/catalog/operator.go b/pkg/controller/operators/catalog/operator.go index ebdd6313a4..e2dae38ecb 100644 --- a/pkg/controller/operators/catalog/operator.go +++ b/pkg/controller/operators/catalog/operator.go @@ -912,6 +912,7 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out * err = fmt.Errorf("unknown sourcetype: %s", sourceType) } if err != nil { + logger.WithError(err).Error("error validating catalog source type") out.SetError(v1alpha1.CatalogSourceSpecInvalidError, err) return } @@ -923,7 +924,6 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out * } } continueSync = true - return } @@ -936,27 +936,22 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc out = in.DeepCopy() - logger = logger.WithFields(logrus.Fields{ - "configmap.namespace": in.Namespace, - "configmap.name": in.Spec.ConfigMap, - }) - logger.Info("checking catsrc configmap state") - var updateLabel bool // Get the catalog source's config map configMap, err := o.lister.CoreV1().ConfigMapLister().ConfigMaps(in.GetNamespace()).Get(in.Spec.ConfigMap) // Attempt to look up the CM via api call if there is a cache miss if apierrors.IsNotFound(err) { + // TODO: Don't reach out via live client if its not found in the cache (https://github.com/operator-framework/operator-lifecycle-manager/issues/3415) configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(in.GetNamespace()).Get(context.TODO(), in.Spec.ConfigMap, metav1.GetOptions{}) // Found cm in the cluster, add managed label to configmap if err == nil { - labels := configMap.GetLabels() - if labels == nil { - labels = make(map[string]string) + cmLabels := configMap.GetLabels() + if cmLabels == nil { + cmLabels = make(map[string]string) } - labels[install.OLMManagedLabelKey] = "false" - configMap.SetLabels(labels) + cmLabels[install.OLMManagedLabelKey] = "false" + configMap.SetLabels(cmLabels) updateLabel = true } } @@ -973,12 +968,9 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError) return } - - logger.Info("adopted configmap") } if in.Status.ConfigMapResource == nil || !in.Status.ConfigMapResource.IsAMatch(&configMap.ObjectMeta) { - logger.Info("updating catsrc configmap state") // configmap ref nonexistent or updated, write out the new configmap ref to status and exit out.Status.ConfigMapResource = &v1alpha1.ConfigMapResourceReference{ Name: configMap.GetName(), @@ -998,7 +990,6 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) { out = in.DeepCopy() - logger.Info("synchronizing registry server") sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()} srcReconciler := o.reconciler.ReconcilerForSource(in) if srcReconciler == nil { @@ -1015,21 +1006,15 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog return } - logger.WithField("health", healthy).Infof("checked registry server health") - if healthy && in.Status.RegistryServiceStatus != nil { - logger.Info("registry state good") continueSync = true // return here if catalog does not have polling enabled if !out.Poll() { - logger.Info("polling not enabled, nothing more to do") return } } // Registry pod hasn't been created or hasn't been updated since the last configmap update, recreate it - logger.Info("ensuring registry server") - err = srcReconciler.EnsureRegistryServer(logger, out) if err != nil { if _, ok := err.(reconciler.UpdateNotReadyErr); ok { @@ -1042,8 +1027,6 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog return } - logger.Info("ensured registry server") - // requeue the catalog sync based on the polling interval, for accurate syncs of catalogs with polling enabled if out.Spec.UpdateStrategy != nil && out.Spec.UpdateStrategy.RegistryPoll != nil { if out.Spec.UpdateStrategy.Interval == nil { @@ -1052,16 +1035,17 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog return } if out.Spec.UpdateStrategy.RegistryPoll.ParsingError != "" && out.Status.Reason != v1alpha1.CatalogSourceIntervalInvalidError { - out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError)) + err := errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError) + logger.WithError(err).Error("registry server sync error: failed to parse registry poll interval") + out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, err) } - logger.Infof("requeuing registry server sync based on polling interval %s", out.Spec.UpdateStrategy.Interval.Duration.String()) resyncPeriod := reconciler.SyncRegistryUpdateInterval(out, time.Now()) o.catsrcQueueSet.RequeueAfter(out.GetNamespace(), out.GetName(), queueinformer.ResyncWithJitter(resyncPeriod, 0.1)()) return } if err := o.sources.Remove(sourceKey); err != nil { - o.logger.WithError(err).Debug("error closing client connection") + o.logger.WithError(err).Error("registry server sync error: error closing client connection") } return @@ -1152,7 +1136,6 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) { "catalogsource.name": catsrc.Name, "id": queueinformer.NewLoopID(), }) - logger.Info("syncing catalog source") syncFunc := func(in *v1alpha1.CatalogSource, chain []CatalogSourceSyncFunc) (out *v1alpha1.CatalogSource, syncErr error) { out = in diff --git a/pkg/controller/registry/reconciler/grpc.go b/pkg/controller/registry/reconciler/grpc.go index a960a00729..2f32ad8264 100644 --- a/pkg/controller/registry/reconciler/grpc.go +++ b/pkg/controller/registry/reconciler/grpc.go @@ -7,6 +7,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/operator-framework/api/pkg/operators/v1alpha1" "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install" @@ -201,10 +202,9 @@ func (c *GrpcRegistryReconciler) currentUpdatePods(logger *logrus.Entry, source } func (c *GrpcRegistryReconciler) currentPodsWithCorrectImageAndSpec(logger *logrus.Entry, source grpcCatalogSourceDecorator, serviceAccount *corev1.ServiceAccount, defaultPodSecurityConfig v1alpha1.SecurityConfig) ([]*corev1.Pod, error) { - logger.Info("searching for current pods") pods, err := c.Lister.CoreV1().PodLister().Pods(source.GetNamespace()).List(labels.SelectorFromValidatedSet(source.Labels())) if err != nil { - logger.WithError(err).Warn("couldn't find pod in cache") + logger.WithError(err).Warn("error searching for catalog source pods: couldn't find pod in cache") return nil, nil } found := []*corev1.Pod{} @@ -222,7 +222,7 @@ func (c *GrpcRegistryReconciler) currentPodsWithCorrectImageAndSpec(logger *logr if !hash { logger.Infof("pod spec diff: %s", cmp.Diff(p.Spec, newPod.Spec)) } - if correctImages(source, p) && podHashMatch(p, newPod) { + if images && hash { found = append(found, p) } } @@ -252,6 +252,7 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata // if service status is nil, we force create every object to ensure they're created the first time valid, err := isRegistryServiceStatusValid(&source) if err != nil { + logger.WithError(err).Error("error ensuring registry server: could not validate registry service status") return err } overwrite := !valid @@ -262,22 +263,26 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata //TODO: if any of these error out, we should write a status back (possibly set RegistryServiceStatus to nil so they get recreated) sa, err := c.ensureSA(source) if err != nil && !apierrors.IsAlreadyExists(err) { + logger.WithError(err).Error("error ensuring registry server: could not ensure registry service account") return pkgerrors.Wrapf(err, "error ensuring service account: %s", source.GetName()) } sa, err = c.OpClient.GetServiceAccount(sa.GetNamespace(), sa.GetName()) if err != nil { + logger.WithError(err).Error("error ensuring registry server: could not get registry service account") return err } defaultPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace()) if err != nil { + logger.WithError(err).Error("error ensuring registry server: could not get default pod security config") return err } // recreate the pod if no existing pod is serving the latest image or correct spec current, err := c.currentPodsWithCorrectImageAndSpec(logger, source, sa, defaultPodSecurityConfig) if err != nil { + logger.WithError(err).Error("error ensuring registry server: could not get current pods with correct image and spec") return err } overwritePod := overwrite || len(current) == 0 @@ -287,22 +292,29 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata pod, err := source.Pod(sa, defaultPodSecurityConfig) if err != nil { + logger.WithError(err).Error("error ensuring registry server: could not create registry pod") return err } if err := c.ensurePod(logger, source, sa, defaultPodSecurityConfig, overwritePod); err != nil { + logger.WithError(err).Error("error ensuring registry server: could not ensure registry pod") return pkgerrors.Wrapf(err, "error ensuring pod: %s", pod.GetName()) } if err := c.ensureUpdatePod(logger, sa, defaultPodSecurityConfig, source); err != nil { + logger.WithError(err).Error("error ensuring registry server: could not ensure update pod") if _, ok := err.(UpdateNotReadyErr); ok { + logger.WithError(err).Error("error ensuring registry server: ensure update pod error is not of type UpdateNotReadyErr") return err } return pkgerrors.Wrapf(err, "error ensuring updated catalog source pod: %s", pod.GetName()) } + service, err := source.Service() if err != nil { + logger.WithError(err).Error("couldn't get service") return err } if err := c.ensureService(source, overwrite); err != nil { + logger.WithError(err).Error("error ensuring registry server: could not ensure service") return pkgerrors.Wrapf(err, "error ensuring service: %s", service.GetName()) } @@ -310,6 +322,7 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata now := c.now() service, err := source.Service() if err != nil { + logger.WithError(err).Error("error ensuring registry server: could not get service") return err } catalogSource.Status.RegistryServiceStatus = &v1alpha1.RegistryServiceStatus{ @@ -603,6 +616,7 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal serviceAccount := source.ServiceAccount() serviceAccount, err := c.OpClient.GetServiceAccount(serviceAccount.GetNamespace(), serviceAccount.GetName()) if err != nil { + logger.WithError(err).Error("registry service not healthy: could not get service account") if !apierrors.IsNotFound(err) { return false, err } @@ -611,6 +625,7 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal registryPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace()) if err != nil { + logger.WithError(err).Error("registry service not healthy: could not get registry pod security config") return false, err } @@ -618,18 +633,30 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal // TODO: add gRPC health check service, err := c.currentService(source) if err != nil { + logger.WithError(err).Error("registry service not healthy: could not get current service") return false, err } + currentPods, err := c.currentPodsWithCorrectImageAndSpec(logger, source, serviceAccount, registryPodSecurityConfig) if err != nil { + logger.WithError(err).Error("registry service not healthy: could not get current pods") return false, err } + + currentServiceAccount := c.currentServiceAccount(source) if len(currentPods) < 1 || - service == nil || c.currentServiceAccount(source) == nil { + service == nil || currentServiceAccount == nil { + logger.WithFields(logrus.Fields{ + "numCurrentPods": len(currentPods), + "isServiceNil": service == nil, + "isCurrentServiceAccountNil": currentServiceAccount == nil, + }).Error("registry service not healthy: one or more required resources are missing") return false, nil } + podsAreLive, e := detectAndDeleteDeadPods(logger, c.OpClient, currentPods, source.GetNamespace()) if e != nil { + logger.WithError(e).Error("registry service not healthy: could not detect and delete dead pods") return false, fmt.Errorf("error deleting dead pods: %v", e) } return podsAreLive, nil