Skip to content

Commit e739531

Browse files
author
Per Goncalves da Silva
committed
Add more logging around catalog source sync
Signed-off-by: Per Goncalves da Silva <[email protected]>
1 parent efe9779 commit e739531

File tree

2 files changed

+74
-13
lines changed

2 files changed

+74
-13
lines changed

pkg/controller/operators/catalog/operator.go

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,7 @@ func (o *Operator) handleCatSrcDeletion(obj interface{}) {
897897
}
898898

899899
func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, _ error) {
900+
logger.Info("validating catalog source type")
900901
out = in
901902
var err error
902903
switch sourceType := out.Spec.SourceType; sourceType {
@@ -912,41 +913,46 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
912913
err = fmt.Errorf("unknown sourcetype: %s", sourceType)
913914
}
914915
if err != nil {
916+
logger.WithError(err).Error("error validating catalog source type")
915917
out.SetError(v1alpha1.CatalogSourceSpecInvalidError, err)
916918
return
917919
}
918920

919921
// The sourceType is valid, clear all status (other than status conditions array) if there's existing invalid spec reason
920922
if out.Status.Reason == v1alpha1.CatalogSourceSpecInvalidError {
923+
logger.Info("catalog source has invalid spec")
921924
out.Status = v1alpha1.CatalogSourceStatus{
922925
Conditions: out.Status.Conditions,
923926
}
924927
}
925928
continueSync = true
926929

930+
logger.Info("catalog source type validated")
927931
return
928932
}
929933

930934
func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
935+
logger = logger.WithFields(logrus.Fields{
936+
"configmap.namespace": in.Namespace,
937+
"configmap.name": in.Spec.ConfigMap,
938+
})
939+
logger.Info("checking catsrc configmap state")
940+
931941
out = in
932942
if !(in.Spec.SourceType == v1alpha1.SourceTypeInternal || in.Spec.SourceType == v1alpha1.SourceTypeConfigmap) {
933943
continueSync = true
944+
logger.Info("catalog type is neither internal not configmap - bailing without error")
934945
return
935946
}
936947

937948
out = in.DeepCopy()
938949

939-
logger = logger.WithFields(logrus.Fields{
940-
"configmap.namespace": in.Namespace,
941-
"configmap.name": in.Spec.ConfigMap,
942-
})
943-
logger.Info("checking catsrc configmap state")
944-
945950
var updateLabel bool
946951
// Get the catalog source's config map
947952
configMap, err := o.lister.CoreV1().ConfigMapLister().ConfigMaps(in.GetNamespace()).Get(in.Spec.ConfigMap)
948953
// Attempt to look up the CM via api call if there is a cache miss
949954
if apierrors.IsNotFound(err) {
955+
logger.Info("configmap not found in cache - attempting with live client")
950956
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(in.GetNamespace()).Get(context.TODO(), in.Spec.ConfigMap, metav1.GetOptions{})
951957
// Found cm in the cluster, add managed label to configmap
952958
if err == nil {
@@ -961,6 +967,7 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
961967
}
962968
}
963969
if err != nil {
970+
logger.WithError(err).Error("error getting configmap")
964971
syncError = fmt.Errorf("failed to get catalog config map %s: %s", in.Spec.ConfigMap, err)
965972
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
966973
return
@@ -969,11 +976,11 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
969976
if wasOwned := ownerutil.EnsureOwner(configMap, in); !wasOwned || updateLabel {
970977
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(configMap.GetNamespace()).Update(context.TODO(), configMap, metav1.UpdateOptions{})
971978
if err != nil {
979+
logger.WithError(err).Error("error updating configmap")
972980
syncError = fmt.Errorf("unable to write owner onto catalog source configmap - %v", err)
973981
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
974982
return
975983
}
976-
977984
logger.Info("adopted configmap")
978985
}
979986

@@ -992,22 +999,30 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
992999
}
9931000

9941001
continueSync = true
1002+
logger.Info("configmap catalog source sync'ed successfully")
9951003
return
9961004
}
9971005

9981006
func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
9991007
out = in.DeepCopy()
10001008

1001-
logger.Info("synchronizing registry server")
1009+
logger = logger.WithField("step", "syncRegistryServer")
1010+
logger.Info("registry server sync start")
1011+
defer func() {
1012+
logger.WithError(syncError).WithField("continueSync", continueSync).Info("registry server sync end")
1013+
}()
10021014
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
1015+
1016+
logger.Info("getting reconciler for catalog source")
10031017
srcReconciler := o.reconciler.ReconcilerForSource(in)
10041018
if srcReconciler == nil {
1005-
// TODO: Add failure status on catalogsource and remove from sources
1019+
// TODO: Add failure status on catalogsource and remove from sources )
10061020
syncError = fmt.Errorf("no reconciler for source type %s", in.Spec.SourceType)
10071021
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
10081022
return
10091023
}
10101024

1025+
logger.Info("checking registry server health")
10111026
healthy, err := srcReconciler.CheckRegistryServer(logger, in)
10121027
if err != nil {
10131028
syncError = err
@@ -1052,7 +1067,9 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10521067
return
10531068
}
10541069
if out.Spec.UpdateStrategy.RegistryPoll.ParsingError != "" && out.Status.Reason != v1alpha1.CatalogSourceIntervalInvalidError {
1055-
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError))
1070+
err := errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError)
1071+
logger.WithError(err).Info("failed to parse registry poll interval")
1072+
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, err)
10561073
}
10571074
logger.Infof("requeuing registry server sync based on polling interval %s", out.Spec.UpdateStrategy.Interval.Duration.String())
10581075
resyncPeriod := reconciler.SyncRegistryUpdateInterval(out, time.Now())
@@ -1061,13 +1078,18 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10611078
}
10621079

10631080
if err := o.sources.Remove(sourceKey); err != nil {
1064-
o.logger.WithError(err).Debug("error closing client connection")
1081+
o.logger.WithError(err).Info("error closing client connection")
10651082
}
10661083

10671084
return
10681085
}
10691086

10701087
func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
1088+
logger = logger.WithField("step", "syncConnection")
1089+
logger.Info("sync connection start")
1090+
defer func() {
1091+
logger.WithError(syncError).WithField("continueSync", continueSync).Info("sync connection end")
1092+
}()
10711093
out = in.DeepCopy()
10721094

10731095
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
@@ -1101,6 +1123,7 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11011123

11021124
source := o.sources.GetMeta(sourceKey)
11031125
if source == nil {
1126+
logger.Info("source not found - attempting to establish connection")
11041127
source, syncError = connectFunc()
11051128
if syncError != nil {
11061129
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
@@ -1109,10 +1132,15 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11091132

11101133
// Set connection status and return.
11111134
updateConnectionStateFunc(out, source)
1135+
logger.Info("successfully created source and updated connection state")
11121136
return
11131137
}
11141138

11151139
if source.Address != address {
1140+
logger.WithFields(logrus.Fields{
1141+
"source.address": source.Address,
1142+
"catalogsource.address": address,
1143+
}).Info("source and catalog source addresses differ - creating a new source")
11161144
source, syncError = connectFunc()
11171145
if syncError != nil {
11181146
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
@@ -1125,6 +1153,7 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11251153

11261154
// GRPCConnectionState update must fail before
11271155
if out.Status.GRPCConnectionState == nil {
1156+
logger.Info("connection state is nil - attempting to update connection state")
11281157
updateConnectionStateFunc(out, source)
11291158
}
11301159

@@ -1152,7 +1181,10 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
11521181
"catalogsource.name": catsrc.Name,
11531182
"id": queueinformer.NewLoopID(),
11541183
})
1155-
logger.Info("syncing catalog source")
1184+
logger.Info("catalog source sync start")
1185+
defer func() {
1186+
logger.WithError(syncError).Info("catalog source sync end")
1187+
}()
11561188

11571189
syncFunc := func(in *v1alpha1.CatalogSource, chain []CatalogSourceSyncFunc) (out *v1alpha1.CatalogSource, syncErr error) {
11581190
out = in

pkg/controller/registry/reconciler/grpc.go

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
252252
// if service status is nil, we force create every object to ensure they're created the first time
253253
valid, err := isRegistryServiceStatusValid(&source)
254254
if err != nil {
255+
logger.WithError(err).Info("couldn't validate registry service status")
255256
return err
256257
}
257258
overwrite := !valid
@@ -262,22 +263,26 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
262263
//TODO: if any of these error out, we should write a status back (possibly set RegistryServiceStatus to nil so they get recreated)
263264
sa, err := c.ensureSA(source)
264265
if err != nil && !apierrors.IsAlreadyExists(err) {
266+
logger.WithError(err).Error("couldn't ensure registry service account")
265267
return pkgerrors.Wrapf(err, "error ensuring service account: %s", source.GetName())
266268
}
267269

268270
sa, err = c.OpClient.GetServiceAccount(sa.GetNamespace(), sa.GetName())
269271
if err != nil {
272+
logger.WithError(err).Error("couldn't get registry service account")
270273
return err
271274
}
272275

273276
defaultPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
274277
if err != nil {
278+
logger.WithError(err).Error("couldn't get default pod security config")
275279
return err
276280
}
277281

278282
// recreate the pod if no existing pod is serving the latest image or correct spec
279283
current, err := c.currentPodsWithCorrectImageAndSpec(logger, source, sa, defaultPodSecurityConfig)
280284
if err != nil {
285+
logger.WithError(err).Error("couldn't get current pods with correct image and spec")
281286
return err
282287
}
283288
overwritePod := overwrite || len(current) == 0
@@ -287,29 +292,36 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
287292

288293
pod, err := source.Pod(sa, defaultPodSecurityConfig)
289294
if err != nil {
295+
logger.WithError(err).Error("couldn't create registry pod")
290296
return err
291297
}
292298
if err := c.ensurePod(logger, source, sa, defaultPodSecurityConfig, overwritePod); err != nil {
299+
logger.WithError(err).Error("couldn't ensure registry pod")
293300
return pkgerrors.Wrapf(err, "error ensuring pod: %s", pod.GetName())
294301
}
295302
if err := c.ensureUpdatePod(logger, sa, defaultPodSecurityConfig, source); err != nil {
296303
if _, ok := err.(UpdateNotReadyErr); ok {
304+
logger.WithError(err).Error("ensure update pod error is not of type UpdateNotReadyErr")
297305
return err
298306
}
299307
return pkgerrors.Wrapf(err, "error ensuring updated catalog source pod: %s", pod.GetName())
300308
}
301309
service, err := source.Service()
302310
if err != nil {
311+
logger.WithError(err).Error("couldn't get service")
303312
return err
304313
}
305314
if err := c.ensureService(source, overwrite); err != nil {
315+
logger.WithError(err).Error("couldn't ensure service")
306316
return pkgerrors.Wrapf(err, "error ensuring service: %s", service.GetName())
307317
}
308318

309319
if overwritePod {
320+
logger.Info("overwriting pod registry service status")
310321
now := c.now()
311322
service, err := source.Service()
312323
if err != nil {
324+
logger.WithError(err).Error("couldn't get service")
313325
return err
314326
}
315327
catalogSource.Status.RegistryServiceStatus = &v1alpha1.RegistryServiceStatus{
@@ -320,6 +332,8 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
320332
Port: getPort(service),
321333
}
322334
}
335+
336+
logger.Info("registry service ensured successfully")
323337
return nil
324338
}
325339

@@ -581,6 +595,7 @@ func (c *GrpcRegistryReconciler) removePods(pods []*corev1.Pod, namespace string
581595

582596
// CheckRegistryServer returns true if the given CatalogSource is considered healthy; false otherwise.
583597
func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catalogSource *v1alpha1.CatalogSource) (bool, error) {
598+
logger.Info("checking registry server")
584599
source := grpcCatalogSourceDecorator{CatalogSource: catalogSource, createPodAsUser: c.createPodAsUser, opmImage: c.opmImage, utilImage: c.utilImage}
585600

586601
// The CheckRegistryServer function is called by the CatalogSoruce controller before the registry resources are created,
@@ -589,6 +604,7 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
589604
serviceAccount := source.ServiceAccount()
590605
serviceAccount, err := c.OpClient.GetServiceAccount(serviceAccount.GetNamespace(), serviceAccount.GetName())
591606
if err != nil {
607+
logger.WithError(err).Infof("registry service not healthy: could not get service account")
592608
if !apierrors.IsNotFound(err) {
593609
return false, err
594610
}
@@ -597,27 +613,40 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
597613

598614
registryPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
599615
if err != nil {
616+
logger.WithError(err).Info("registry service not healthy: could not get registry pod security config")
600617
return false, err
601618
}
602619

603620
// Check on registry resources
604621
// TODO: add gRPC health check
605622
service, err := c.currentService(source)
606623
if err != nil {
624+
logger.WithError(err).Info("catalog registry service not healthy: could not get current service")
607625
return false, err
608626
}
609627
currentPods, err := c.currentPodsWithCorrectImageAndSpec(logger, source, serviceAccount, registryPodSecurityConfig)
610628
if err != nil {
629+
logger.WithError(err).Info("catalog registry service not healthy: could not get current pods")
611630
return false, err
612631
}
632+
633+
currentServiceAccount := c.currentServiceAccount(source)
613634
if len(currentPods) < 1 ||
614-
service == nil || c.currentServiceAccount(source) == nil {
635+
service == nil || currentServiceAccount == nil {
636+
logger.WithFields(logrus.Fields{
637+
"numCurrentPods": len(currentPods),
638+
"isServiceNil": service == nil,
639+
"isServiceAccountNil": serviceAccount == nil,
640+
}).Info("catalog registry service not healthy: one or more required resources are missing")
615641
return false, nil
616642
}
617643
podsAreLive, e := detectAndDeleteDeadPods(logger, c.OpClient, currentPods, source.GetNamespace())
618644
if e != nil {
645+
logger.WithError(e).Info("catalog registry service not healthy: could not detect and delete dead pods")
619646
return false, fmt.Errorf("error deleting dead pods: %v", e)
620647
}
648+
649+
logger.WithField("podsAreAlive", podsAreLive).Info("registry server is healthy")
621650
return podsAreLive, nil
622651
}
623652

0 commit comments

Comments
 (0)