Skip to content

Commit a5ec49a

Browse files
author
Per Goncalves da Silva
committed
Add more logging around catalog source sync
Signed-off-by: Per Goncalves da Silva <[email protected]>
1 parent efe9779 commit a5ec49a

File tree

2 files changed

+94
-14
lines changed

2 files changed

+94
-14
lines changed

pkg/controller/operators/catalog/operator.go

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,7 @@ func (o *Operator) handleCatSrcDeletion(obj interface{}) {
897897
}
898898

899899
func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, _ error) {
900+
logger.Info("validating catalog source type")
900901
out = in
901902
var err error
902903
switch sourceType := out.Spec.SourceType; sourceType {
@@ -912,41 +913,46 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
912913
err = fmt.Errorf("unknown sourcetype: %s", sourceType)
913914
}
914915
if err != nil {
916+
logger.WithError(err).Error("error validating catalog source type")
915917
out.SetError(v1alpha1.CatalogSourceSpecInvalidError, err)
916918
return
917919
}
918920

919921
// The sourceType is valid, clear all status (other than status conditions array) if there's existing invalid spec reason
920922
if out.Status.Reason == v1alpha1.CatalogSourceSpecInvalidError {
923+
logger.Info("catalog source has invalid spec")
921924
out.Status = v1alpha1.CatalogSourceStatus{
922925
Conditions: out.Status.Conditions,
923926
}
924927
}
925928
continueSync = true
926929

930+
logger.Info("catalog source type validated")
927931
return
928932
}
929933

930934
func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
935+
logger = logger.WithFields(logrus.Fields{
936+
"configmap.namespace": in.Namespace,
937+
"configmap.name": in.Spec.ConfigMap,
938+
})
939+
logger.Info("checking catsrc configmap state")
940+
931941
out = in
932942
if !(in.Spec.SourceType == v1alpha1.SourceTypeInternal || in.Spec.SourceType == v1alpha1.SourceTypeConfigmap) {
933943
continueSync = true
944+
logger.Info("catalog type is neither internal not configmap - bailing without error")
934945
return
935946
}
936947

937948
out = in.DeepCopy()
938949

939-
logger = logger.WithFields(logrus.Fields{
940-
"configmap.namespace": in.Namespace,
941-
"configmap.name": in.Spec.ConfigMap,
942-
})
943-
logger.Info("checking catsrc configmap state")
944-
945950
var updateLabel bool
946951
// Get the catalog source's config map
947952
configMap, err := o.lister.CoreV1().ConfigMapLister().ConfigMaps(in.GetNamespace()).Get(in.Spec.ConfigMap)
948953
// Attempt to look up the CM via api call if there is a cache miss
949954
if apierrors.IsNotFound(err) {
955+
logger.Info("configmap not found in cache - attempting with live client")
950956
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(in.GetNamespace()).Get(context.TODO(), in.Spec.ConfigMap, metav1.GetOptions{})
951957
// Found cm in the cluster, add managed label to configmap
952958
if err == nil {
@@ -961,6 +967,7 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
961967
}
962968
}
963969
if err != nil {
970+
logger.WithError(err).Error("error getting configmap")
964971
syncError = fmt.Errorf("failed to get catalog config map %s: %s", in.Spec.ConfigMap, err)
965972
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
966973
return
@@ -969,11 +976,11 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
969976
if wasOwned := ownerutil.EnsureOwner(configMap, in); !wasOwned || updateLabel {
970977
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(configMap.GetNamespace()).Update(context.TODO(), configMap, metav1.UpdateOptions{})
971978
if err != nil {
979+
logger.WithError(err).Error("error updating configmap")
972980
syncError = fmt.Errorf("unable to write owner onto catalog source configmap - %v", err)
973981
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
974982
return
975983
}
976-
977984
logger.Info("adopted configmap")
978985
}
979986

@@ -992,30 +999,39 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
992999
}
9931000

9941001
continueSync = true
1002+
logger.Info("configmap catalog source sync'ed successfully")
9951003
return
9961004
}
9971005

9981006
func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
9991007
out = in.DeepCopy()
10001008

1001-
logger.Info("synchronizing registry server")
1009+
logger = logger.WithField("step", "syncRegistryServer")
1010+
logger.Info("registry server sync start")
1011+
defer func() {
1012+
logger.WithError(syncError).WithField("continueSync", continueSync).Info("registry server sync end")
1013+
}()
10021014
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
1015+
1016+
logger.Info("getting reconciler for catalog source")
10031017
srcReconciler := o.reconciler.ReconcilerForSource(in)
10041018
if srcReconciler == nil {
1005-
// TODO: Add failure status on catalogsource and remove from sources
1019+
// TODO: Add failure status on catalogsource and remove from sources )
10061020
syncError = fmt.Errorf("no reconciler for source type %s", in.Spec.SourceType)
10071021
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
10081022
return
10091023
}
10101024

1025+
logger.Info("checking registry server health")
10111026
healthy, err := srcReconciler.CheckRegistryServer(logger, in)
10121027
if err != nil {
10131028
syncError = err
10141029
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
10151030
return
10161031
}
10171032

1018-
logger.WithField("health", healthy).Infof("checked registry server health")
1033+
logger = logger.WithField("health", healthy)
1034+
logger.Info("checked registry server health")
10191035

10201036
if healthy && in.Status.RegistryServiceStatus != nil {
10211037
logger.Info("registry state good")
@@ -1052,7 +1068,9 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10521068
return
10531069
}
10541070
if out.Spec.UpdateStrategy.RegistryPoll.ParsingError != "" && out.Status.Reason != v1alpha1.CatalogSourceIntervalInvalidError {
1055-
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError))
1071+
err := errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError)
1072+
logger.WithError(err).Info("failed to parse registry poll interval")
1073+
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, err)
10561074
}
10571075
logger.Infof("requeuing registry server sync based on polling interval %s", out.Spec.UpdateStrategy.Interval.Duration.String())
10581076
resyncPeriod := reconciler.SyncRegistryUpdateInterval(out, time.Now())
@@ -1061,13 +1079,18 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10611079
}
10621080

10631081
if err := o.sources.Remove(sourceKey); err != nil {
1064-
o.logger.WithError(err).Debug("error closing client connection")
1082+
o.logger.WithError(err).Info("error closing client connection")
10651083
}
10661084

10671085
return
10681086
}
10691087

10701088
func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
1089+
logger = logger.WithField("step", "syncConnection")
1090+
logger.Info("sync connection start")
1091+
defer func() {
1092+
logger.WithError(syncError).WithField("continueSync", continueSync).Info("sync connection end")
1093+
}()
10711094
out = in.DeepCopy()
10721095

10731096
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
@@ -1101,6 +1124,7 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11011124

11021125
source := o.sources.GetMeta(sourceKey)
11031126
if source == nil {
1127+
logger.Info("source not found - attempting to establish connection")
11041128
source, syncError = connectFunc()
11051129
if syncError != nil {
11061130
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
@@ -1109,10 +1133,15 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11091133

11101134
// Set connection status and return.
11111135
updateConnectionStateFunc(out, source)
1136+
logger.Info("successfully created source and updated connection state")
11121137
return
11131138
}
11141139

11151140
if source.Address != address {
1141+
logger.WithFields(logrus.Fields{
1142+
"source.address": source.Address,
1143+
"catalogsource.address": address,
1144+
}).Info("source and catalog source addresses differ - creating a new source")
11161145
source, syncError = connectFunc()
11171146
if syncError != nil {
11181147
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
@@ -1125,6 +1154,7 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11251154

11261155
// GRPCConnectionState update must fail before
11271156
if out.Status.GRPCConnectionState == nil {
1157+
logger.Info("connection state is nil - attempting to update connection state")
11281158
updateConnectionStateFunc(out, source)
11291159
}
11301160

@@ -1152,7 +1182,10 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
11521182
"catalogsource.name": catsrc.Name,
11531183
"id": queueinformer.NewLoopID(),
11541184
})
1155-
logger.Info("syncing catalog source")
1185+
logger.Info("catalog source sync start")
1186+
defer func() {
1187+
logger.WithError(syncError).Info("catalog source sync end")
1188+
}()
11561189

11571190
syncFunc := func(in *v1alpha1.CatalogSource, chain []CatalogSourceSyncFunc) (out *v1alpha1.CatalogSource, syncErr error) {
11581191
out = in

pkg/controller/registry/reconciler/grpc.go

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,11 +247,18 @@ func correctImages(source grpcCatalogSourceDecorator, pod *corev1.Pod) bool {
247247

248248
// EnsureRegistryServer ensures that all components of registry server are up to date.
249249
func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, catalogSource *v1alpha1.CatalogSource) error {
250+
logger = logger.WithField("step", "ensureRegistryServer")
251+
logger.Info("ensure registry server start")
252+
defer func() {
253+
logger.Info("ensure registry service end")
254+
}()
250255
source := grpcCatalogSourceDecorator{CatalogSource: catalogSource, createPodAsUser: c.createPodAsUser, opmImage: c.opmImage, utilImage: c.utilImage}
251256

252257
// if service status is nil, we force create every object to ensure they're created the first time
258+
logger.Info("validating registry service status")
253259
valid, err := isRegistryServiceStatusValid(&source)
254260
if err != nil {
261+
logger.WithError(err).Info("couldn't validate registry service status")
255262
return err
256263
}
257264
overwrite := !valid
@@ -260,24 +267,30 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
260267
}
261268

262269
//TODO: if any of these error out, we should write a status back (possibly set RegistryServiceStatus to nil so they get recreated)
270+
logger.Info("ensuring registry server service account")
263271
sa, err := c.ensureSA(source)
264272
if err != nil && !apierrors.IsAlreadyExists(err) {
273+
logger.WithError(err).Error("couldn't ensure registry service account")
265274
return pkgerrors.Wrapf(err, "error ensuring service account: %s", source.GetName())
266275
}
267276

268277
sa, err = c.OpClient.GetServiceAccount(sa.GetNamespace(), sa.GetName())
269278
if err != nil {
279+
logger.WithError(err).Error("couldn't get registry service account")
270280
return err
271281
}
272282

283+
logger.Info("ensuring registry server pod")
273284
defaultPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
274285
if err != nil {
286+
logger.WithError(err).Error("couldn't get default pod security config")
275287
return err
276288
}
277289

278290
// recreate the pod if no existing pod is serving the latest image or correct spec
279291
current, err := c.currentPodsWithCorrectImageAndSpec(logger, source, sa, defaultPodSecurityConfig)
280292
if err != nil {
293+
logger.WithError(err).Error("couldn't get current pods with correct image and spec")
281294
return err
282295
}
283296
overwritePod := overwrite || len(current) == 0
@@ -287,29 +300,39 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
287300

288301
pod, err := source.Pod(sa, defaultPodSecurityConfig)
289302
if err != nil {
303+
logger.WithError(err).Error("couldn't create registry pod")
290304
return err
291305
}
292306
if err := c.ensurePod(logger, source, sa, defaultPodSecurityConfig, overwritePod); err != nil {
307+
logger.WithError(err).Error("couldn't ensure registry pod")
293308
return pkgerrors.Wrapf(err, "error ensuring pod: %s", pod.GetName())
294309
}
295310
if err := c.ensureUpdatePod(logger, sa, defaultPodSecurityConfig, source); err != nil {
311+
logger.WithError(err).Info("error ensuring registry server pod")
296312
if _, ok := err.(UpdateNotReadyErr); ok {
313+
logger.WithError(err).Error("ensure update pod error is not of type UpdateNotReadyErr")
297314
return err
298315
}
299316
return pkgerrors.Wrapf(err, "error ensuring updated catalog source pod: %s", pod.GetName())
300317
}
318+
319+
logger.Info("ensuring registry server service")
301320
service, err := source.Service()
302321
if err != nil {
322+
logger.WithError(err).Error("couldn't get service")
303323
return err
304324
}
305325
if err := c.ensureService(source, overwrite); err != nil {
326+
logger.WithError(err).Error("couldn't ensure service")
306327
return pkgerrors.Wrapf(err, "error ensuring service: %s", service.GetName())
307328
}
308329

309330
if overwritePod {
331+
logger.Info("overwriting pod registry service status")
310332
now := c.now()
311333
service, err := source.Service()
312334
if err != nil {
335+
logger.WithError(err).Error("couldn't get service")
313336
return err
314337
}
315338
catalogSource.Status.RegistryServiceStatus = &v1alpha1.RegistryServiceStatus{
@@ -581,14 +604,22 @@ func (c *GrpcRegistryReconciler) removePods(pods []*corev1.Pod, namespace string
581604

582605
// CheckRegistryServer returns true if the given CatalogSource is considered healthy; false otherwise.
583606
func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catalogSource *v1alpha1.CatalogSource) (bool, error) {
607+
logger = logger.WithField("step", "checkRegistryServer")
608+
logger.Info("check registry server start")
609+
defer func() {
610+
logger.Info("check registry server end")
611+
}()
612+
584613
source := grpcCatalogSourceDecorator{CatalogSource: catalogSource, createPodAsUser: c.createPodAsUser, opmImage: c.opmImage, utilImage: c.utilImage}
585614

586615
// The CheckRegistryServer function is called by the CatalogSoruce controller before the registry resources are created,
587616
// returning a IsNotFound error will cause the controller to exit and never create the resources, so we should
588617
// only return an error if it is something other than a NotFound error.
618+
logger.Info("getting registry server resources (sa, svc, pods)")
589619
serviceAccount := source.ServiceAccount()
590620
serviceAccount, err := c.OpClient.GetServiceAccount(serviceAccount.GetNamespace(), serviceAccount.GetName())
591621
if err != nil {
622+
logger.WithError(err).Infof("registry service not healthy: could not get service account")
592623
if !apierrors.IsNotFound(err) {
593624
return false, err
594625
}
@@ -597,27 +628,43 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
597628

598629
registryPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
599630
if err != nil {
631+
logger.WithError(err).Info("registry service not healthy: could not get registry pod security config")
600632
return false, err
601633
}
602634

603635
// Check on registry resources
604636
// TODO: add gRPC health check
605637
service, err := c.currentService(source)
606638
if err != nil {
639+
logger.WithError(err).Info("registry service not healthy: could not get current service")
607640
return false, err
608641
}
642+
609643
currentPods, err := c.currentPodsWithCorrectImageAndSpec(logger, source, serviceAccount, registryPodSecurityConfig)
610644
if err != nil {
645+
logger.WithError(err).Info("registry service not healthy: could not get current pods")
611646
return false, err
612647
}
648+
649+
logger.Info("checking registry server resources")
650+
currentServiceAccount := c.currentServiceAccount(source)
613651
if len(currentPods) < 1 ||
614-
service == nil || c.currentServiceAccount(source) == nil {
652+
service == nil || currentServiceAccount == nil {
653+
logger.WithFields(logrus.Fields{
654+
"numCurrentPods": len(currentPods),
655+
"isServiceNil": service == nil,
656+
"isServiceAccountNil": serviceAccount == nil,
657+
}).Info("registry service not healthy: one or more required resources are missing")
615658
return false, nil
616659
}
660+
661+
logger.Info("cleaning up registry server dead pods")
617662
podsAreLive, e := detectAndDeleteDeadPods(logger, c.OpClient, currentPods, source.GetNamespace())
618663
if e != nil {
664+
logger.WithError(e).Info("registry service not healthy: could not detect and delete dead pods")
619665
return false, fmt.Errorf("error deleting dead pods: %v", e)
620666
}
667+
621668
return podsAreLive, nil
622669
}
623670

0 commit comments

Comments
 (0)