Skip to content

Commit e13881e

Browse files
author
Per Goncalves da Silva
committed
Add more logging around catalog source sync
Signed-off-by: Per Goncalves da Silva <[email protected]>
1 parent efe9779 commit e13881e

File tree

2 files changed

+97
-15
lines changed

2 files changed

+97
-15
lines changed

pkg/controller/operators/catalog/operator.go

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,11 @@ func (o *Operator) handleCatSrcDeletion(obj interface{}) {
897897
}
898898

899899
func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, _ error) {
900+
logger = logger.WithField("step", "validateSourceType")
901+
logger.Info("validate source type start")
902+
defer func() {
903+
logger.WithField("continueSync", continueSync).Info("validate source type end")
904+
}()
900905
out = in
901906
var err error
902907
switch sourceType := out.Spec.SourceType; sourceType {
@@ -912,41 +917,44 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
912917
err = fmt.Errorf("unknown sourcetype: %s", sourceType)
913918
}
914919
if err != nil {
920+
logger.WithError(err).Error("error validating catalog source type")
915921
out.SetError(v1alpha1.CatalogSourceSpecInvalidError, err)
916922
return
917923
}
918924

919925
// The sourceType is valid, clear all status (other than status conditions array) if there's existing invalid spec reason
920926
if out.Status.Reason == v1alpha1.CatalogSourceSpecInvalidError {
927+
logger.Info("catalog source has invalid spec")
921928
out.Status = v1alpha1.CatalogSourceStatus{
922929
Conditions: out.Status.Conditions,
923930
}
924931
}
925932
continueSync = true
926-
927933
return
928934
}
929935

930936
func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
937+
logger = logger.WithFields(logrus.Fields{
938+
"configmap.namespace": in.Namespace,
939+
"configmap.name": in.Spec.ConfigMap,
940+
})
941+
logger.Info("checking catsrc configmap state")
942+
931943
out = in
932944
if !(in.Spec.SourceType == v1alpha1.SourceTypeInternal || in.Spec.SourceType == v1alpha1.SourceTypeConfigmap) {
933945
continueSync = true
946+
logger.Info("catalog type is neither internal not configmap - bailing without error")
934947
return
935948
}
936949

937950
out = in.DeepCopy()
938951

939-
logger = logger.WithFields(logrus.Fields{
940-
"configmap.namespace": in.Namespace,
941-
"configmap.name": in.Spec.ConfigMap,
942-
})
943-
logger.Info("checking catsrc configmap state")
944-
945952
var updateLabel bool
946953
// Get the catalog source's config map
947954
configMap, err := o.lister.CoreV1().ConfigMapLister().ConfigMaps(in.GetNamespace()).Get(in.Spec.ConfigMap)
948955
// Attempt to look up the CM via api call if there is a cache miss
949956
if apierrors.IsNotFound(err) {
957+
logger.Info("configmap not found in cache - attempting with live client")
950958
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(in.GetNamespace()).Get(context.TODO(), in.Spec.ConfigMap, metav1.GetOptions{})
951959
// Found cm in the cluster, add managed label to configmap
952960
if err == nil {
@@ -961,6 +969,7 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
961969
}
962970
}
963971
if err != nil {
972+
logger.WithError(err).Error("error getting configmap")
964973
syncError = fmt.Errorf("failed to get catalog config map %s: %s", in.Spec.ConfigMap, err)
965974
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
966975
return
@@ -969,11 +978,11 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
969978
if wasOwned := ownerutil.EnsureOwner(configMap, in); !wasOwned || updateLabel {
970979
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(configMap.GetNamespace()).Update(context.TODO(), configMap, metav1.UpdateOptions{})
971980
if err != nil {
981+
logger.WithError(err).Error("error updating configmap")
972982
syncError = fmt.Errorf("unable to write owner onto catalog source configmap - %v", err)
973983
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
974984
return
975985
}
976-
977986
logger.Info("adopted configmap")
978987
}
979988

@@ -992,30 +1001,39 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
9921001
}
9931002

9941003
continueSync = true
1004+
logger.Info("configmap catalog source sync'ed successfully")
9951005
return
9961006
}
9971007

9981008
func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
9991009
out = in.DeepCopy()
10001010

1001-
logger.Info("synchronizing registry server")
1011+
logger = logger.WithField("step", "syncRegistryServer")
1012+
logger.Info("registry server sync start")
1013+
defer func() {
1014+
logger.WithError(syncError).WithField("continueSync", continueSync).Info("registry server sync end")
1015+
}()
10021016
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
1017+
1018+
logger.Info("getting reconciler for catalog source")
10031019
srcReconciler := o.reconciler.ReconcilerForSource(in)
10041020
if srcReconciler == nil {
1005-
// TODO: Add failure status on catalogsource and remove from sources
1021+
// TODO: Add failure status on catalogsource and remove from sources )
10061022
syncError = fmt.Errorf("no reconciler for source type %s", in.Spec.SourceType)
10071023
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
10081024
return
10091025
}
10101026

1027+
logger.Info("checking registry server health")
10111028
healthy, err := srcReconciler.CheckRegistryServer(logger, in)
10121029
if err != nil {
10131030
syncError = err
10141031
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
10151032
return
10161033
}
10171034

1018-
logger.WithField("health", healthy).Infof("checked registry server health")
1035+
logger = logger.WithField("health", healthy)
1036+
logger.Info("checked registry server health")
10191037

10201038
if healthy && in.Status.RegistryServiceStatus != nil {
10211039
logger.Info("registry state good")
@@ -1052,7 +1070,9 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10521070
return
10531071
}
10541072
if out.Spec.UpdateStrategy.RegistryPoll.ParsingError != "" && out.Status.Reason != v1alpha1.CatalogSourceIntervalInvalidError {
1055-
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError))
1073+
err := errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError)
1074+
logger.WithError(err).Info("failed to parse registry poll interval")
1075+
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, err)
10561076
}
10571077
logger.Infof("requeuing registry server sync based on polling interval %s", out.Spec.UpdateStrategy.Interval.Duration.String())
10581078
resyncPeriod := reconciler.SyncRegistryUpdateInterval(out, time.Now())
@@ -1061,13 +1081,18 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10611081
}
10621082

10631083
if err := o.sources.Remove(sourceKey); err != nil {
1064-
o.logger.WithError(err).Debug("error closing client connection")
1084+
o.logger.WithError(err).Info("error closing client connection")
10651085
}
10661086

10671087
return
10681088
}
10691089

10701090
func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
1091+
logger = logger.WithField("step", "syncConnection")
1092+
logger.Info("sync connection start")
1093+
defer func() {
1094+
logger.WithError(syncError).WithField("continueSync", continueSync).Info("sync connection end")
1095+
}()
10711096
out = in.DeepCopy()
10721097

10731098
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
@@ -1101,6 +1126,7 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11011126

11021127
source := o.sources.GetMeta(sourceKey)
11031128
if source == nil {
1129+
logger.Info("source not found - attempting to establish connection")
11041130
source, syncError = connectFunc()
11051131
if syncError != nil {
11061132
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
@@ -1109,10 +1135,15 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11091135

11101136
// Set connection status and return.
11111137
updateConnectionStateFunc(out, source)
1138+
logger.Info("successfully created source and updated connection state")
11121139
return
11131140
}
11141141

11151142
if source.Address != address {
1143+
logger.WithFields(logrus.Fields{
1144+
"source.address": source.Address,
1145+
"catalogsource.address": address,
1146+
}).Info("source and catalog source addresses differ - creating a new source")
11161147
source, syncError = connectFunc()
11171148
if syncError != nil {
11181149
out.SetError(v1alpha1.CatalogSourceRegistryServerError, syncError)
@@ -1125,6 +1156,7 @@ func (o *Operator) syncConnection(logger *logrus.Entry, in *v1alpha1.CatalogSour
11251156

11261157
// GRPCConnectionState update must fail before
11271158
if out.Status.GRPCConnectionState == nil {
1159+
logger.Info("connection state is nil - attempting to update connection state")
11281160
updateConnectionStateFunc(out, source)
11291161
}
11301162

@@ -1152,7 +1184,10 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
11521184
"catalogsource.name": catsrc.Name,
11531185
"id": queueinformer.NewLoopID(),
11541186
})
1155-
logger.Info("syncing catalog source")
1187+
logger.Info("catalog source sync start")
1188+
defer func() {
1189+
logger.WithError(syncError).Info("catalog source sync end")
1190+
}()
11561191

11571192
syncFunc := func(in *v1alpha1.CatalogSource, chain []CatalogSourceSyncFunc) (out *v1alpha1.CatalogSource, syncErr error) {
11581193
out = in

pkg/controller/registry/reconciler/grpc.go

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,11 +247,18 @@ func correctImages(source grpcCatalogSourceDecorator, pod *corev1.Pod) bool {
247247

248248
// EnsureRegistryServer ensures that all components of registry server are up to date.
249249
func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, catalogSource *v1alpha1.CatalogSource) error {
250+
logger = logger.WithField("step", "ensureRegistryServer")
251+
logger.Info("ensure registry server start")
252+
defer func() {
253+
logger.Info("ensure registry service end")
254+
}()
250255
source := grpcCatalogSourceDecorator{CatalogSource: catalogSource, createPodAsUser: c.createPodAsUser, opmImage: c.opmImage, utilImage: c.utilImage}
251256

252257
// if service status is nil, we force create every object to ensure they're created the first time
258+
logger.Info("validating registry service status")
253259
valid, err := isRegistryServiceStatusValid(&source)
254260
if err != nil {
261+
logger.WithError(err).Info("couldn't validate registry service status")
255262
return err
256263
}
257264
overwrite := !valid
@@ -260,24 +267,30 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
260267
}
261268

262269
//TODO: if any of these error out, we should write a status back (possibly set RegistryServiceStatus to nil so they get recreated)
270+
logger.Info("ensuring registry server service account")
263271
sa, err := c.ensureSA(source)
264272
if err != nil && !apierrors.IsAlreadyExists(err) {
273+
logger.WithError(err).Error("couldn't ensure registry service account")
265274
return pkgerrors.Wrapf(err, "error ensuring service account: %s", source.GetName())
266275
}
267276

268277
sa, err = c.OpClient.GetServiceAccount(sa.GetNamespace(), sa.GetName())
269278
if err != nil {
279+
logger.WithError(err).Error("couldn't get registry service account")
270280
return err
271281
}
272282

283+
logger.Info("ensuring registry server pod")
273284
defaultPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
274285
if err != nil {
286+
logger.WithError(err).Error("couldn't get default pod security config")
275287
return err
276288
}
277289

278290
// recreate the pod if no existing pod is serving the latest image or correct spec
279291
current, err := c.currentPodsWithCorrectImageAndSpec(logger, source, sa, defaultPodSecurityConfig)
280292
if err != nil {
293+
logger.WithError(err).Error("couldn't get current pods with correct image and spec")
281294
return err
282295
}
283296
overwritePod := overwrite || len(current) == 0
@@ -287,29 +300,39 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
287300

288301
pod, err := source.Pod(sa, defaultPodSecurityConfig)
289302
if err != nil {
303+
logger.WithError(err).Error("couldn't create registry pod")
290304
return err
291305
}
292306
if err := c.ensurePod(logger, source, sa, defaultPodSecurityConfig, overwritePod); err != nil {
307+
logger.WithError(err).Error("couldn't ensure registry pod")
293308
return pkgerrors.Wrapf(err, "error ensuring pod: %s", pod.GetName())
294309
}
295310
if err := c.ensureUpdatePod(logger, sa, defaultPodSecurityConfig, source); err != nil {
311+
logger.WithError(err).Info("error ensuring registry server pod")
296312
if _, ok := err.(UpdateNotReadyErr); ok {
313+
logger.WithError(err).Error("ensure update pod error is not of type UpdateNotReadyErr")
297314
return err
298315
}
299316
return pkgerrors.Wrapf(err, "error ensuring updated catalog source pod: %s", pod.GetName())
300317
}
318+
319+
logger.Info("ensuring registry server service")
301320
service, err := source.Service()
302321
if err != nil {
322+
logger.WithError(err).Error("couldn't get service")
303323
return err
304324
}
305325
if err := c.ensureService(source, overwrite); err != nil {
326+
logger.WithError(err).Error("couldn't ensure service")
306327
return pkgerrors.Wrapf(err, "error ensuring service: %s", service.GetName())
307328
}
308329

309330
if overwritePod {
331+
logger.Info("overwriting pod registry service status")
310332
now := c.now()
311333
service, err := source.Service()
312334
if err != nil {
335+
logger.WithError(err).Error("couldn't get service")
313336
return err
314337
}
315338
catalogSource.Status.RegistryServiceStatus = &v1alpha1.RegistryServiceStatus{
@@ -581,14 +604,22 @@ func (c *GrpcRegistryReconciler) removePods(pods []*corev1.Pod, namespace string
581604

582605
// CheckRegistryServer returns true if the given CatalogSource is considered healthy; false otherwise.
583606
func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catalogSource *v1alpha1.CatalogSource) (bool, error) {
607+
logger = logger.WithField("step", "checkRegistryServer")
608+
logger.Info("check registry server start")
609+
defer func() {
610+
logger.Info("check registry server end")
611+
}()
612+
584613
source := grpcCatalogSourceDecorator{CatalogSource: catalogSource, createPodAsUser: c.createPodAsUser, opmImage: c.opmImage, utilImage: c.utilImage}
585614

586615
// The CheckRegistryServer function is called by the CatalogSoruce controller before the registry resources are created,
587616
// returning a IsNotFound error will cause the controller to exit and never create the resources, so we should
588617
// only return an error if it is something other than a NotFound error.
618+
logger.Info("getting registry server resources (sa, svc, pods)")
589619
serviceAccount := source.ServiceAccount()
590620
serviceAccount, err := c.OpClient.GetServiceAccount(serviceAccount.GetNamespace(), serviceAccount.GetName())
591621
if err != nil {
622+
logger.WithError(err).Infof("registry service not healthy: could not get service account")
592623
if !apierrors.IsNotFound(err) {
593624
return false, err
594625
}
@@ -597,27 +628,43 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
597628

598629
registryPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
599630
if err != nil {
631+
logger.WithError(err).Info("registry service not healthy: could not get registry pod security config")
600632
return false, err
601633
}
602634

603635
// Check on registry resources
604636
// TODO: add gRPC health check
605637
service, err := c.currentService(source)
606638
if err != nil {
639+
logger.WithError(err).Info("registry service not healthy: could not get current service")
607640
return false, err
608641
}
642+
609643
currentPods, err := c.currentPodsWithCorrectImageAndSpec(logger, source, serviceAccount, registryPodSecurityConfig)
610644
if err != nil {
645+
logger.WithError(err).Info("registry service not healthy: could not get current pods")
611646
return false, err
612647
}
648+
649+
logger.Info("checking registry server resources")
650+
currentServiceAccount := c.currentServiceAccount(source)
613651
if len(currentPods) < 1 ||
614-
service == nil || c.currentServiceAccount(source) == nil {
652+
service == nil || currentServiceAccount == nil {
653+
logger.WithFields(logrus.Fields{
654+
"numCurrentPods": len(currentPods),
655+
"isServiceNil": service == nil,
656+
"isServiceAccountNil": serviceAccount == nil,
657+
}).Info("registry service not healthy: one or more required resources are missing")
615658
return false, nil
616659
}
660+
661+
logger.Info("cleaning up registry server dead pods")
617662
podsAreLive, e := detectAndDeleteDeadPods(logger, c.OpClient, currentPods, source.GetNamespace())
618663
if e != nil {
664+
logger.WithError(e).Info("registry service not healthy: could not detect and delete dead pods")
619665
return false, fmt.Errorf("error deleting dead pods: %v", e)
620666
}
667+
621668
return podsAreLive, nil
622669
}
623670

0 commit comments

Comments
 (0)