8
8
"sync"
9
9
"time"
10
10
11
+ "github.com/operator-framework/operator-registry/pkg/api/grpc_health_v1"
11
12
registryclient "github.com/operator-framework/operator-registry/pkg/client"
12
13
errorwrap "github.com/pkg/errors"
13
14
"github.com/sirupsen/logrus"
@@ -302,6 +303,10 @@ func (o *Operator) handleCatSrcDeletion(obj interface{}) {
302
303
delete (o .sources , sourceKey )
303
304
}()
304
305
o .Log .WithField ("source" , sourceKey ).Info ("removed client for deleted catalogsource" )
306
+
307
+ if err := o .catSrcQueueSet .Remove (sourceKey .Name , sourceKey .Namespace ); err != nil {
308
+ o .Log .WithError (err )
309
+ }
305
310
}
306
311
307
312
func (o * Operator ) syncCatalogSources (obj interface {}) (syncError error ) {
@@ -314,18 +319,21 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
314
319
logger := o .Log .WithFields (logrus.Fields {
315
320
"source" : catsrc .GetName (),
316
321
})
317
-
322
+ logger . Debug ( "syncing catsrc" )
318
323
out := catsrc .DeepCopy ()
319
324
sourceKey := resolver.CatalogKey {Name : catsrc .GetName (), Namespace : catsrc .GetNamespace ()}
320
325
321
326
if catsrc .Spec .SourceType == v1alpha1 .SourceTypeInternal || catsrc .Spec .SourceType == v1alpha1 .SourceTypeConfigmap {
327
+ logger .Debug ("checking catsrc configmap state" )
328
+
322
329
// Get the catalog source's config map
323
330
configMap , err := o .lister .CoreV1 ().ConfigMapLister ().ConfigMaps (catsrc .GetNamespace ()).Get (catsrc .Spec .ConfigMap )
324
331
if err != nil {
325
332
return fmt .Errorf ("failed to get catalog config map %s: %s" , catsrc .Spec .ConfigMap , err )
326
333
}
327
334
328
335
if catsrc .Status .ConfigMapResource == nil || catsrc .Status .ConfigMapResource .UID != configMap .GetUID () || catsrc .Status .ConfigMapResource .ResourceVersion != configMap .GetResourceVersion () {
336
+ logger .Debug ("updating catsrc configmap state" )
329
337
// configmap ref nonexistent or updated, write out the new configmap ref to status and exit
330
338
out .Status .ConfigMapResource = & v1alpha1.ConfigMapResourceReference {
331
339
Name : configMap .GetName (),
@@ -351,26 +359,33 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
351
359
return fmt .Errorf ("no reconciler for source type %s" , catsrc .Spec .SourceType )
352
360
}
353
361
362
+ logger .Debug ("catsrc configmap state good, checking registry pod" )
363
+
354
364
// if registry pod hasn't been created or hasn't been updated since the last configmap update, recreate it
355
365
if catsrc .Status .RegistryServiceStatus == nil || catsrc .Status .RegistryServiceStatus .CreatedAt .Before (& catsrc .Status .LastSync ) {
366
+ logger .Debug ("registry pod scheduled for recheck" )
367
+
356
368
if err := reconciler .EnsureRegistryServer (out ); err != nil {
357
369
logger .WithError (err ).Warn ("couldn't ensure registry server" )
358
370
return err
359
371
}
372
+ logger .Debug ("ensured registry pod" )
360
373
361
- if ! catsrc .Status .LastSync .Before (& out .Status .LastSync ) {
362
- return nil
363
- }
374
+ out .Status .RegistryServiceStatus .CreatedAt = timeNow ()
375
+ out .Status .LastSync = timeNow ()
364
376
377
+ logger .Debug ("updating catsrc status" )
365
378
// update status
366
379
if _ , err := o .client .OperatorsV1alpha1 ().CatalogSources (out .GetNamespace ()).UpdateStatus (out ); err != nil {
367
380
return err
368
381
}
369
382
370
383
o .sourcesLastUpdate = timeNow ()
384
+ logger .Debug ("registry pod recreated" )
371
385
372
386
return nil
373
387
}
388
+ logger .Debug ("registry pod state good" )
374
389
375
390
// update operator's view of sources
376
391
sourcesUpdated := false
@@ -379,7 +394,9 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
379
394
defer o .sourcesLock .Unlock ()
380
395
address := catsrc .Status .RegistryServiceStatus .Address ()
381
396
currentSource , ok := o .sources [sourceKey ]
382
- if ! ok || currentSource .Address != address {
397
+ logger = logger .WithField ("currentSource" , sourceKey )
398
+ if ! ok || currentSource .Address != address || catsrc .Status .LastSync .After (currentSource .LastConnect .Time ) {
399
+ logger .Info ("building connection to registry" )
383
400
client , err := registryclient .NewClient (address )
384
401
if err != nil {
385
402
logger .WithError (err ).Warn ("couldn't connect to registry" )
@@ -388,8 +405,42 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
388
405
Address : address ,
389
406
Client : client ,
390
407
LastConnect : timeNow (),
408
+ LastHealthy : metav1.Time {}, // haven't detected healthy yet
391
409
}
392
410
o .sources [sourceKey ] = sourceRef
411
+ currentSource = sourceRef
412
+ sourcesUpdated = true
413
+ }
414
+ if currentSource .LastHealthy .IsZero () {
415
+ logger .Info ("client hasn't yet become healthy, attempt a health check" )
416
+ client , ok := currentSource .Client .(* registryclient.Client )
417
+ if ! ok {
418
+ logger .WithField ("client" , currentSource .Client ).Warn ("unexpected client" )
419
+ return
420
+ }
421
+ res , err := client .Health .Check (context .TODO (), & grpc_health_v1.HealthCheckRequest {Service : "Registry" })
422
+ if err != nil {
423
+ logger .WithError (err ).Debug ("error checking health" )
424
+ if client .Conn .GetState () == connectivity .TransientFailure {
425
+ logger .Debug ("wait for state to change" )
426
+ ctx , _ := context .WithTimeout (context .TODO (), 1 * time .Second )
427
+ if ! client .Conn .WaitForStateChange (ctx , connectivity .TransientFailure ) {
428
+ logger .Debug ("state didn't change, trigger reconnect. this may happen when cached dns is wrong." )
429
+ delete (o .sources , sourceKey )
430
+ if err := o .catSrcQueueSet .Requeue (sourceKey .Name , sourceKey .Namespace ); err != nil {
431
+ logger .WithError (err ).Debug ("error requeueing" )
432
+ }
433
+ return
434
+ }
435
+ }
436
+ return
437
+ }
438
+ if res .Status != grpc_health_v1 .HealthCheckResponse_SERVING {
439
+ logger .WithField ("status" , res .Status .String ()).Debug ("source not healthy" )
440
+ return
441
+ }
442
+ currentSource .LastHealthy = timeNow ()
443
+ o .sources [sourceKey ] = currentSource
393
444
sourcesUpdated = true
394
445
}
395
446
}()
@@ -484,10 +535,12 @@ func (o *Operator) syncSubscriptions(obj interface{}) error {
484
535
485
536
installplanReference , err := o .createInstallPlan (namespace , subs , installPlanApproval , steps )
486
537
if err != nil {
538
+ logger .WithError (err ).Debug ("error creating installplan" )
487
539
return err
488
540
}
489
541
490
542
if err := o .ensureSubscriptionInstallPlanState (namespace , subs , installplanReference ); err != nil {
543
+ logger .WithError (err ).Debug ("error ensuring subscription installplan state" )
491
544
return err
492
545
}
493
546
return nil
@@ -500,6 +553,14 @@ func (o *Operator) ensureResolverSources(logger *logrus.Entry, namespace string)
500
553
o .sourcesLock .RLock ()
501
554
defer o .sourcesLock .RUnlock ()
502
555
for k , ref := range o .sources {
556
+ if ref .LastHealthy .IsZero () {
557
+ logger = logger .WithField ("source" , k )
558
+ logger .Debug ("omitting source, hasn't yet become healthy" )
559
+ if err := o .catSrcQueueSet .Requeue (k .Name , k .Namespace ); err != nil {
560
+ logger .Warn ("error requeueing" )
561
+ }
562
+ continue
563
+ }
503
564
// only resolve in namespace local + global catalogs
504
565
if k .Namespace == namespace || k .Namespace == o .namespace {
505
566
resolverSources [k ] = ref .Client
@@ -513,6 +574,7 @@ func (o *Operator) ensureResolverSources(logger *logrus.Entry, namespace string)
513
574
logger .Warn ("unexpected client" )
514
575
continue
515
576
}
577
+
516
578
logger = logger .WithField ("resolverSource" , k )
517
579
logger .WithField ("clientState" , client .Conn .GetState ()).Debug ("source" )
518
580
if client .Conn .GetState () == connectivity .TransientFailure {
@@ -837,7 +899,7 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error {
837
899
838
900
// Attempt to create the Subscription
839
901
sub .SetNamespace (namespace )
840
- _ , err = o .client .OperatorsV1alpha1 ().Subscriptions (sub .GetNamespace ()).Create (& sub )
902
+ created , err : = o .client .OperatorsV1alpha1 ().Subscriptions (sub .GetNamespace ()).Create (& sub )
841
903
if k8serrors .IsAlreadyExists (err ) {
842
904
// If it already existed, mark the step as Present.
843
905
plan .Status .Plan [i ].Status = v1alpha1 .StepStatusPresent
@@ -846,6 +908,15 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error {
846
908
} else {
847
909
// If no error occurred, mark the step as Created.
848
910
plan .Status .Plan [i ].Status = v1alpha1 .StepStatusCreated
911
+ created .Status .Install = & v1alpha1.InstallPlanReference {
912
+ UID : plan .GetUID (),
913
+ Name : plan .GetName (),
914
+ APIVersion : v1alpha1 .SchemeGroupVersion .String (),
915
+ Kind : v1alpha1 .InstallPlanKind ,
916
+ }
917
+ if _ , err := o .client .OperatorsV1alpha1 ().Subscriptions (sub .GetNamespace ()).UpdateStatus (created ); err != nil {
918
+ o .Log .WithError (err ).Warn ("couldn't set installplan reference on created subscription" )
919
+ }
849
920
}
850
921
case secretKind :
851
922
// TODO: this will confuse bundle users that include secrets in their bundles - this only handles pull secrets
0 commit comments