Skip to content

Commit b013769

Browse files
Bug fix - Service Monitor/Pod Monitor in TargetAllocator doesn't pick up updates to secrets (#4608)
* add secret informer changes * add secret informer changes * add test and fix error in error logging * add change log entry * updating test and changing to debug * clean up * refactor code * add e2e tests * remove password check * fix tests * fix test * simplify tests * fix basic auth struct * fix linting issues * fix unit tests * fix golang issues * fix lint errors * revert unnecessary changes * remove sleep * fix unit test * trigger test * retrigger tests * adding required permissions * add exclusions * change role * revert secret changes * revert rbac
1 parent 20f2a47 commit b013769

File tree

11 files changed

+737
-36
lines changed

11 files changed

+737
-36
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
2+
change_type: bug_fix
3+
4+
# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
5+
component: target allocator
6+
7+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
8+
note: Fix an issue where Service Monitor/Pod Monitor in TargetAllocator doesn't pick up updates to secrets
9+
10+
# One or more tracking issues related to the change
11+
issues: [4091]
12+
13+
# (Optional) One or more lines of additional information to render under the primary note.
14+
# These lines will be padded with 2 spaces and then inserted directly into the document.
15+
# Use pipe (|) for multiline entries.
16+
subtext: |
17+
Fixes issue where service/pod Monitors don't pick up updates to secrets.

cmd/otel-allocator/internal/watcher/promOperator.go

Lines changed: 164 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"github.com/blang/semver/v4"
1515
"github.com/go-logr/logr"
16+
promMonitoring "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring"
1617
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
1718
promv1alpha1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1alpha1"
1819
"github.com/prometheus-operator/prometheus-operator/pkg/assets"
@@ -31,6 +32,7 @@ import (
3132
"k8s.io/apimachinery/pkg/runtime/schema"
3233
"k8s.io/client-go/discovery"
3334
"k8s.io/client-go/kubernetes"
35+
"k8s.io/client-go/metadata"
3436
"k8s.io/client-go/rest"
3537
"k8s.io/client-go/tools/cache"
3638
"k8s.io/client-go/util/retry"
@@ -54,11 +56,21 @@ func NewPrometheusCRWatcher(
5456
slogger := slog.New(logr.ToSlogHandler(logger))
5557
var resourceSelector *prometheus.ResourceSelector
5658

59+
mdClient, err := metadata.NewForConfig(cfg.ClusterConfig)
60+
if err != nil {
61+
return nil, err
62+
}
5763
allowList, denyList := cfg.PrometheusCR.GetAllowDenyLists()
5864

59-
factory := informers.NewMonitoringInformerFactories(allowList, denyList, monitoringclient, allocatorconfig.DefaultResyncTime, nil)
65+
monitoringInformerFactory := informers.NewMonitoringInformerFactories(allowList, denyList, monitoringclient, allocatorconfig.DefaultResyncTime, nil)
6066

61-
monitoringInformers, err := getInformers(factory, cfg.ClusterConfig, promLogger)
67+
// Scope the metadata informer factory to the collector namespace only.
68+
// This is used for the secrets informer so that it only needs namespace-scoped RBAC
69+
// (a Role in kube-system) rather than cluster-wide secrets list/watch access.
70+
secretsAllowList := map[string]struct{}{cfg.CollectorNamespace: {}}
71+
metaDataInformerFactory := informers.NewMetadataInformerFactory(secretsAllowList, denyList, mdClient, allocatorconfig.DefaultResyncTime, nil)
72+
73+
monitoringInformers, err := getInformers(monitoringInformerFactory, cfg.ClusterConfig, promLogger, metaDataInformerFactory)
6274
if err != nil {
6375
return nil, err
6476
}
@@ -197,7 +209,7 @@ func checkCRDAvailability(dcl discovery.DiscoveryInterface, resourceName string)
197209

198210
apiGroups := apiList.Groups
199211
for _, group := range apiGroups {
200-
if group.Name == "monitoring.coreos.com" {
212+
if group.Name == promMonitoring.GroupName {
201213
for _, version := range group.Versions {
202214
resources, err := dcl.ServerResourcesForGroupVersion(version.GroupVersion)
203215
if err != nil {
@@ -247,7 +259,7 @@ func createInformerIfAvailable(
247259
}
248260

249261
// getInformers returns a map of informers for the given resources.
250-
func getInformers(factory informers.FactoriesForNamespaces, clusterConfig *rest.Config, logger *slog.Logger) (map[string]*informers.ForResource, error) {
262+
func getInformers(factory informers.FactoriesForNamespaces, clusterConfig *rest.Config, logger *slog.Logger, metaDataInformerFactory informers.FactoriesForNamespaces) (map[string]*informers.ForResource, error) {
251263
informersMap := make(map[string]*informers.ForResource)
252264

253265
// Get the discovery client
@@ -308,6 +320,16 @@ func getInformers(factory informers.FactoriesForNamespaces, clusterConfig *rest.
308320
informersMap[promv1alpha1.ScrapeConfigName] = scrapeConfigInformer
309321
}
310322

323+
// Use the namespace-scoped secrets metadata informer factory so that secrets
324+
// list/watch only requires a namespaced Role instead of cluster-wide access.
325+
secretInformers, err := informers.NewInformersForResourceWithTransform(metaDataInformerFactory, v1.SchemeGroupVersion.WithResource(string(v1.ResourceSecrets)), informers.PartialObjectMetadataStrip(operator.SecretGVK()))
326+
if err != nil {
327+
return nil, err
328+
}
329+
if secretInformers != nil {
330+
informersMap[string(v1.ResourceSecrets)] = secretInformers
331+
}
332+
311333
return informersMap, nil
312334
}
313335

@@ -374,29 +396,62 @@ func (w *PrometheusCRWatcher) Watch(upstreamEvents chan Event, _ chan error) err
374396
continue
375397
}
376398

377-
// only send an event notification if there isn't one already
378-
resource.AddEventHandler(cache.ResourceEventHandlerFuncs{
379-
// these functions only write to the notification channel if it's empty to avoid blocking
380-
// if scrape config updates are being rate-limited
381-
AddFunc: func(any) {
382-
select {
383-
case notifyEvents <- struct{}{}:
384-
default:
385-
}
386-
},
387-
UpdateFunc: func(any, any) {
388-
select {
389-
case notifyEvents <- struct{}{}:
390-
default:
391-
}
392-
},
393-
DeleteFunc: func(any) {
394-
select {
395-
case notifyEvents <- struct{}{}:
396-
default:
397-
}
398-
},
399-
})
399+
// Use a custom event handler for secrets since secret update requires asset store to be updated so that CRs can pick up updated secrets.
400+
if name == string(v1.ResourceSecrets) {
401+
w.logger.Info("Using custom event handler for secrets informer", "informer", name)
402+
// only send an event notification if there isn't one already
403+
resource.AddEventHandler(cache.ResourceEventHandlerFuncs{
404+
// these functions only write to the notification channel if it's empty to avoid blocking
405+
// if scrape config updates are being rate-limited
406+
AddFunc: func(_ any) {
407+
select {
408+
case notifyEvents <- struct{}{}:
409+
default:
410+
}
411+
},
412+
UpdateFunc: func(oldObj, newObj any) {
413+
if w.handleSecretUpdate(oldObj, newObj) {
414+
select {
415+
case notifyEvents <- struct{}{}:
416+
default:
417+
}
418+
}
419+
},
420+
DeleteFunc: func(obj any) {
421+
if w.handleSecretDelete(obj) {
422+
select {
423+
case notifyEvents <- struct{}{}:
424+
default:
425+
}
426+
}
427+
},
428+
})
429+
} else {
430+
w.logger.Info("Using default event handler for informer", "informer", name)
431+
// only send an event notification if there isn't one already
432+
resource.AddEventHandler(cache.ResourceEventHandlerFuncs{
433+
// these functions only write to the notification channel if it's empty to avoid blocking
434+
// if scrape config updates are being rate-limited
435+
AddFunc: func(any) {
436+
select {
437+
case notifyEvents <- struct{}{}:
438+
default:
439+
}
440+
},
441+
UpdateFunc: func(any, any) {
442+
select {
443+
case notifyEvents <- struct{}{}:
444+
default:
445+
}
446+
},
447+
DeleteFunc: func(any) {
448+
select {
449+
case notifyEvents <- struct{}{}:
450+
default:
451+
}
452+
},
453+
})
454+
}
400455
}
401456
if !success {
402457
return errors.New("failed to sync one of the caches")
@@ -409,6 +464,88 @@ func (w *PrometheusCRWatcher) Watch(upstreamEvents chan Event, _ chan error) err
409464
return nil
410465
}
411466

467+
// handleSecretUpdate handles secret update events and returns true if the config needs to be reloaded.
468+
func (w *PrometheusCRWatcher) handleSecretUpdate(oldObj, newObj any) bool {
469+
oldMeta, _ := oldObj.(metav1.ObjectMetaAccessor)
470+
newMeta, _ := newObj.(metav1.ObjectMetaAccessor)
471+
secretName := newMeta.GetObjectMeta().GetName()
472+
secretNamespace := newMeta.GetObjectMeta().GetNamespace()
473+
474+
_, exists, err := w.store.GetObject(&v1.Secret{
475+
ObjectMeta: metav1.ObjectMeta{
476+
Name: secretName,
477+
Namespace: secretNamespace,
478+
},
479+
})
480+
if !exists || err != nil {
481+
if err != nil {
482+
w.logger.Debug("unexpected store error when checking if secret exists, skipping update", "secret", secretName, "error", err)
483+
}
484+
// if the secret does not exist in the store, we skip the update
485+
return false
486+
}
487+
488+
newSecret, err := w.store.GetSecretClient().Secrets(secretNamespace).Get(context.Background(), secretName, metav1.GetOptions{})
489+
if err != nil {
490+
w.logger.Debug("unexpected store error when getting updated secret", "secret", secretName, "error", err)
491+
return false
492+
}
493+
494+
w.logger.Debug("Updating secret in store", "newObjName", newMeta.GetObjectMeta().GetName(), "newobjnamespace", newMeta.GetObjectMeta().GetNamespace())
495+
if err := w.store.UpdateObject(newSecret); err != nil {
496+
w.logger.Debug("unexpected store error when updating secret", "secret", newMeta.GetObjectMeta().GetName(), "error", err)
497+
return false
498+
}
499+
500+
w.logger.Debug(
501+
"Successfully updated store, sending update event to notifyEvents channel",
502+
"oldObjName", oldMeta.GetObjectMeta().GetName(),
503+
"oldobjnamespace", oldMeta.GetObjectMeta().GetNamespace(),
504+
"newObjName", newMeta.GetObjectMeta().GetName(),
505+
"newobjnamespace", newMeta.GetObjectMeta().GetNamespace(),
506+
)
507+
return true
508+
}
509+
510+
// handleSecretDelete handles secret delete events and returns true if the config needs to be reloaded.
511+
func (w *PrometheusCRWatcher) handleSecretDelete(obj any) bool {
512+
secretMeta, _ := obj.(metav1.ObjectMetaAccessor)
513+
secretName := secretMeta.GetObjectMeta().GetName()
514+
secretNamespace := secretMeta.GetObjectMeta().GetNamespace()
515+
516+
// check if the secret exists in the store
517+
secretObj := &v1.Secret{
518+
ObjectMeta: metav1.ObjectMeta{
519+
Name: secretName,
520+
Namespace: secretNamespace,
521+
},
522+
}
523+
_, exists, err := w.store.GetObject(secretObj)
524+
// if the secret does not exist in the store, we skip the delete
525+
if !exists || err != nil {
526+
if err != nil {
527+
w.logger.Debug("unexpected store error when checking if secret exists, skipping delete", "secret", secretMeta.GetObjectMeta().GetName(), "error", err)
528+
}
529+
// if the secret does not exist in the store, we skip the delete
530+
return false
531+
}
532+
533+
w.logger.Debug("Deleting secret from store", "objName", secretMeta.GetObjectMeta().GetName(), "objnamespace", secretMeta.GetObjectMeta().GetNamespace())
534+
// if the secret exists in the store, we delete it
535+
// and send an event notification to the notifyEvents channel
536+
if err := w.store.DeleteObject(secretObj); err != nil {
537+
w.logger.Debug("unexpected store error when deleting secret", "secret", secretMeta.GetObjectMeta().GetName(), "error", err)
538+
return false
539+
}
540+
541+
w.logger.Debug(
542+
"Successfully removed secret from store, sending update event to notifyEvents channel",
543+
"objName", secretMeta.GetObjectMeta().GetName(),
544+
"objnamespace", secretMeta.GetObjectMeta().GetNamespace(),
545+
)
546+
return true
547+
}
548+
412549
// rateLimitedEventSender sends events to the upstreamEvents channel whenever it gets a notification on the notifyEvents channel,
413550
// but not more frequently than once per w.eventPeriod.
414551
func (w *PrometheusCRWatcher) rateLimitedEventSender(upstreamEvents chan Event, notifyEvents chan struct{}) {

0 commit comments

Comments
 (0)