Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .chloggen/check-crd-availability.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
component: target allocator

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: check CRD availability before registering informers

# One or more tracking issues related to the change
issues: [3987]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
3 changes: 3 additions & 0 deletions apis/v1beta1/targetallocator_rbac.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ var (
}, {
NonResourceURLs: []string{"/metrics"},
Verbs: []string{"get"},
}, {
NonResourceURLs: []string{"/api", "/api/*", "/apis", "/apis/*"},
Verbs: []string{"get"},
},
}
)
Expand Down
171 changes: 147 additions & 24 deletions cmd/otel-allocator/internal/watcher/promOperator.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ import (
"gopkg.in/yaml.v2"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/discovery"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/retry"

Expand Down Expand Up @@ -57,7 +60,7 @@ func NewPrometheusCRWatcher(ctx context.Context, logger logr.Logger, cfg allocat

factory := informers.NewMonitoringInformerFactories(allowList, denyList, mClient, allocatorconfig.DefaultResyncTime, nil)

monitoringInformers, err := getInformers(factory)
monitoringInformers, err := getInformers(factory, cfg.ClusterConfig, promLogger)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -188,34 +191,126 @@ func getNamespaceInformer(ctx context.Context, allowList, denyList map[string]st

}

// checkCRDAvailability reports whether the named CRD (plural resource name,
// e.g. "servicemonitors") is served by the monitoring.coreos.com API group.
// A discovery failure is returned as an error; absence is (false, nil).
func checkCRDAvailability(dcl discovery.DiscoveryInterface, resourceName string) (bool, error) {
	apiList, err := dcl.ServerGroups()
	if err != nil {
		return false, err
	}

	for _, group := range apiList.Groups {
		// Only the prometheus-operator API group is of interest here.
		if group.Name != "monitoring.coreos.com" {
			continue
		}
		// The CRD may be served under any of the group's versions.
		for _, version := range group.Versions {
			resources, err := dcl.ServerResourcesForGroupVersion(version.GroupVersion)
			if err != nil {
				return false, err
			}
			for _, res := range resources.APIResources {
				if res.Name == resourceName {
					return true, nil
				}
			}
		}
	}

	return false, nil
}

// createInformerIfAvailable builds an informer for groupVersion only when the
// corresponding CRD is installed in the cluster. If the CRD is missing — or
// the availability check itself fails — it logs a warning and returns
// (nil, nil), deliberately treating both cases as "resource unavailable"
// rather than a hard error.
func createInformerIfAvailable(
	factory informers.FactoriesForNamespaces,
	dcl discovery.DiscoveryInterface,
	resourceName string,
	groupVersion schema.GroupVersionResource,
	logger *slog.Logger,
) (*informers.ForResource, error) {
	switch available, err := checkCRDAvailability(dcl, resourceName); {
	case err != nil:
		// Best-effort: a discovery failure is downgraded to "unavailable".
		logger.Warn("Failed to check CRD availability", "resource", resourceName, "error", err)
		return nil, nil
	case !available:
		logger.Warn("CRD not available, skipping informer", "resource", resourceName)
		return nil, nil
	}

	informer, err := informers.NewInformersForResource(factory, groupVersion)
	if err != nil {
		return nil, fmt.Errorf("failed to create informer for %s: %w", resourceName, err)
	}
	return informer, nil
}

// getInformers returns a map of informers for the given resources.
func getInformers(factory informers.FactoriesForNamespaces) (map[string]*informers.ForResource, error) {
serviceMonitorInformers, err := informers.NewInformersForResource(factory, monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ServiceMonitorName))
func getInformers(factory informers.FactoriesForNamespaces, clusterConfig *rest.Config, logger *slog.Logger) (map[string]*informers.ForResource, error) {
informersMap := make(map[string]*informers.ForResource)

// Get the discovery client
dcl, err := discovery.NewDiscoveryClientForConfig(clusterConfig)
if err != nil {
return nil, fmt.Errorf("failed to create discovery client: %w", err)
}

// ServiceMonitor
serviceMonitorInformer, err := createInformerIfAvailable(
factory, dcl, "servicemonitors",
monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ServiceMonitorName),
logger,
)
if err != nil {
return nil, err
}
if serviceMonitorInformer != nil {
informersMap[monitoringv1.ServiceMonitorName] = serviceMonitorInformer
}

podMonitorInformers, err := informers.NewInformersForResource(factory, monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.PodMonitorName))
// PodMonitor
podMonitorInformer, err := createInformerIfAvailable(
factory, dcl, "podmonitors",
monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.PodMonitorName),
logger,
)
if err != nil {
return nil, err
}
if podMonitorInformer != nil {
informersMap[monitoringv1.PodMonitorName] = podMonitorInformer
}

probeInformers, err := informers.NewInformersForResource(factory, monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ProbeName))
// Probe
probeInformer, err := createInformerIfAvailable(
factory, dcl, "probes",
monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ProbeName),
logger,
)
if err != nil {
return nil, err
}
if probeInformer != nil {
informersMap[monitoringv1.ProbeName] = probeInformer
}

scrapeConfigInformers, err := informers.NewInformersForResource(factory, promv1alpha1.SchemeGroupVersion.WithResource(promv1alpha1.ScrapeConfigName))
// ScrapeConfig
scrapeConfigInformer, err := createInformerIfAvailable(
factory, dcl, "scrapeconfigs",
promv1alpha1.SchemeGroupVersion.WithResource(promv1alpha1.ScrapeConfigName),
logger,
)
if err != nil {
return nil, err
}
if scrapeConfigInformer != nil {
informersMap[promv1alpha1.ScrapeConfigName] = scrapeConfigInformer
}

return map[string]*informers.ForResource{
monitoringv1.ServiceMonitorName: serviceMonitorInformers,
monitoringv1.PodMonitorName: podMonitorInformers,
monitoringv1.ProbeName: probeInformers,
promv1alpha1.ScrapeConfigName: scrapeConfigInformers,
}, nil
return informersMap, nil
}

// Watch wrapped informers and wait for an initial sync.
Expand Down Expand Up @@ -267,7 +362,13 @@ func (w *PrometheusCRWatcher) Watch(upstreamEvents chan Event, upstreamErrors ch
w.logger.Info("Unable to watch namespaces since namespace informer is nil")
}

// Only attempt to sync informers that were actually created
for name, resource := range w.informers {
if resource == nil {
w.logger.Info("Skipping nil informer", "informer", name)
continue
}

resource.Start(w.stopChannel)

if ok := w.WaitForNamedCacheSync(name, resource.HasSynced); !ok {
Expand Down Expand Up @@ -351,24 +452,46 @@ func (w *PrometheusCRWatcher) LoadConfig(ctx context.Context) (*promconfig.Confi
promCfg := &promconfig.Config{}

if w.resourceSelector != nil {
serviceMonitorInstances, err := w.resourceSelector.SelectServiceMonitors(ctx, w.informers[monitoringv1.ServiceMonitorName].ListAllByNamespace)
if err != nil {
return nil, err
// Initialize empty maps for all resource types
serviceMonitorInstances := make(map[string]*monitoringv1.ServiceMonitor)
podMonitorInstances := make(map[string]*monitoringv1.PodMonitor)
probeInstances := make(map[string]*monitoringv1.Probe)
scrapeConfigInstances := make(map[string]*promv1alpha1.ScrapeConfig)

// Get ServiceMonitors if the informer exists
if informer, ok := w.informers[monitoringv1.ServiceMonitorName]; ok {
instances, err := w.resourceSelector.SelectServiceMonitors(ctx, informer.ListAllByNamespace)
if err != nil {
return nil, err
}
serviceMonitorInstances = instances
}

podMonitorInstances, err := w.resourceSelector.SelectPodMonitors(ctx, w.informers[monitoringv1.PodMonitorName].ListAllByNamespace)
if err != nil {
return nil, err
// Get PodMonitors if the informer exists
if informer, ok := w.informers[monitoringv1.PodMonitorName]; ok {
instances, err := w.resourceSelector.SelectPodMonitors(ctx, informer.ListAllByNamespace)
if err != nil {
return nil, err
}
podMonitorInstances = instances
}

probeInstances, err := w.resourceSelector.SelectProbes(ctx, w.informers[monitoringv1.ProbeName].ListAllByNamespace)
if err != nil {
return nil, err
// Get Probes if the informer exists
if informer, ok := w.informers[monitoringv1.ProbeName]; ok {
instances, err := w.resourceSelector.SelectProbes(ctx, informer.ListAllByNamespace)
if err != nil {
return nil, err
}
probeInstances = instances
}

scrapeConfigInstances, err := w.resourceSelector.SelectScrapeConfigs(ctx, w.informers[promv1alpha1.ScrapeConfigName].ListAllByNamespace)
if err != nil {
return nil, err
// Get ScrapeConfigs if the informer exists
if informer, ok := w.informers[promv1alpha1.ScrapeConfigName]; ok {
instances, err := w.resourceSelector.SelectScrapeConfigs(ctx, informer.ListAllByNamespace)
if err != nil {
return nil, err
}
scrapeConfigInstances = instances
}

generatedConfig, err := w.configGenerator.GenerateServerConfiguration(
Expand Down
38 changes: 37 additions & 1 deletion cmd/otel-allocator/internal/watcher/promOperator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1220,7 +1220,8 @@ func getTestPrometheusCRWatcher(
}

factory := informers.NewMonitoringInformerFactories(map[string]struct{}{v1.NamespaceAll: {}}, map[string]struct{}{}, mClient, 0, nil)
informers, err := getInformers(factory)

informers, err := getTestInformers(factory)
if err != nil {
t.Fatal(t, err)
}
Expand Down Expand Up @@ -1303,3 +1304,38 @@ func sanitizeScrapeConfigsForTest(scs []*promconfig.ScrapeConfig) {
sc.MetricRelabelConfigs = nil
}
}

// getTestInformers creates informers for testing without CRD availability checks.
// Unlike getInformers, it fails hard if any informer cannot be constructed and
// always populates all four resource entries.
func getTestInformers(factory informers.FactoriesForNamespaces) (map[string]*informers.ForResource, error) {
	informersMap := make(map[string]*informers.ForResource, 4)

	// The three monitoring/v1 resources share the same group version.
	for _, name := range []string{monitoringv1.ServiceMonitorName, monitoringv1.PodMonitorName, monitoringv1.ProbeName} {
		inf, err := informers.NewInformersForResource(factory, monitoringv1.SchemeGroupVersion.WithResource(name))
		if err != nil {
			return nil, err
		}
		informersMap[name] = inf
	}

	// ScrapeConfig lives in the v1alpha1 group version.
	scrapeConfigInf, err := informers.NewInformersForResource(factory, promv1alpha1.SchemeGroupVersion.WithResource(promv1alpha1.ScrapeConfigName))
	if err != nil {
		return nil, err
	}
	informersMap[promv1alpha1.ScrapeConfigName] = scrapeConfigInf

	return informersMap, nil
}
Loading