Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .chloggen/check-crd-availability.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
component: target allocator

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: check CRD availability before registering informers

# One or more tracking issues related to the change
issues: [3987]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
4 changes: 4 additions & 0 deletions apis/v1alpha1/targetallocator_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,10 @@ func TestTargetAllocatorValidatingWebhook(t *testing.T) {
"missing the following rules for system:serviceaccount:test-ns:test-ta-targetallocator - configmaps: [get]",
"missing the following rules for system:serviceaccount:test-ns:test-ta-targetallocator - discovery.k8s.io/endpointslices: [get,list,watch]",
"missing the following rules for system:serviceaccount:test-ns:test-ta-targetallocator - nonResourceURL: /metrics: [get]",
"missing the following rules for system:serviceaccount:test-ns:test-ta-targetallocator - nonResourceURL: /api: [get]",
"missing the following rules for system:serviceaccount:test-ns:test-ta-targetallocator - nonResourceURL: /api/*: [get]",
"missing the following rules for system:serviceaccount:test-ns:test-ta-targetallocator - nonResourceURL: /apis: [get]",
"missing the following rules for system:serviceaccount:test-ns:test-ta-targetallocator - nonResourceURL: /apis/*: [get]",
},
},
{
Expand Down
4 changes: 4 additions & 0 deletions apis/v1beta1/collector_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,10 @@ func TestOTELColValidatingWebhook(t *testing.T) {
"missing the following rules for system:serviceaccount:test-ns:adm-warning-targetallocator - configmaps: [get]",
"missing the following rules for system:serviceaccount:test-ns:adm-warning-targetallocator - discovery.k8s.io/endpointslices: [get,list,watch]",
"missing the following rules for system:serviceaccount:test-ns:adm-warning-targetallocator - nonResourceURL: /metrics: [get]",
"missing the following rules for system:serviceaccount:test-ns:adm-warning-targetallocator - nonResourceURL: /api: [get]",
"missing the following rules for system:serviceaccount:test-ns:adm-warning-targetallocator - nonResourceURL: /api/*: [get]",
"missing the following rules for system:serviceaccount:test-ns:adm-warning-targetallocator - nonResourceURL: /apis: [get]",
"missing the following rules for system:serviceaccount:test-ns:adm-warning-targetallocator - nonResourceURL: /apis/*: [get]",
},
},
{
Expand Down
3 changes: 3 additions & 0 deletions apis/v1beta1/targetallocator_rbac.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ var (
}, {
NonResourceURLs: []string{"/metrics"},
Verbs: []string{"get"},
}, {
NonResourceURLs: []string{"/api", "/api/*", "/apis", "/apis/*"},
Verbs: []string{"get"},
},
}
)
Expand Down
172 changes: 148 additions & 24 deletions cmd/otel-allocator/internal/watcher/promOperator.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ import (
"gopkg.in/yaml.v2"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/discovery"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/retry"

Expand All @@ -54,7 +57,7 @@ func NewPrometheusCRWatcher(

factory := informers.NewMonitoringInformerFactories(allowList, denyList, monitoringclient, allocatorconfig.DefaultResyncTime, nil)

monitoringInformers, err := getInformers(factory)
monitoringInformers, err := getInformers(factory, cfg.ClusterConfig, promLogger)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -185,34 +188,127 @@ func getNamespaceInformer(ctx context.Context, allowList, denyList map[string]st

}

// checkCRDAvailability reports whether the named monitoring.coreos.com
// resource (e.g. "servicemonitors") is served by the cluster.
//
// It walks the discovery API: for the monitoring.coreos.com group it lists
// the resources of every served version and looks for resourceName. A nil
// error with false means the API group or resource is simply absent; a
// non-nil error means discovery itself failed (e.g. RBAC or connectivity),
// which callers may treat differently from "not installed".
func checkCRDAvailability(dcl discovery.DiscoveryInterface, resourceName string) (bool, error) {
	apiList, err := dcl.ServerGroups()
	if err != nil {
		return false, err
	}

	for _, group := range apiList.Groups {
		if group.Name != "monitoring.coreos.com" {
			continue
		}
		for _, version := range group.Versions {
			resources, err := dcl.ServerResourcesForGroupVersion(version.GroupVersion)
			if err != nil {
				return false, err
			}
			for _, resource := range resources.APIResources {
				if resource.Name == resourceName {
					return true, nil
				}
			}
		}
		// Group names are unique in a discovery response, so once the
		// monitoring group has been scanned there is nothing left to find.
		break
	}

	return false, nil
}

// createInformerIfAvailable builds an informer for groupVersion when the
// backing CRD is served by the cluster, and returns (nil, nil) when it is
// not. If availability cannot be determined (e.g. discovery is forbidden by
// RBAC), the check fails open: a warning is logged and the informer is
// created anyway.
func createInformerIfAvailable(
	factory informers.FactoriesForNamespaces,
	dcl discovery.DiscoveryInterface,
	resourceName string,
	groupVersion schema.GroupVersionResource,
	logger *slog.Logger,
) (*informers.ForResource, error) {
	ok, checkErr := checkCRDAvailability(dcl, resourceName)
	switch {
	case checkErr != nil:
		// Fail open: if we can't check availability, assume the CRD is available and try to create the informer
		logger.Warn("Failed to check CRD availability, assuming CRD is available", "resource", resourceName, "error", checkErr)
	case !ok:
		logger.Warn("CRD not available, skipping informer", "resource", resourceName)
		return nil, nil
	}

	inf, err := informers.NewInformersForResource(factory, groupVersion)
	if err != nil {
		return nil, fmt.Errorf("failed to create informer for %s: %w", resourceName, err)
	}

	return inf, nil
}

// getInformers returns a map of informers for the given resources.
func getInformers(factory informers.FactoriesForNamespaces) (map[string]*informers.ForResource, error) {
serviceMonitorInformers, err := informers.NewInformersForResource(factory, monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ServiceMonitorName))
func getInformers(factory informers.FactoriesForNamespaces, clusterConfig *rest.Config, logger *slog.Logger) (map[string]*informers.ForResource, error) {
informersMap := make(map[string]*informers.ForResource)

// Get the discovery client
dcl, err := discovery.NewDiscoveryClientForConfig(clusterConfig)
if err != nil {
return nil, fmt.Errorf("failed to create discovery client: %w", err)
}

// ServiceMonitor
serviceMonitorInformer, err := createInformerIfAvailable(
factory, dcl, "servicemonitors",
monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ServiceMonitorName),
logger,
)
if err != nil {
return nil, err
}
if serviceMonitorInformer != nil {
informersMap[monitoringv1.ServiceMonitorName] = serviceMonitorInformer
}

podMonitorInformers, err := informers.NewInformersForResource(factory, monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.PodMonitorName))
// PodMonitor
podMonitorInformer, err := createInformerIfAvailable(
factory, dcl, "podmonitors",
monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.PodMonitorName),
logger,
)
if err != nil {
return nil, err
}
if podMonitorInformer != nil {
informersMap[monitoringv1.PodMonitorName] = podMonitorInformer
}

probeInformers, err := informers.NewInformersForResource(factory, monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ProbeName))
// Probe
probeInformer, err := createInformerIfAvailable(
factory, dcl, "probes",
monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ProbeName),
logger,
)
if err != nil {
return nil, err
}
if probeInformer != nil {
informersMap[monitoringv1.ProbeName] = probeInformer
}

scrapeConfigInformers, err := informers.NewInformersForResource(factory, promv1alpha1.SchemeGroupVersion.WithResource(promv1alpha1.ScrapeConfigName))
// ScrapeConfig
scrapeConfigInformer, err := createInformerIfAvailable(
factory, dcl, "scrapeconfigs",
promv1alpha1.SchemeGroupVersion.WithResource(promv1alpha1.ScrapeConfigName),
logger,
)
if err != nil {
return nil, err
}
if scrapeConfigInformer != nil {
informersMap[promv1alpha1.ScrapeConfigName] = scrapeConfigInformer
}

return map[string]*informers.ForResource{
monitoringv1.ServiceMonitorName: serviceMonitorInformers,
monitoringv1.PodMonitorName: podMonitorInformers,
monitoringv1.ProbeName: probeInformers,
promv1alpha1.ScrapeConfigName: scrapeConfigInformers,
}, nil
return informersMap, nil
}

// Watch wrapped informers and wait for an initial sync.
Expand Down Expand Up @@ -264,7 +360,13 @@ func (w *PrometheusCRWatcher) Watch(upstreamEvents chan Event, upstreamErrors ch
w.logger.Info("Unable to watch namespaces since namespace informer is nil")
}

// Only attempt to sync informers that were actually created
for name, resource := range w.informers {
if resource == nil {
w.logger.Info("Skipping nil informer", "informer", name)
continue
}

resource.Start(w.stopChannel)

if ok := w.WaitForNamedCacheSync(name, resource.HasSynced); !ok {
Expand Down Expand Up @@ -348,24 +450,46 @@ func (w *PrometheusCRWatcher) LoadConfig(ctx context.Context) (*promconfig.Confi
promCfg := &promconfig.Config{}

if w.resourceSelector != nil {
serviceMonitorInstances, err := w.resourceSelector.SelectServiceMonitors(ctx, w.informers[monitoringv1.ServiceMonitorName].ListAllByNamespace)
if err != nil {
return nil, err
// Initialize empty maps for all resource types
serviceMonitorInstances := make(map[string]*monitoringv1.ServiceMonitor)
podMonitorInstances := make(map[string]*monitoringv1.PodMonitor)
probeInstances := make(map[string]*monitoringv1.Probe)
scrapeConfigInstances := make(map[string]*promv1alpha1.ScrapeConfig)

// Get ServiceMonitors if the informer exists
if informer, ok := w.informers[monitoringv1.ServiceMonitorName]; ok {
instances, err := w.resourceSelector.SelectServiceMonitors(ctx, informer.ListAllByNamespace)
if err != nil {
return nil, err
}
serviceMonitorInstances = instances
}

podMonitorInstances, err := w.resourceSelector.SelectPodMonitors(ctx, w.informers[monitoringv1.PodMonitorName].ListAllByNamespace)
if err != nil {
return nil, err
// Get PodMonitors if the informer exists
if informer, ok := w.informers[monitoringv1.PodMonitorName]; ok {
instances, err := w.resourceSelector.SelectPodMonitors(ctx, informer.ListAllByNamespace)
if err != nil {
return nil, err
}
podMonitorInstances = instances
}

probeInstances, err := w.resourceSelector.SelectProbes(ctx, w.informers[monitoringv1.ProbeName].ListAllByNamespace)
if err != nil {
return nil, err
// Get Probes if the informer exists
if informer, ok := w.informers[monitoringv1.ProbeName]; ok {
instances, err := w.resourceSelector.SelectProbes(ctx, informer.ListAllByNamespace)
if err != nil {
return nil, err
}
probeInstances = instances
}

scrapeConfigInstances, err := w.resourceSelector.SelectScrapeConfigs(ctx, w.informers[promv1alpha1.ScrapeConfigName].ListAllByNamespace)
if err != nil {
return nil, err
// Get ScrapeConfigs if the informer exists
if informer, ok := w.informers[promv1alpha1.ScrapeConfigName]; ok {
instances, err := w.resourceSelector.SelectScrapeConfigs(ctx, informer.ListAllByNamespace)
if err != nil {
return nil, err
}
scrapeConfigInstances = instances
}

generatedConfig, err := w.configGenerator.GenerateServerConfiguration(
Expand Down
Loading
Loading