diff --git a/README.md b/README.md index 48216780..ca1cfe89 100644 --- a/README.md +++ b/README.md @@ -6,20 +6,29 @@ ![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/jetstack/version-checker) version-checker is a Kubernetes utility for observing the current versions of -images running in the cluster, as well as the latest available upstream. These -checks get exposed as Prometheus metrics to be viewed on a dashboard, or _soft_ -alert cluster operators. +images running in the cluster, as well as the latest available upstream. Additionally, +it monitors the Kubernetes cluster version against the latest available releases +using official Kubernetes release channels. These checks get exposed as Prometheus +metrics to be viewed on a dashboard, or _soft_ alert cluster operators. + +## Features + +- **Container Image Version Checking**: Monitor and compare container image versions running in the cluster against their latest upstream versions +- **Kubernetes Version Monitoring**: Track your cluster's Kubernetes version against the latest available releases from official Kubernetes channels +- **Prometheus Metrics Integration**: Export all version information as Prometheus metrics for monitoring and alerting +- **Flexible Channel Selection**: Configure which Kubernetes release channel to track (stable, latest, etc.) --- ## Why Use version-checker? -- **Improved Security**: Ensures images are up-to-date, reducing the risk of using vulnerable or compromised versions. -- **Enhanced Visibility**: Provides a clear overview of all running container versions across clusters. -- **Operational Efficiency**: Automates image tracking and reduces manual intervention in version management. -- **Compliance and Policy**: Enforcement: Helps maintain version consistency and adherence to organizational policies. +- **Improved Security**: Ensures images and Kubernetes clusters are up-to-date, reducing the risk of using vulnerable or compromised versions. 
+- **Enhanced Visibility**: Provides a clear overview of all running container versions and cluster versions across clusters. +- **Operational Efficiency**: Automates image and Kubernetes version tracking and reduces manual intervention in version management. +- **Compliance and Policy Enforcement**: Helps maintain version consistency and adherence to organizational policies for both applications and infrastructure. - **Incremental Upgrades**: Facilitates frequent, incremental updates to reduce the risk of large, disruptive upgrades. - **Add-On Compatibility**: Ensures compatibility with the latest versions of Kubernetes add-ons and dependencies. +- **Proactive Cluster Management**: Stay informed about Kubernetes security updates and new features through automated version monitoring. --- @@ -45,6 +54,7 @@ These registries support authentication. - [Installation Guide](docs/installation.md) - [Metrics](docs/metrics.md) +- [New Features](docs/new_features.md) --- diff --git a/cmd/app/app.go b/cmd/app/app.go index bff6a403..9cced0d9 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -110,7 +110,9 @@ func NewCommand(ctx context.Context) *cobra.Command { return fmt.Errorf("failed to setup image registry clients: %s", err) } - c := controller.NewPodReconciler(opts.CacheTimeout, + _ = client + + podController := controller.NewPodReconciler(opts.CacheTimeout, metricsServer, client, mgr.GetClient(), @@ -118,11 +120,26 @@ func NewCommand(ctx context.Context) *cobra.Command { opts.RequeueDuration, opts.DefaultTestAll, ) - - if err := c.SetupWithManager(mgr); err != nil { + if err := podController.SetupWithManager(mgr); err != nil { return err } + kubeController := controller.NewKubeReconciler( + log, + mgr.GetConfig(), + metricsServer, + opts.KubeInterval, + opts.KubeChannel, + ) + + // Only add to manager if controller was created (channel was specified) + if kubeController != nil { + if err := mgr.Add(kubeController); err != nil { + return err + } + 
log.WithField("channel", opts.KubeChannel).Info("Kubernetes version checking enabled") + } + // Start the manager and all controllers log.Info("Starting controller manager") if err := mgr.Start(ctx); err != nil { diff --git a/cmd/app/options.go b/cmd/app/options.go index f358ac25..5df7b59a 100644 --- a/cmd/app/options.go +++ b/cmd/app/options.go @@ -75,6 +75,10 @@ type Options struct { CacheSyncPeriod time.Duration RequeueDuration time.Duration + KubeChannel string + KubeInterval time.Duration + + // kubeConfigFlags holds the flags for the kubernetes client kubeConfigFlags *genericclioptions.ConfigFlags selfhosted selfhosted.Options @@ -141,7 +145,15 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) { fs.DurationVarP(&o.CacheSyncPeriod, "cache-sync-period", "", 5*time.Hour, - "The time in which all resources should be updated.") + "The duration in which all resources should be updated.") + + fs.DurationVarP(&o.KubeInterval, + "kube-interval", "", o.CacheSyncPeriod, + "The time in which kubernetes channels updates are checked.") + + fs.StringVarP(&o.KubeChannel, + "kube-channel", "", "stable", + "The Kubernetes channel to check against for cluster updates.") fs.DurationVarP(&o.GracefulShutdownTimeout, "graceful-shutdown-timeout", "", 10*time.Second, diff --git a/docs/metrics.md b/docs/metrics.md index 8393f8b6..49405e1d 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -2,16 +2,32 @@ By default, version-checker exposes the following Prometheus metrics on `0.0.0.0:8080/metrics`: +## Container Image Metrics + - `version_checker_is_latest_version`: Indicates whether the container in use is using the latest upstream registry version. - `version_checker_last_checked`: Timestamp when the image was last checked. - `version_checker_image_lookup_duration`: Duration of the image version check. - `version_checker_image_failures_total`: Total of errors encountered during image version checks. 
+## Kubernetes Version Metrics + +- `version_checker_is_latest_kube_version`: Indicates whether the cluster is running the latest version from the configured Kubernetes release channel. + - Labels: `current_version`, `latest_version`, `channel` + - Value `1`: Cluster is up-to-date + - Value `0`: Update available + --- -## Example Prometheus Query +## Example Prometheus Queries +### Check container image versions ```sh QUERY="version_checker_is_latest_version" curl -s --get --data-urlencode query=$QUERY -``` \ No newline at end of file +``` + +### Check Kubernetes cluster version +```sh +QUERY="version_checker_is_latest_kube_version" +curl -s --get --data-urlencode query=$QUERY +``` diff --git a/docs/new_features.md b/docs/new_features.md new file mode 100644 index 00000000..e6560d7c --- /dev/null +++ b/docs/new_features.md @@ -0,0 +1,62 @@ +# Kubernetes Version Monitoring + +version-checker now includes built-in Kubernetes cluster version monitoring capabilities. This feature automatically compares your cluster's current Kubernetes version against the latest available versions from official Kubernetes release channels. 
+ +### How It Works + +The Kubernetes version checker: +- Fetches the current cluster version using the Kubernetes Discovery API +- Compares it against the latest version from the configured Kubernetes release channel (using official `https://dl.k8s.io/release/` endpoints) +- Exposes the comparison as Prometheus metrics for monitoring and alerting +- Strips metadata from versions for accurate semantic version comparison (e.g., `v1.28.2-gke.1` is compared and reported as `1.28.2`) + +### Configuration + +You can configure the Kubernetes version checking behavior using the following CLI flags: + +- `--kube-channel`: Specifies which Kubernetes release channel to check against (default: `"stable"`) + - Examples: `stable`, `latest`, `latest-1.28`, `latest-1.29` +- `--kube-interval`: How often to check for Kubernetes version updates (default: 5 hours, matching the default of `--cache-sync-period`; it does not follow a custom `--cache-sync-period`) + +### Metrics + +The Kubernetes version monitoring exposes the following Prometheus metric: + +``` +version_checker_is_latest_kube_version{current_version="1.28.2", latest_version="1.29.1", channel="stable"} 0 +``` + +- Value `1`: Cluster is running the latest version from the specified channel +- Value `0`: Cluster is not running the latest version (update available) + +### Supported Channels + +version-checker uses official Kubernetes release channels: + +- `stable` - Latest stable Kubernetes release (recommended) +- `latest` - Latest Kubernetes release (including pre-releases) +- `latest-1.28` - Latest patch for Kubernetes 1.28.x +- `latest-1.27` - Latest patch for Kubernetes 1.27.x + +### Examples + +```bash +# Check against latest stable Kubernetes +version-checker --kube-channel=stable + +# Check against latest Kubernetes (including alpha/beta) +version-checker --kube-channel=latest + +# Check against latest 1.28.x patch +version-checker --kube-channel=latest-1.28 + +# Monitor against a specific version channel with custom interval +./version-checker --kube-channel=latest-1.28 
--kube-interval=1h +``` + +### Managed Kubernetes Support + +Works with all managed Kubernetes services: +- **Amazon EKS**: Compares `v1.28.2-eks-abc123` against upstream `v1.28.2` +- **Google GKE**: Compares `v1.28.2-gke.1034000` against upstream `v1.28.2` +- **Azure AKS**: Compares `v1.28.2-aks-xyz789` against upstream `v1.28.2` \ No newline at end of file diff --git a/pkg/client/fallback/fallback.go b/pkg/client/fallback/fallback.go index 19748215..253f9b5a 100644 --- a/pkg/client/fallback/fallback.go +++ b/pkg/client/fallback/fallback.go @@ -56,9 +56,9 @@ func (c *Client) Tags(ctx context.Context, host, repo, image string) (tags []api remaining := len(c.clients) - i - 1 if remaining == 0 { - c.log.Debugf("failed to lookup via %q, Giving up, no more clients", client.Name()) + c.log.Infof("failed to lookup via %q, Giving up, no more clients", client.Name()) } else { - c.log.Debugf("failed to lookup via %q, continuing to search with %v clients remaining", client.Name(), remaining) + c.log.Infof("failed to lookup via %q, continuing to search with %v clients remaining", client.Name(), remaining) } } diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go new file mode 100644 index 00000000..5246458e --- /dev/null +++ b/pkg/controller/kube_controller.go @@ -0,0 +1,182 @@ +package controller + +import ( + "context" + "fmt" + "io" + "net/url" + "strings" + "time" + + "github.com/sirupsen/logrus" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "github.com/hashicorp/go-retryablehttp" + "github.com/jetstack/version-checker/pkg/metrics" + "github.com/jetstack/version-checker/pkg/version/semver" +) + +const channelURLSuffix = "https://dl.k8s.io/release/" + +type ClusterVersionScheduler struct { + client kubernetes.Interface + log *logrus.Entry + metrics *metrics.Metrics + interval time.Duration + channel string +} + +func NewKubeReconciler( + log *logrus.Entry, + config *rest.Config, + metrics *metrics.Metrics, + interval 
time.Duration, + channel string, +) *ClusterVersionScheduler { + // If no channel is specified, return nil to indicate disabled + if channel == "" { + log.Info("Kubernetes version checking disabled (no channel specified)") + return nil + } + + return &ClusterVersionScheduler{ + log: log.WithField("channel", channel), + client: kubernetes.NewForConfigOrDie(config), + interval: interval, + metrics: metrics, + channel: channel, + } +} + +func (s *ClusterVersionScheduler) Start(ctx context.Context) error { + go s.runScheduler(ctx) + return s.reconcile() +} + +func (s *ClusterVersionScheduler) runScheduler(ctx context.Context) { + ticker := time.NewTicker(s.interval) + defer ticker.Stop() + + s.log.WithField("interval", s.interval).WithField("channel", s.channel). + Info("ClusterVersionScheduler started") + + for { + select { + case <-ctx.Done(): + s.log.Info("ClusterVersionScheduler stopping") + return + case <-ticker.C: + if err := s.reconcile(); err != nil { + s.log.Error(err, "Failed to reconcile cluster version") + } + } + } +} + +func (s *ClusterVersionScheduler) reconcile() error { + // Get current cluster version + current, err := s.client.Discovery().ServerVersion() + if err != nil { + return fmt.Errorf("getting cluster version: %w", err) + } + + // Get latest version from specified channel + latest, err := getLatestVersion(s.channel) + if err != nil { + return fmt.Errorf("fetching latest version from channel %s: %w", s.channel, err) + } + + latestSemVer := semver.Parse(latest) + currentSemVer := semver.Parse(current.GitVersion) + + // Create version strings without metadata for comparison + currentSemVerNoMeta := fmt.Sprintf("%d.%d.%d", currentSemVer.Major(), currentSemVer.Minor(), currentSemVer.Patch()) + latestSemVerNoMeta := fmt.Sprintf("%d.%d.%d", latestSemVer.Major(), latestSemVer.Minor(), latestSemVer.Patch()) + + // Parse the versions without metadata for comparison + currentComparable := semver.Parse(currentSemVerNoMeta) + latestComparable := 
semver.Parse(latestSemVerNoMeta) + + // Register metrics! + s.metrics.RegisterKubeVersion(!currentComparable.LessThan(latestComparable), + currentSemVerNoMeta, latestSemVerNoMeta, + s.channel, + ) + + s.log.WithFields(logrus.Fields{ + "currentVersion": currentSemVerNoMeta, + "latestVersion": latestSemVerNoMeta, + "channel": s.channel, + }).Info("Cluster version check complete") + + return nil +} + +func getLatestVersion(channel string) (string, error) { + // Always use upstream Kubernetes channels - this is the authoritative source + // Platform detection is kept for logging purposes only + return getLatestVersionFromUpstream(channel) +} + +func getLatestVersionFromUpstream(channel string) (string, error) { + // Validate channel - only allow known Kubernetes channels + if !isValidKubernetesChannel(channel) { + return "", fmt.Errorf("unsupported channel: %s. Valid channels: stable, latest, latest-1.xx", channel) + } + + if !strings.HasSuffix(channel, ".txt") { + channel += ".txt" + } + + channelURL, err := url.JoinPath(channelURLSuffix, channel) + if err != nil { + return "", fmt.Errorf("failed to join channel URL: %w", err) + } + + client := retryablehttp.NewClient() + client.RetryMax = 3 + client.RetryWaitMin = 1 * time.Second + client.RetryWaitMax = 30 * time.Second + client.Logger = nil + + resp, err := client.Get(channelURL) + if err != nil { + return "", fmt.Errorf("failed to fetch from channel URL %s: %w", channelURL, err) + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return "", fmt.Errorf("unexpected status code %d when fetching channel %s", resp.StatusCode, channel) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("failed to read response body: %w", err) + } + + version := strings.TrimSpace(string(body)) + if version == "" { + return "", fmt.Errorf("empty version returned from channel %s", channel) + } + + return version, nil +} + +func isValidKubernetesChannel(channel string) bool { + // Only allow 
official Kubernetes channels + validChannels := []string{"stable", "latest"} + + // Allow latest-X.Y format + if strings.HasPrefix(channel, "latest-1.") { + return true + } + + for _, valid := range validChannels { + if channel == valid { + return true + } + } + return false +} diff --git a/pkg/metrics/kubernetes.go b/pkg/metrics/kubernetes.go new file mode 100644 index 00000000..c4f326d8 --- /dev/null +++ b/pkg/metrics/kubernetes.go @@ -0,0 +1,21 @@ +package metrics + +import "github.com/prometheus/client_golang/prometheus" + +func (m *Metrics) RegisterKubeVersion(isLatest bool, currentVersion, latestVersion, channel string) { + m.mu.Lock() + defer m.mu.Unlock() + + isLatestF := 0.0 + if isLatest { + isLatestF = 1.0 + } + + m.kubernetesVersion.With( + prometheus.Labels{ + "current_version": currentVersion, + "latest_version": latestVersion, + "channel": channel, + }, + ).Set(isLatestF) +} diff --git a/pkg/metrics/kubernetes_test.go b/pkg/metrics/kubernetes_test.go new file mode 100644 index 00000000..509e3061 --- /dev/null +++ b/pkg/metrics/kubernetes_test.go @@ -0,0 +1,140 @@ +package metrics + +import ( + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var fakeK8sClient = fake.NewFakeClient() + +func TestRegisterKubeVersion(t *testing.T) { + tests := []struct { + name string + isLatest bool + currentVersion string + latestVersion string + channel string + expectedValue float64 + }{ + { + name: "cluster is up to date", + isLatest: true, + currentVersion: "1.28.2", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 1.0, + }, + { + name: "cluster needs update", + isLatest: false, + currentVersion: "1.27.1", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 0.0, + }, + { + name: "cluster is ahead of stable", + 
isLatest: true, + currentVersion: "1.29.0", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 1.0, + }, + { + name: "latest channel with pre-release", + isLatest: false, + currentVersion: "1.28.1", + latestVersion: "1.29.0-alpha.1", + channel: "latest", + expectedValue: 0.0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a new metrics instance for each test to avoid interference + registry := prometheus.NewRegistry() + m := New(logrus.NewEntry(logrus.New()), registry, fakeK8sClient) + + // Register the Kubernetes version + m.RegisterKubeVersion(tt.isLatest, tt.currentVersion, tt.latestVersion, tt.channel) + + // Gather metrics + metricFamilies, err := registry.Gather() + require.NoError(t, err) + + // Find the kubernetes version metric + var kubeMetric *dto.MetricFamily + for _, mf := range metricFamilies { + if mf.GetName() == "version_checker_is_latest_kube_version" { + kubeMetric = mf + break + } + } + + require.NotNil(t, kubeMetric, "Kubernetes version metric should be present") + require.Len(t, kubeMetric.GetMetric(), 1, "Should have exactly one metric value") + + metric := kubeMetric.GetMetric()[0] + assert.Equal(t, tt.expectedValue, metric.GetGauge().GetValue()) + + // Check labels + labels := metric.GetLabel() + assert.Len(t, labels, 3, "Should have 3 labels: current_version, latest_version, channel") + + labelMap := make(map[string]string) + for _, label := range labels { + labelMap[label.GetName()] = label.GetValue() + } + + assert.Equal(t, tt.currentVersion, labelMap["current_version"]) + assert.Equal(t, tt.latestVersion, labelMap["latest_version"]) + assert.Equal(t, tt.channel, labelMap["channel"]) + }) + } +} + +func TestRegisterKubeVersion_MultipleChannels(t *testing.T) { + registry := prometheus.NewRegistry() + m := New(logrus.NewEntry(logrus.New()), registry, fakeK8sClient) + + // Register metrics for different channels + m.RegisterKubeVersion(true, "1.28.2", "1.28.2", "stable") + 
m.RegisterKubeVersion(false, "1.28.2", "1.29.0-alpha.1", "latest") + + // Gather metrics + metricFamilies, err := registry.Gather() + require.NoError(t, err) + + // Find the kubernetes version metric + var kubeMetric *dto.MetricFamily + for _, mf := range metricFamilies { + if mf.GetName() == "version_checker_is_latest_kube_version" { + kubeMetric = mf + break + } + } + + require.NotNil(t, kubeMetric, "Kubernetes version metric should be present") + require.Len(t, kubeMetric.GetMetric(), 2, "Should have exactly two metric values for different channels") + + // Check that both metrics are present + channels := make(map[string]float64) + for _, metric := range kubeMetric.GetMetric() { + labelMap := make(map[string]string) + for _, label := range metric.GetLabel() { + labelMap[label.GetName()] = label.GetValue() + } + channels[labelMap["channel"]] = metric.GetGauge().GetValue() + } + + assert.Equal(t, 1.0, channels["stable"], "Stable channel should be up to date") + assert.Equal(t, 0.0, channels["latest"], "Latest channel should need update") +} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index c9324c14..f9235d45 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -28,6 +28,9 @@ type Metrics struct { containerImageDuration *prometheus.GaugeVec containerImageErrors *prometheus.CounterVec + // Kubernetes version metric + kubernetesVersion *prometheus.GaugeVec + cache k8sclient.Reader // Contains all metrics for the roundtripper @@ -81,6 +84,16 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R "namespace", "pod", "container", "image", }, ) + kubernetesVersion := promauto.With(reg).NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "version_checker", + Name: "is_latest_kube_version", + Help: "Where the current cluster is using the latest release channel version", + }, + []string{ + "current_version", "latest_version", "channel", + }, + ) return &Metrics{ log: log.WithField("module", "metrics"), @@ -91,6 
+104,7 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R containerImageDuration: containerImageDuration, containerImageChecked: containerImageChecked, containerImageErrors: containerImageErrors, + kubernetesVersion: kubernetesVersion, roundTripper: NewRoundTripper(reg), } }