Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmd/cluster-version-operator/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,20 @@ func init() {
cmd.PersistentFlags().StringVar(&opts.NodeName, "node-name", opts.NodeName, "kubernetes node name CVO is scheduled on.")
cmd.PersistentFlags().BoolVar(&opts.EnableAutoUpdate, "enable-auto-update", opts.EnableAutoUpdate, "Enables the autoupdate controller.")
cmd.PersistentFlags().StringVar(&opts.ReleaseImage, "release-image", opts.ReleaseImage, "The Openshift release image url.")

// metrics serving options
cmd.PersistentFlags().StringVar(&opts.ServingCertFile, "serving-cert-file", opts.ServingCertFile, "The X.509 certificate file for serving metrics over HTTPS. You must set both --serving-cert-file and --serving-key-file unless you set --listen empty.")
cmd.PersistentFlags().StringVar(&opts.ServingKeyFile, "serving-key-file", opts.ServingKeyFile, "The X.509 key file for serving metrics over HTTPS. You must set both --serving-cert-file and --serving-key-file unless you set --listen empty.")
cmd.PersistentFlags().StringVar(&opts.ServingClientCAFile, "serving-client-certificate-authorities-file", opts.ServingClientCAFile, "The X.509 certificates file containing a series of PEM encoded certificates for Certificate Authorities trusted to sign metrics client certificates. If set, only clients who provide mTLS client certs signed by those CAs will be trusted. If unset, only clients with valid tokens for the prometheus-k8s ServiceAccount in the openshift-monitoring namespace will be trusted.")
Copy link
Contributor

@DavidHurta DavidHurta Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We currently have a --metrics-ca-bundle-file flag. For consistency, maybe we can rename the new flag to something like --serving-client-ca-bundle-file (if my terminology is correct). Not that important, but any potential renaming of flags is not fun after they are integrated into other systems. I will not block on this.


// metrics/PromQL client options
cmd.PersistentFlags().StringVar(&opts.PromQLTarget.CABundleFile, "metrics-ca-bundle-file", opts.PromQLTarget.CABundleFile, "The service CA bundle file containing one or more X.509 certificate files for validating certificates generated from the service CA for the respective remote PromQL query service.")
cmd.PersistentFlags().StringVar(&opts.PromQLTarget.BearerTokenFile, "metrics-token-file", opts.PromQLTarget.BearerTokenFile, "The bearer token file used to access the remote PromQL query service.")
cmd.PersistentFlags().StringVar(&opts.PromQLTarget.KubeSvc.Namespace, "metrics-namespace", opts.PromQLTarget.KubeSvc.Namespace, "The name of the namespace where the the remote PromQL query service resides. Must be specified when --use-dns-for-services is disabled.")
cmd.PersistentFlags().StringVar(&opts.PromQLTarget.KubeSvc.Name, "metrics-service", opts.PromQLTarget.KubeSvc.Name, "The name of the remote PromQL query service. Must be specified when --use-dns-for-services is disabled.")
cmd.PersistentFlags().BoolVar(&opts.PromQLTarget.UseDNS, "use-dns-for-services", opts.PromQLTarget.UseDNS, "Configures the CVO to use DNS for resolution of services in the cluster.")
cmd.PersistentFlags().StringVar(&opts.PrometheusURLString, "metrics-url", opts.PrometheusURLString, "The URL used to access the remote PromQL query service.")

cmd.PersistentFlags().BoolVar(&opts.HyperShift, "hypershift", opts.HyperShift, "This options indicates whether the CVO is running inside a hosted control plane.")
cmd.PersistentFlags().StringVar(&opts.UpdateService, "update-service", opts.UpdateService, "The preferred update service. If set, this option overrides any upstream value configured in ClusterVersion spec.")
cmd.PersistentFlags().StringSliceVar(&opts.AlwaysEnableCapabilities, "always-enable-capabilities", opts.AlwaysEnableCapabilities, "List of the cluster capabilities which will always be implicitly enabled.")
Expand Down
152 changes: 79 additions & 73 deletions pkg/cvo/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"context"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -173,6 +174,7 @@ func (a *authHandler) authorize(token string) (bool, error) {
}

func (a *authHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: empty line

authHeader := r.Header.Get("Authorization")
if authHeader == "" {
http.Error(w, "failed to get the Authorization header", http.StatusUnauthorized)
Expand Down Expand Up @@ -246,11 +248,24 @@ func handleServerResult(result asyncResult, lastLoopError error) error {
// Also detects changes to metrics certificate files upon which
// the metrics HTTP server is shutdown and recreated with a new
// TLS configuration.
func RunMetrics(runContext context.Context, shutdownContext context.Context, listenAddress, certFile, keyFile string, restConfig *rest.Config) error {
func RunMetrics(runContext context.Context, shutdownContext context.Context, listenAddress, certFile, keyFile, clientCAFile string, restConfig *rest.Config) error {
checksums := map[string][]byte{
certFile: nil,
keyFile: nil,
}
if clientCAFile != "" {
checksums[clientCAFile] = nil
}

checksums, err := checksumFiles(checksums)
if err != nil {
return err
}

var tlsConfig *tls.Config
if listenAddress != "" {
var err error
tlsConfig, err = makeTLSConfig(certFile, keyFile)
tlsConfig, err = makeTLSConfig(certFile, keyFile, clientCAFile)
if err != nil {
return fmt.Errorf("Failed to create TLS config: %w", err)
}
Expand All @@ -270,16 +285,9 @@ func RunMetrics(runContext context.Context, shutdownContext context.Context, lis

go startListening(server, tlsConfig, listenAddress, resultChannel)

certDir := filepath.Dir(certFile)
keyDir := filepath.Dir(keyFile)

origCertChecksum, err := checksumFile(certFile)
if err != nil {
return fmt.Errorf("Failed to initialize certificate file checksum: %w", err)
}
origKeyChecksum, err := checksumFile(keyFile)
if err != nil {
return fmt.Errorf("Failed to initialize key file checksum: %w", err)
watchDirectories := map[string]struct{}{}
for fName := range checksums {
watchDirectories[filepath.Dir(fName)] = struct{}{}
}

// Set up and start the file watcher.
Expand All @@ -288,12 +296,9 @@ func RunMetrics(runContext context.Context, shutdownContext context.Context, lis
return fmt.Errorf("Failed to create file watcher for certificate and key rotation: %w", err)
} else {
defer watcher.Close()
if err := watcher.Add(certDir); err != nil {
return fmt.Errorf("Failed to add %v to watcher: %w", certDir, err)
}
if certDir != keyDir {
if err := watcher.Add(keyDir); err != nil {
return fmt.Errorf("Failed to add %v to watcher: %w", keyDir, err)
for watchDir := range watchDirectories {
if err := watcher.Add(watchDir); err != nil {
return fmt.Errorf("Failed to add %v to watcher: %w", watchDir, err)
}
}
}
Expand Down Expand Up @@ -326,23 +331,10 @@ func RunMetrics(runContext context.Context, shutdownContext context.Context, lis
loopError = handleServerResult(result, loopError)
case event := <-watcher.Events:
if event.Op != fsnotify.Chmod && event.Op != fsnotify.Remove {
if changed, err := certsChanged(origCertChecksum, origKeyChecksum, certFile, keyFile); changed {

// Update file checksums with latest files.
//
if origCertChecksum, err = checksumFile(certFile); err != nil {
klog.Errorf("Failed to update certificate file checksum: %v", err)
loopError = err
break
}
if origKeyChecksum, err = checksumFile(keyFile); err != nil {
klog.Errorf("Failed to update key file checksum: %v", err)
loopError = err
break
}

tlsConfig, err = makeTLSConfig(certFile, keyFile)
if changed, csums, err := certsChanged(checksums); changed {
tlsConfig, err = makeTLSConfig(certFile, keyFile, clientCAFile)
if err == nil {
checksums = csums
restartServer = true
shutdownHttpServer(shutdownContext, server)
continue
Expand Down Expand Up @@ -712,41 +704,30 @@ func mostRecentTimestamp(cv *configv1.ClusterVersion) int64 {
// Determine if the certificates have changed and need to be updated.
// If no errors occur, returns true if both files have changed and
// neither is an empty file. Otherwise returns false and any error.
Comment on lines 705 to 706
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The description is now outdated - return values, parameters.

func certsChanged(origCertChecksum []byte, origKeyChecksum []byte, certFile, keyFile string) (bool, error) {
// Check if both files exist.
certNotEmpty, err := fileExistsAndNotEmpty(certFile)
if err != nil {
return false, fmt.Errorf("Error checking if changed TLS cert file empty/exists: %w", err)
}
keyNotEmpty, err := fileExistsAndNotEmpty(keyFile)
if err != nil {
return false, fmt.Errorf("Error checking if changed TLS key file empty/exists: %w", err)
}
if !certNotEmpty || !keyNotEmpty {
// One of the files is missing despite some file event.
return false, fmt.Errorf("Certificate or key is missing or empty, certificates will not be rotated.")
}

currentCertChecksum, err := checksumFile(certFile)
if err != nil {
return false, fmt.Errorf("Error checking certificate file checksum: %w", err)
func certsChanged(checksums map[string][]byte) (bool, map[string][]byte, error) {
for fName := range checksums {
err := requireFileExistsAndNotEmpty(fName)
if err != nil {
return false, nil, fmt.Errorf("Server configuration file must exist and be non-empty. Certificates will not be rotated. %w", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I am not sure whether the error will always contain the filename.

Suggested change
return false, nil, fmt.Errorf("Server configuration file must exist and be non-empty. Certificates will not be rotated. %w", err)
return false, nil, fmt.Errorf("Server configuration file %s must exist and be non-empty. Certificates will not be rotated. %w", fName, err)

}
}

currentKeyChecksum, err := checksumFile(keyFile)
csums, err := checksumFiles(checksums)
if err != nil {
return false, fmt.Errorf("Error checking key file checksum: %w", err)
return false, nil, err
}

// Check if the non-empty certificate/key files have actually changed.
if !bytes.Equal(origCertChecksum, currentCertChecksum) && !bytes.Equal(origKeyChecksum, currentKeyChecksum) {
klog.V(2).Info("Certificate and key changed. Will recreate metrics server with updated TLS configuration.")
return true, nil
changed := false
for fName, sum := range checksums {
if !bytes.Equal(sum, csums[fName]) {
changed = true
klog.V(2).Infof("Server configuration file %s changed. Will recreate metrics server with updated TLS configuration.", fName)
}
}

return false, nil
return changed, csums, nil
}

func makeTLSConfig(servingCertFile, servingKeyFile string) (*tls.Config, error) {
func makeTLSConfig(servingCertFile, servingKeyFile, clientCAFile string) (*tls.Config, error) {
// Load the initial certificate contents.
certBytes, err := os.ReadFile(servingCertFile)
if err != nil {
Expand All @@ -761,11 +742,26 @@ func makeTLSConfig(servingCertFile, servingKeyFile string) (*tls.Config, error)
return nil, err
}

return crypto.SecureTLSConfig(&tls.Config{
tlsConfig := tls.Config{
GetCertificate: func(_ *tls.ClientHelloInfo) (*tls.Certificate, error) {
return &certificate, nil
},
}), nil
}

if clientCAFile != "" {
clientCABytes, err := os.ReadFile(clientCAFile)
if err != nil {
return nil, err
}

tlsConfig.ClientAuth = tls.RequireAndVerifyClientCert
tlsConfig.ClientCAs = x509.NewCertPool()
if ok := tlsConfig.ClientCAs.AppendCertsFromPEM(clientCABytes); !ok {
return nil, fmt.Errorf("failed to load any client Certificate Authority certificates from %s", clientCAFile)
}
}

return crypto.SecureTLSConfig(&tlsConfig), nil
}

// Compute the sha256 checksum for file 'fName' returning any error.
Expand All @@ -785,15 +781,25 @@ func checksumFile(fName string) ([]byte, error) {
return hash.Sum(nil), nil
}

// Check if a file exists and has file.Size() not equal to 0.
// Returns any error returned by os.Stat other than os.ErrNotExist.
func fileExistsAndNotEmpty(fName string) (bool, error) {
if fi, err := os.Stat(fName); err == nil {
return (fi.Size() != 0), nil
} else if errors.Is(err, os.ErrNotExist) {
return false, nil
} else {
// Some other error, file may not exist.
return false, err
// Compute the sha256 checksums for files 'fNames' returning any error.
func checksumFiles(fNames map[string][]byte) (map[string][]byte, error) {
sums := make(map[string][]byte, len(fNames))
for fName := range fNames {
sum, err := checksumFile(fName)
if err != nil {
return nil, err
}
sums[fName] = sum
}
return sums, nil
}

// requireFileExistsAndNotEmpty errors if a file does not exist or has a file.Size() equal to zero.
func requireFileExistsAndNotEmpty(fName string) error {
if fi, err := os.Stat(fName); err != nil {
return err
} else if fi.Size() == 0 {
return fmt.Errorf("file %s is empty", fName)
}
return nil
}
12 changes: 8 additions & 4 deletions pkg/start/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,10 @@ const (

// Options are the valid inputs to starting the CVO.
type Options struct {
ReleaseImage string
ServingCertFile string
ServingKeyFile string
ReleaseImage string
ServingCertFile string
ServingKeyFile string
ServingClientCAFile string

Kubeconfig string
NodeName string
Expand Down Expand Up @@ -144,6 +145,9 @@ func (o *Options) ValidateAndComplete() error {
if o.ListenAddr != "" && o.ServingKeyFile == "" {
return fmt.Errorf("--listen was not set empty, so --serving-key-file must be set")
}
if o.ListenAddr == "" && o.ServingClientCAFile != "" {
return fmt.Errorf("--listen was set empty, so --serving-client-certificate-authorities-file must not be set")
}
if o.PrometheusURLString == "" {
return fmt.Errorf("missing --metrics-url flag, it is required")
}
Expand Down Expand Up @@ -350,7 +354,7 @@ func (o *Options) run(ctx context.Context, controllerCtx *Context, lock resource
resultChannelCount++
go func() {
defer utilruntime.HandleCrash()
err := cvo.RunMetrics(postMainContext, shutdownContext, o.ListenAddr, o.ServingCertFile, o.ServingKeyFile, restConfig)
err := cvo.RunMetrics(postMainContext, shutdownContext, o.ListenAddr, o.ServingCertFile, o.ServingKeyFile, o.ServingClientCAFile, restConfig)
resultChannel <- asyncResult{name: "metrics server", error: err}
}()
}
Expand Down