Skip to content

Commit 30ab202

Browse files
committed
Fix service-CA certificate rotation not triggering pod restarts
When service-CA rotates certificates, the cluster-monitoring-operator wasn't watching service-CA generated secrets, causing pods to continue using expired certificates until manually restarted. This change detects updates to service-CA secrets in the monitoring namespaces (secrets whose names end with -tls or -cert) and triggers reconciliation when they change, which in turn triggers pod restarts and certificate pickup. Fixes the issue where EUS clusters running 3+ years without upgrades require manual intervention after service-CA rotation.
1 parent af22cdb commit 30ab202

File tree

2 files changed

+104
-0
lines changed

2 files changed

+104
-0
lines changed

pkg/operator/operator.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,16 @@ func (o *Operator) keyFunc(obj interface{}) (string, bool) {
622622
return k, true
623623
}
624624

625+
// isServiceCASecret checks if the given key represents a service-CA generated secret
626+
// that should trigger reconciliation when rotated.
627+
func (o *Operator) isServiceCASecret(key string) bool {
628+
if strings.HasPrefix(key, o.namespace+"/") || strings.HasPrefix(key, o.namespaceUserWorkload+"/") {
629+
secretName := strings.Split(key, "/")[1]
630+
return strings.HasSuffix(secretName, "-tls") || strings.HasSuffix(secretName, "-cert")
631+
}
632+
return false
633+
}
634+
625635
func (o *Operator) handleEvent(obj interface{}) {
626636
cmoConfigMap := o.namespace + "/" + o.configMapName
627637

@@ -671,6 +681,10 @@ func (o *Operator) handleEvent(obj interface{}) {
671681
case federateClientCerts:
672682
case uwmConfigMap:
673683
default:
684+
if o.isServiceCASecret(key) {
685+
klog.V(4).Infof("Service-CA secret updated, triggering reconciliation: %s", key)
686+
break
687+
}
674688
klog.V(5).Infof("ConfigMap or Secret (%s) not triggering an update.", key)
675689
return
676690
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Copyright 2021 The Cluster Monitoring Operator Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package e2e
16+
17+
import (
18+
"context"
19+
"errors"
20+
"fmt"
21+
"testing"
22+
"time"
23+
24+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25+
26+
"github.com/openshift/cluster-monitoring-operator/test/e2e/framework"
27+
)
28+
29+
func TestServiceCASecretRotation(t *testing.T) {
30+
ctx := context.Background()
31+
32+
testCases := []struct {
33+
name string
34+
secretName string
35+
namespace string
36+
}{
37+
{
38+
name: "monitoring-plugin-cert",
39+
secretName: "monitoring-plugin-cert",
40+
namespace: f.Ns,
41+
},
42+
{
43+
name: "prometheus-k8s-tls",
44+
secretName: "prometheus-k8s-tls",
45+
namespace: f.Ns,
46+
},
47+
{
48+
name: "alertmanager-main-tls",
49+
secretName: "alertmanager-main-tls",
50+
namespace: f.Ns,
51+
},
52+
}
53+
54+
for _, tc := range testCases {
55+
t.Run(tc.name, func(t *testing.T) {
56+
// Get the current secret
57+
s, err := f.KubeClient.CoreV1().Secrets(tc.namespace).Get(ctx, tc.secretName, metav1.GetOptions{})
58+
if err != nil {
59+
t.Skipf("secret %s/%s not found, skipping test: %v", tc.namespace, tc.secretName, err)
60+
}
61+
62+
if s.Annotations == nil {
63+
s.Annotations = make(map[string]string)
64+
}
65+
66+
s.Annotations["test.openshift.io/service-ca-test-rotation"] = "true"
67+
68+
if err := f.OperatorClient.CreateOrUpdateSecret(ctx, s); err != nil {
69+
t.Fatalf("error updating secret %s/%s: %v", tc.namespace, tc.secretName, err)
70+
}
71+
72+
// Wait for the cluster-monitoring-operator to detect the change and trigger reconciliation
73+
err = framework.Poll(time.Second, 5*time.Minute, func() error {
74+
s, err := f.KubeClient.CoreV1().Secrets(tc.namespace).Get(ctx, tc.secretName, metav1.GetOptions{})
75+
if err != nil {
76+
return fmt.Errorf("error loading secret %s/%s: %w", tc.namespace, tc.secretName, err)
77+
}
78+
79+
if _, ok := s.Annotations["test.openshift.io/service-ca-test-rotation"]; ok {
80+
return errors.New("rotation did not execute: service-ca-test-rotation annotation set")
81+
}
82+
83+
return nil
84+
})
85+
if err != nil {
86+
t.Fatal(err)
87+
}
88+
})
89+
}
90+
}

0 commit comments

Comments
 (0)