Skip to content

Commit 7adb181

Browse files
committed
Added rotation metric to certificate manager
1 parent b965b34 commit 7adb181

File tree

2 files changed

+81
-12
lines changed

2 files changed

+81
-12
lines changed

staging/src/k8s.io/client-go/util/certificate/certificate_manager.go

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,17 @@ type Config struct {
113113
// quickly replaced with a unique cert/key pair.
114114
BootstrapKeyPEM []byte
115115
// CertificateExpiration will record a metric that shows the remaining
116-
// lifetime of the certificate.
116+
// lifetime of the certificate. This metric is a gauge because only the
117+
// current cert expiry time is really useful. Reading this metric at any
118+
// time simply gives the next expiration date, no need to keep some
119+
// history (histogram) of all previous expiry dates.
117120
CertificateExpiration Gauge
121+
// CertificateRotation will record a metric showing the time in seconds
122+
// that certificates lived before being rotated. This metric is a histogram
123+
// because there is value in keeping a history of rotation cadences. It
124+
// allows one to setup monitoring and alerting of unexpected rotation
125+
// behavior and track trends in rotation frequency.
126+
CertificateRotation Histogram
118127
}
119128

120129
// Store is responsible for getting and updating the current certificate.
@@ -139,6 +148,12 @@ type Gauge interface {
139148
Set(float64)
140149
}
141150

151+
// Histogram will record the time a rotated certificate was used before being
152+
// rotated.
153+
type Histogram interface {
154+
Observe(float64)
155+
}
156+
142157
// NoCertKeyError indicates there is no cert/key currently available.
143158
type NoCertKeyError string
144159

@@ -163,6 +178,7 @@ type manager struct {
163178
certStore Store
164179

165180
certificateExpiration Gauge
181+
certificateRotation Histogram
166182

167183
// the following variables must only be accessed under certAccessLock
168184
certAccessLock sync.RWMutex
@@ -174,6 +190,9 @@ type manager struct {
174190
clientFn CSRClientFunc
175191
stopCh chan struct{}
176192
stopped bool
193+
194+
// Set to time.Now but can be stubbed out for testing
195+
now func() time.Time
177196
}
178197

179198
// NewManager returns a new certificate manager. A certificate manager is
@@ -203,6 +222,8 @@ func NewManager(config *Config) (Manager, error) {
203222
cert: cert,
204223
forceRotation: forceRotation,
205224
certificateExpiration: config.CertificateExpiration,
225+
certificateRotation: config.CertificateRotation,
226+
now: time.Now,
206227
}
207228

208229
return &m, nil
@@ -215,7 +236,7 @@ func NewManager(config *Config) (Manager, error) {
215236
func (m *manager) Current() *tls.Certificate {
216237
m.certAccessLock.RLock()
217238
defer m.certAccessLock.RUnlock()
218-
if m.cert != nil && m.cert.Leaf != nil && time.Now().After(m.cert.Leaf.NotAfter) {
239+
if m.cert != nil && m.cert.Leaf != nil && m.now().After(m.cert.Leaf.NotAfter) {
219240
klog.V(2).Infof("Current certificate is expired.")
220241
return nil
221242
}
@@ -256,7 +277,7 @@ func (m *manager) Start() {
256277
templateChanged := make(chan struct{})
257278
go wait.Until(func() {
258279
deadline := m.nextRotationDeadline()
259-
if sleepInterval := deadline.Sub(time.Now()); sleepInterval > 0 {
280+
if sleepInterval := deadline.Sub(m.now()); sleepInterval > 0 {
260281
klog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
261282

262283
timer := time.NewTimer(sleepInterval)
@@ -421,7 +442,10 @@ func (m *manager) rotateCerts() (bool, error) {
421442
return false, nil
422443
}
423444

424-
m.updateCached(cert)
445+
if old := m.updateCached(cert); old != nil && m.certificateRotation != nil {
446+
m.certificateRotation.Observe(m.now().Sub(old.Leaf.NotBefore).Seconds())
447+
}
448+
425449
return true, nil
426450
}
427451

@@ -490,14 +514,14 @@ func (m *manager) nextRotationDeadline() time.Time {
490514
// forceRotation is not protected by locks
491515
if m.forceRotation {
492516
m.forceRotation = false
493-
return time.Now()
517+
return m.now()
494518
}
495519

496520
m.certAccessLock.RLock()
497521
defer m.certAccessLock.RUnlock()
498522

499523
if !m.certSatisfiesTemplateLocked() {
500-
return time.Now()
524+
return m.now()
501525
}
502526

503527
notAfter := m.cert.Leaf.NotAfter
@@ -523,13 +547,15 @@ var jitteryDuration = func(totalDuration float64) time.Duration {
523547
return wait.Jitter(time.Duration(totalDuration), 0.2) - time.Duration(totalDuration*0.3)
524548
}
525549

526-
// updateCached sets the most recent retrieved cert. It also sets the server
527-
// as assumed healthy.
528-
func (m *manager) updateCached(cert *tls.Certificate) {
550+
// updateCached sets the most recent retrieved cert and returns the old cert.
551+
// It also sets the server as assumed healthy.
552+
func (m *manager) updateCached(cert *tls.Certificate) *tls.Certificate {
529553
m.certAccessLock.Lock()
530554
defer m.certAccessLock.Unlock()
531555
m.serverHealth = true
556+
old := m.cert
532557
m.cert = cert
558+
return old
533559
}
534560

535561
// updateServerError takes an error returned by the server and infers

staging/src/k8s.io/client-go/util/certificate/certificate_manager_test.go

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,12 +163,17 @@ func TestNewManagerNoRotation(t *testing.T) {
163163
}
164164
}
165165

166-
type gaugeMock struct {
166+
type metricMock struct {
167167
calls int
168168
lastValue float64
169169
}
170170

171-
func (g *gaugeMock) Set(v float64) {
171+
func (g *metricMock) Set(v float64) {
172+
g.calls++
173+
g.lastValue = v
174+
}
175+
176+
func (g *metricMock) Observe(v float64) {
172177
g.calls++
173178
g.lastValue = v
174179
}
@@ -195,7 +200,7 @@ func TestSetRotationDeadline(t *testing.T) {
195200

196201
for _, tc := range testCases {
197202
t.Run(tc.name, func(t *testing.T) {
198-
g := gaugeMock{}
203+
g := metricMock{}
199204
m := manager{
200205
cert: &tls.Certificate{
201206
Leaf: &x509.Certificate{
@@ -206,6 +211,7 @@ func TestSetRotationDeadline(t *testing.T) {
206211
getTemplate: func() *x509.CertificateRequest { return &x509.CertificateRequest{} },
207212
usages: []certificates.KeyUsage{},
208213
certificateExpiration: &g,
214+
now: func() time.Time { return now },
209215
}
210216
jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) }
211217
lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7))
@@ -383,6 +389,7 @@ func TestCertSatisfiesTemplate(t *testing.T) {
383389
m := manager{
384390
cert: tlsCert,
385391
getTemplate: func() *x509.CertificateRequest { return tc.template },
392+
now: time.Now,
386393
}
387394

388395
result := m.certSatisfiesTemplate()
@@ -407,6 +414,7 @@ func TestRotateCertCreateCSRError(t *testing.T) {
407414
clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) {
408415
return fakeClient{failureType: createError}, nil
409416
},
417+
now: func() time.Time { return now },
410418
}
411419

412420
if success, err := m.rotateCerts(); success {
@@ -430,6 +438,7 @@ func TestRotateCertWaitingForResultError(t *testing.T) {
430438
clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) {
431439
return fakeClient{failureType: watchError}, nil
432440
},
441+
now: func() time.Time { return now },
433442
}
434443

435444
defer func(t time.Duration) { certificateWaitTimeout = t }(certificateWaitTimeout)
@@ -945,6 +954,40 @@ func TestServerHealth(t *testing.T) {
945954
}
946955
}
947956

957+
func TestRotationLogsDuration(t *testing.T) {
958+
h := metricMock{}
959+
now := time.Now()
960+
certIss := now.Add(-2 * time.Hour)
961+
m := manager{
962+
cert: &tls.Certificate{
963+
Leaf: &x509.Certificate{
964+
NotBefore: certIss,
965+
NotAfter: now.Add(-1 * time.Hour),
966+
},
967+
},
968+
certStore: &fakeStore{cert: expiredStoreCertData.certificate},
969+
getTemplate: func() *x509.CertificateRequest { return &x509.CertificateRequest{} },
970+
clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) {
971+
return &fakeClient{
972+
certificatePEM: apiServerCertData.certificatePEM,
973+
}, nil
974+
},
975+
certificateRotation: &h,
976+
now: func() time.Time { return now },
977+
}
978+
ok, err := m.rotateCerts()
979+
if err != nil || !ok {
980+
t.Errorf("failed to rotate certs: %v", err)
981+
}
982+
if h.calls != 1 {
983+
t.Errorf("rotation metric was not called")
984+
}
985+
if h.lastValue != now.Sub(certIss).Seconds() {
986+
t.Errorf("rotation metric did not record the right value got: %f; want %f", h.lastValue, now.Sub(certIss).Seconds())
987+
}
988+
989+
}
990+
948991
type fakeClientFailureType int
949992

950993
const (

0 commit comments

Comments
 (0)