@@ -113,8 +113,17 @@ type Config struct {
113
113
// quickly replaced with a unique cert/key pair.
114
114
BootstrapKeyPEM []byte
115
115
// CertificateExpiration will record a metric that shows the remaining
116
- // lifetime of the certificate.
116
+ // lifetime of the certificate. This metric is a gauge because only the
117
+ // current cert expiry time is really useful. Reading this metric at any
118
+ // time simply gives the next expiration date; there is no need to keep some
119
+ // history (histogram) of all previous expiry dates.
117
120
CertificateExpiration Gauge
121
+ // CertificateRotation will record a metric showing the time in seconds
122
+ // that certificates lived before being rotated. This metric is a histogram
123
+ // because there is value in keeping a history of rotation cadences. It
124
+ // allows one to set up monitoring and alerting of unexpected rotation
125
+ // behavior and track trends in rotation frequency.
126
+ CertificateRotation Histogram
118
127
}
119
128
120
129
// Store is responsible for getting and updating the current certificate.
@@ -139,6 +148,12 @@ type Gauge interface {
139
148
Set (float64 )
140
149
}
141
150
151
+ // Histogram is the metric sink used to record how long a certificate was in
152
+ // use before it was rotated; the manager calls Observe once per rotation that replaces an existing cert.
153
+ type Histogram interface {
154
+ Observe (float64 ) // records one sample; the manager passes the seconds elapsed since the replaced cert's NotBefore
155
+ }
156
+
142
157
// NoCertKeyError indicates there is no cert/key currently available.
143
158
type NoCertKeyError string
144
159
@@ -163,6 +178,7 @@ type manager struct {
163
178
certStore Store
164
179
165
180
certificateExpiration Gauge
181
+ certificateRotation Histogram
166
182
167
183
// the following variables must only be accessed under certAccessLock
168
184
certAccessLock sync.RWMutex
@@ -174,6 +190,9 @@ type manager struct {
174
190
clientFn CSRClientFunc
175
191
stopCh chan struct {}
176
192
stopped bool
193
+
194
+ // Set to time.Now but can be stubbed out for testing
195
+ now func () time.Time
177
196
}
178
197
179
198
// NewManager returns a new certificate manager. A certificate manager is
@@ -203,6 +222,8 @@ func NewManager(config *Config) (Manager, error) {
203
222
cert : cert ,
204
223
forceRotation : forceRotation ,
205
224
certificateExpiration : config .CertificateExpiration ,
225
+ certificateRotation : config .CertificateRotation ,
226
+ now : time .Now ,
206
227
}
207
228
208
229
return & m , nil
@@ -215,7 +236,7 @@ func NewManager(config *Config) (Manager, error) {
215
236
func (m * manager ) Current () * tls.Certificate {
216
237
m .certAccessLock .RLock ()
217
238
defer m .certAccessLock .RUnlock ()
218
- if m .cert != nil && m .cert .Leaf != nil && time . Now ().After (m .cert .Leaf .NotAfter ) {
239
+ if m .cert != nil && m .cert .Leaf != nil && m . now ().After (m .cert .Leaf .NotAfter ) {
219
240
klog .V (2 ).Infof ("Current certificate is expired." )
220
241
return nil
221
242
}
@@ -256,7 +277,7 @@ func (m *manager) Start() {
256
277
templateChanged := make (chan struct {})
257
278
go wait .Until (func () {
258
279
deadline := m .nextRotationDeadline ()
259
- if sleepInterval := deadline .Sub (time . Now ()); sleepInterval > 0 {
280
+ if sleepInterval := deadline .Sub (m . now ()); sleepInterval > 0 {
260
281
klog .V (2 ).Infof ("Waiting %v for next certificate rotation" , sleepInterval )
261
282
262
283
timer := time .NewTimer (sleepInterval )
@@ -421,7 +442,10 @@ func (m *manager) rotateCerts() (bool, error) {
421
442
return false , nil
422
443
}
423
444
424
- m .updateCached (cert )
445
+ if old := m .updateCached (cert ); old != nil && m .certificateRotation != nil {
446
+ m .certificateRotation .Observe (m .now ().Sub (old .Leaf .NotBefore ).Seconds ())
447
+ }
448
+
425
449
return true , nil
426
450
}
427
451
@@ -490,14 +514,14 @@ func (m *manager) nextRotationDeadline() time.Time {
490
514
// forceRotation is not protected by locks
491
515
if m .forceRotation {
492
516
m .forceRotation = false
493
- return time . Now ()
517
+ return m . now ()
494
518
}
495
519
496
520
m .certAccessLock .RLock ()
497
521
defer m .certAccessLock .RUnlock ()
498
522
499
523
if ! m .certSatisfiesTemplateLocked () {
500
- return time . Now ()
524
+ return m . now ()
501
525
}
502
526
503
527
notAfter := m .cert .Leaf .NotAfter
@@ -523,13 +547,15 @@ var jitteryDuration = func(totalDuration float64) time.Duration {
523
547
return wait .Jitter (time .Duration (totalDuration ), 0.2 ) - time .Duration (totalDuration * 0.3 )
524
548
}
525
549
526
- // updateCached sets the most recent retrieved cert. It also sets the server
527
- // as assumed healthy.
528
- func (m * manager ) updateCached (cert * tls.Certificate ) {
550
+ // updateCached stores cert as the current certificate, marks the server as assumed healthy, and returns the previously cached cert.
551
+ // The returned cert is nil when nothing was cached before. Current also guards against a nil Leaf on the cached cert, so callers should check both before dereferencing the result.
552
+ func (m * manager ) updateCached (cert * tls.Certificate ) * tls. Certificate {
529
553
m .certAccessLock .Lock () // write lock: serverHealth and cert are both modified below
530
554
defer m .certAccessLock .Unlock ()
531
555
m .serverHealth = true
556
+ old := m .cert // capture the outgoing cert before it is overwritten
532
557
m .cert = cert
558
+ return old
533
559
}
534
560
535
561
// updateServerError takes an error returned by the server and infers
0 commit comments