Skip to content

Commit 1be301f

Browse files
authored
identity: add identity_cert_refresh_count metric (#788)
This commit adds a new `identity_cert_refresh_count` counter metric, which tracks the number of times the proxy's identity certificate has been successfully refreshed by the Identity service. This change was fairly straightforward, but I did make a few internal changes so that the counter could be passed into the daemon task. This resulted in a couple of things moving around, but the moved code is largely unchanged. In the future, we may want to count cert refresh errors as well, possibly adding labels for successes/failures, or a new metric tracking errors.
1 parent 28084c6 commit 1be301f

File tree

3 files changed

+141
-101
lines changed

3 files changed

+141
-101
lines changed

linkerd/app/src/identity.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,18 @@ impl Config {
4646
match self {
4747
Config::Disabled => Ok(Identity::Disabled),
4848
Config::Enabled { control, certify } => {
49-
let (local, crt_store) = Local::new(&certify);
49+
let (local, daemon) = Local::new(&certify);
5050

5151
let addr = control.addr.clone();
52-
let svc = control.build(
53-
dns,
54-
metrics,
55-
tls::Conditional::Some(certify.trust_anchors.clone()),
56-
);
52+
let svc =
53+
control.build(dns, metrics, tls::Conditional::Some(certify.trust_anchors));
5754

5855
// Save to be spawned on an auxiliary runtime.
5956
let task = {
6057
let addr = addr.clone();
6158
Box::pin(async move {
6259
debug!(peer.addr = ?addr, "running");
63-
certify::daemon(certify, crt_store, svc).await
60+
daemon.run(svc).await
6461
})
6562
};
6663

linkerd/proxy/identity/src/certify.rs

Lines changed: 95 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::{Crt, CrtKey, Csr, Key, Name, TokenSource, TrustAnchors};
22
use http_body::Body as HttpBody;
33
use linkerd2_error::Error;
4+
use linkerd2_metrics::Counter;
45
use linkerd2_proxy_api::identity as api;
56
use linkerd2_proxy_transport::tls;
67
use pin_project::pin_project;
@@ -37,6 +38,7 @@ pub struct Local {
3738
trust_anchors: TrustAnchors,
3839
name: Name,
3940
crt_key: watch::Receiver<Option<CrtKey>>,
41+
refreshes: Arc<Counter>,
4042
}
4143

4244
/// Produces a `Local` identity once a certificate is available.
@@ -48,74 +50,14 @@ pub struct LostDaemon;
4850

4951
pub type CrtKeySender = watch::Sender<Option<CrtKey>>;
5052

51-
pub async fn daemon<T>(config: Config, crt_key_watch: watch::Sender<Option<CrtKey>>, client: T)
52-
where
53-
T: GrpcService<BoxBody>,
54-
T::ResponseBody: Send + 'static,
55-
<T::ResponseBody as Body>::Data: Send,
56-
<T::ResponseBody as HttpBody>::Error: Into<Error> + Send,
57-
{
58-
let mut curr_expiry = UNIX_EPOCH;
59-
let mut client = api::identity_client::IdentityClient::new(client);
60-
61-
loop {
62-
match config.token.load() {
63-
Ok(token) => {
64-
let req = grpc::Request::new(api::CertifyRequest {
65-
token,
66-
identity: config.local_name.as_ref().to_owned(),
67-
certificate_signing_request: config.csr.to_vec(),
68-
});
69-
trace!("daemon certifying");
70-
let rsp = client.certify(req).await;
71-
match rsp {
72-
Err(e) => error!("Failed to certify identity: {}", e),
73-
Ok(rsp) => {
74-
let api::CertifyResponse {
75-
leaf_certificate,
76-
intermediate_certificates,
77-
valid_until,
78-
} = rsp.into_inner();
79-
match valid_until.and_then(|d| SystemTime::try_from(d).ok()) {
80-
None => {
81-
error!("Identity service did not specify a certificate expiration.")
82-
}
83-
Some(expiry) => {
84-
let key = config.key.clone();
85-
let crt = Crt::new(
86-
config.local_name.clone(),
87-
leaf_certificate,
88-
intermediate_certificates,
89-
expiry,
90-
);
91-
92-
match config.trust_anchors.certify(key, crt) {
93-
Err(e) => {
94-
error!("Received invalid ceritficate: {}", e);
95-
}
96-
Ok(crt_key) => {
97-
debug!("daemon certified until {:?}", expiry);
98-
if crt_key_watch.send(Some(crt_key)).is_err() {
99-
// If we can't store a value, than all observations
100-
// have been dropped and we can stop refreshing.
101-
return;
102-
}
103-
104-
curr_expiry = expiry;
105-
}
106-
}
107-
}
108-
}
109-
}
110-
}
111-
}
112-
Err(e) => error!("Failed to read authentication token: {}", e),
113-
}
114-
config.refresh(curr_expiry).await;
115-
}
53+
#[derive(Debug)]
54+
pub struct Daemon {
55+
crt_key_watch: CrtKeySender,
56+
refreshes: Arc<linkerd2_metrics::Counter>,
57+
config: Config,
11658
}
11759

118-
// // === impl Config ===
60+
// === impl Config ===
11961

12062
impl Config {
12163
/// Returns a future that fires when a refresh should occur.
@@ -138,17 +80,101 @@ impl Config {
13880
}
13981
}
14082

83+
// === impl Daemon ===
84+
85+
impl Daemon {
86+
pub async fn run<T>(self, client: T)
87+
where
88+
T: GrpcService<BoxBody>,
89+
T::ResponseBody: Send + 'static,
90+
<T::ResponseBody as Body>::Data: Send,
91+
<T::ResponseBody as HttpBody>::Error: Into<Error> + Send,
92+
{
93+
let Self {
94+
config,
95+
crt_key_watch,
96+
refreshes,
97+
} = self;
98+
let mut curr_expiry = UNIX_EPOCH;
99+
let mut client = api::identity_client::IdentityClient::new(client);
100+
101+
loop {
102+
match config.token.load() {
103+
Ok(token) => {
104+
let req = grpc::Request::new(api::CertifyRequest {
105+
token,
106+
identity: config.local_name.as_ref().to_owned(),
107+
certificate_signing_request: config.csr.to_vec(),
108+
});
109+
trace!("daemon certifying");
110+
let rsp = client.certify(req).await;
111+
match rsp {
112+
Err(e) => error!("Failed to certify identity: {}", e),
113+
Ok(rsp) => {
114+
let api::CertifyResponse {
115+
leaf_certificate,
116+
intermediate_certificates,
117+
valid_until,
118+
} = rsp.into_inner();
119+
match valid_until.and_then(|d| SystemTime::try_from(d).ok()) {
120+
None => error!(
121+
"Identity service did not specify a certificate expiration."
122+
),
123+
Some(expiry) => {
124+
let key = config.key.clone();
125+
let crt = Crt::new(
126+
config.local_name.clone(),
127+
leaf_certificate,
128+
intermediate_certificates,
129+
expiry,
130+
);
131+
132+
match config.trust_anchors.certify(key, crt) {
133+
Err(e) => {
134+
error!("Received invalid ceritficate: {}", e);
135+
}
136+
Ok(crt_key) => {
137+
debug!("daemon certified until {:?}", expiry);
138+
if crt_key_watch.send(Some(crt_key)).is_err() {
139+
// If we can't store a value, than all observations
140+
// have been dropped and we can stop refreshing.
141+
return;
142+
}
143+
144+
refreshes.incr();
145+
curr_expiry = expiry;
146+
}
147+
}
148+
}
149+
}
150+
}
151+
}
152+
}
153+
Err(e) => error!("Failed to read authentication token: {}", e),
154+
}
155+
config.refresh(curr_expiry).await;
156+
}
157+
}
158+
}
159+
141160
// === impl Local ===
142161

143162
impl Local {
144-
pub fn new(config: &Config) -> (Self, CrtKeySender) {
163+
pub fn new(config: &Config) -> (Self, Daemon) {
145164
let (s, w) = watch::channel(None);
165+
let refreshes = Arc::new(Counter::new());
146166
let l = Local {
147167
name: config.local_name.clone(),
148168
trust_anchors: config.trust_anchors.clone(),
149169
crt_key: w,
170+
refreshes: refreshes.clone(),
171+
};
172+
let daemon = Daemon {
173+
config: config.clone(),
174+
refreshes,
175+
crt_key_watch: s,
150176
};
151-
(l, s)
177+
(l, daemon)
152178
}
153179

154180
pub fn name(&self) -> &Name {
@@ -166,7 +192,7 @@ impl Local {
166192
}
167193

168194
pub fn metrics(&self) -> crate::metrics::Report {
169-
crate::metrics::Report::new(self.crt_key.clone())
195+
crate::metrics::Report::new(self.crt_key.clone(), self.refreshes.clone())
170196
}
171197
}
172198

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,70 @@
11
use crate::CrtKey;
2-
use linkerd2_metrics::{metrics, FmtMetrics, Gauge};
3-
use std::{fmt, time::UNIX_EPOCH};
2+
use linkerd2_metrics::{metrics, Counter, FmtMetrics, Gauge};
3+
use std::{fmt, sync::Arc, time::UNIX_EPOCH};
44
use tokio::sync::watch;
55

66
#[derive(Debug, Clone)]
77
pub struct Report {
8-
crt_key_watch: Option<watch::Receiver<Option<CrtKey>>>,
8+
inner: Option<Inner>,
99
}
1010

1111
metrics! {
1212
identity_cert_expiration_timestamp_seconds: Gauge {
13-
"Time when the this proxy's current mTLS identity certificate will expire (in seconds since the UNIX epoch)"
13+
"Time when the this proxy's current mTLS identity certificate will expire (in seconds since the UNIX epoch)."
14+
},
15+
16+
identity_cert_refresh_count: Counter {
17+
"The total number of times this proxy's mTLS identity certificate has been refreshed by the Identity service."
1418
}
1519
}
1620

1721
impl Report {
18-
pub(crate) fn new(watch: watch::Receiver<Option<CrtKey>>) -> Self {
22+
pub(crate) fn new(
23+
crt_key_watch: watch::Receiver<Option<CrtKey>>,
24+
refreshes: Arc<Counter>,
25+
) -> Self {
1926
Self {
20-
crt_key_watch: Some(watch),
27+
inner: Some(Inner {
28+
crt_key_watch,
29+
refreshes,
30+
}),
2131
}
2232
}
2333

2434
pub fn disabled() -> Self {
25-
Self {
26-
crt_key_watch: None,
27-
}
35+
Self { inner: None }
2836
}
2937
}
3038

39+
#[derive(Debug, Clone)]
40+
struct Inner {
41+
crt_key_watch: watch::Receiver<Option<CrtKey>>,
42+
refreshes: Arc<Counter>,
43+
}
44+
3145
impl FmtMetrics for Report {
3246
fn fmt_metrics(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33-
let dur = if let Some(watch) = self.crt_key_watch.as_ref() {
34-
if let Some(ref crt_key) = *(watch.borrow()) {
35-
crt_key
36-
.expiry()
37-
.duration_since(UNIX_EPOCH)
38-
.map_err(|error| {
39-
tracing::warn!(%error, "an identity would expire before the beginning of the UNIX epoch, something is probably wrong");
40-
fmt::Error
41-
})?
42-
} else {
43-
return Ok(());
44-
}
45-
} else {
46-
return Ok(());
47+
let this = match self.inner.as_ref() {
48+
Some(inner) => inner,
49+
None => return Ok(()),
4750
};
4851

49-
identity_cert_expiration_timestamp_seconds.fmt_help(f)?;
50-
identity_cert_expiration_timestamp_seconds.fmt_metric(f, &Gauge::from(dur.as_secs()))?;
52+
if let Some(ref crt_key) = *(this.crt_key_watch.borrow()) {
53+
let dur = crt_key
54+
.expiry()
55+
.duration_since(UNIX_EPOCH)
56+
.map_err(|error| {
57+
tracing::warn!(%error, "an identity would expire before the beginning of the UNIX epoch, something is probably wrong");
58+
fmt::Error
59+
})?;
60+
identity_cert_expiration_timestamp_seconds.fmt_help(f)?;
61+
identity_cert_expiration_timestamp_seconds
62+
.fmt_metric(f, &Gauge::from(dur.as_secs()))?;
63+
}
64+
65+
identity_cert_refresh_count.fmt_help(f)?;
66+
identity_cert_refresh_count.fmt_metric(f, &this.refreshes)?;
67+
5168
Ok(())
5269
}
5370
}

0 commit comments

Comments
 (0)