Skip to content

Commit 988b620

Browse files
committed
feat(app/inbound): introduce request duration metrics
this commit introduces a new middleware layer to the inbound proxy. this instruments inbound traffic with Prometheus telemetry that records request body latency, and emits a histogram of request body durations. as in #4420, the buckets are chosen to mimic the request and response buckets emitted by the outbound proxy, with their granularity flipped. in other words, the inbound proxy is more interested in fine-grained request body metrics than response body metrics, while the outbound proxy is more interested in fine-grained response body metrics than request body metrics. * #4420 Signed-off-by: katelyn martin <kate@buoyant.io>
1 parent 6be67e0 commit 988b620

File tree

3 files changed

+134
-11
lines changed

3 files changed

+134
-11
lines changed

linkerd/app/inbound/src/http/router/metrics.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@ use crate::InboundMetrics;
22
use linkerd_app_core::svc;
33

44
pub use self::{
5-
count_reqs::*, labels::RouteLabels, req_body::*, rsp_body::*, rsp_duration::*, status::*,
5+
count_reqs::*, labels::RouteLabels, req_body::*, req_duration::*, rsp_body::*, rsp_duration::*,
6+
status::*,
67
};
78

89
mod count_reqs;
910
mod labels;
1011
mod req_body;
12+
mod req_duration;
1113
mod rsp_body;
1214
mod rsp_duration;
1315
mod status;
@@ -16,6 +18,7 @@ pub(super) fn layer<N>(
1618
InboundMetrics {
1719
request_count,
1820
request_body_data,
21+
request_duration,
1922
response_body_data,
2023
response_duration,
2124
status_codes,
@@ -34,6 +37,11 @@ pub(super) fn layer<N>(
3437
NewResponseDuration::layer_via(extract)
3538
};
3639

40+
let request_duration = {
41+
let extract = ExtractRequestDurationMetrics(request_duration.clone());
42+
NewRequestDuration::layer_via(extract)
43+
};
44+
3745
let response_body = {
3846
let extract = ExtractResponseBodyDataMetrics::new(response_body_data.clone());
3947
NewRecordResponseBodyData::layer_via(extract)
@@ -50,15 +58,17 @@ pub(super) fn layer<N>(
5058
};
5159

5260
svc::layer::mk(move |inner| {
53-
count.layer(
54-
response_duration.layer(response_body.layer(request_body.layer(status.layer(inner)))),
55-
)
61+
count.layer(response_duration.layer(
62+
request_duration.layer(response_body.layer(request_body.layer(status.layer(inner)))),
63+
))
5664
})
5765
}
5866

5967
/// An `N`-typed service instrumented with metrics middleware.
6068
type Instrumented<N> = NewCountRequests<
6169
NewResponseDuration<
62-
NewRecordResponseBodyData<NewRecordRequestBodyData<NewRecordStatusCode<N>>>,
70+
NewRequestDuration<
71+
NewRecordResponseBodyData<NewRecordRequestBodyData<NewRecordStatusCode<N>>>,
72+
>,
6373
>,
6474
>;
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
use super::RouteLabels;
2+
use crate::policy::PermitVariant;
3+
use linkerd_app_core::{
4+
metrics::prom::{self, EncodeLabelSetMut},
5+
svc,
6+
};
7+
use linkerd_http_prom::{
8+
record_response::{self, Params},
9+
stream_label::with::MkWithLabels,
10+
};
11+
12+
pub type NewRequestDuration<N> =
13+
record_response::NewRequestDuration<MkLabelDuration, ExtractRequestDurationMetrics, N>;
14+
15+
pub type RequestDurationParams =
16+
Params<MkLabelDuration, record_response::RequestMetrics<RequestDurationLabels>>;
17+
18+
#[derive(Clone, Debug)]
19+
pub struct ExtractRequestDurationMetrics(pub RequestDurationFamilies);
20+
21+
#[derive(Clone, Debug)]
22+
pub struct RequestDurationFamilies {
23+
grpc: record_response::RequestMetrics<RequestDurationLabels>,
24+
http: record_response::RequestMetrics<RequestDurationLabels>,
25+
}
26+
27+
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
28+
pub struct RequestDurationLabels {
29+
route: RouteLabels,
30+
}
31+
32+
pub type MkLabelDuration = MkWithLabels<RequestDurationLabels>;
33+
34+
// === impl RequestDurationFamilies ===
35+
36+
impl RequestDurationFamilies {
37+
/// Registers a new [`RequestDurationFamilies`] with the given registry.
38+
pub fn register(
39+
reg: &mut prom::Registry,
40+
histo: impl Clone + IntoIterator<Item = f64>,
41+
) -> Self {
42+
let grpc = {
43+
let reg = reg.sub_registry_with_prefix("grpc");
44+
record_response::RequestMetrics::register(reg, histo.clone())
45+
};
46+
47+
let http = {
48+
let reg = reg.sub_registry_with_prefix("http");
49+
record_response::RequestMetrics::register(reg, histo)
50+
};
51+
52+
Self { grpc, http }
53+
}
54+
}
55+
56+
// === impl ExtractRequestDurationMetrics ===
57+
58+
impl<T> svc::ExtractParam<RequestDurationParams, T> for ExtractRequestDurationMetrics
59+
where
60+
T: svc::Param<PermitVariant> + svc::Param<RouteLabels>,
61+
{
62+
fn extract_param(&self, target: &T) -> RequestDurationParams {
63+
let Self(families) = self;
64+
65+
let labeler = {
66+
let route: RouteLabels = target.param();
67+
let labels = RequestDurationLabels { route };
68+
MkLabelDuration::new(labels)
69+
};
70+
71+
let metric = {
72+
let variant: PermitVariant = target.param();
73+
let RequestDurationFamilies { grpc, http } = families;
74+
match variant {
75+
PermitVariant::Grpc => grpc,
76+
PermitVariant::Http => http,
77+
}
78+
.clone()
79+
};
80+
81+
RequestDurationParams { labeler, metric }
82+
}
83+
}
84+
85+
// === impl RequestDurationLabels ===
86+
87+
impl prom::EncodeLabelSetMut for RequestDurationLabels {
88+
fn encode_label_set(
89+
&self,
90+
encoder: &mut prom::encoding::LabelSetEncoder<'_>,
91+
) -> std::fmt::Result {
92+
let Self { route } = self;
93+
route.encode_label_set(encoder)?;
94+
Ok(())
95+
}
96+
}
97+
98+
impl prom::encoding::EncodeLabelSet for RequestDurationLabels {
99+
fn encode(&self, mut encoder: prom::encoding::LabelSetEncoder<'_>) -> std::fmt::Result {
100+
self.encode_label_set(&mut encoder)
101+
}
102+
}

linkerd/app/inbound/src/metrics.rs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ pub(crate) mod authz;
1212
pub(crate) mod error;
1313

1414
use crate::http::router::{
15-
RequestBodyFamilies, RequestCountFamilies, ResponseBodyFamilies, ResponseDurationFamilies,
16-
StatusCodeFamilies,
15+
RequestBodyFamilies, RequestCountFamilies, RequestDurationFamilies, ResponseBodyFamilies,
16+
ResponseDurationFamilies, StatusCodeFamilies,
1717
};
1818
pub use linkerd_app_core::metrics::*;
1919

@@ -34,6 +34,7 @@ pub struct InboundMetrics {
3434
pub direct: crate::direct::MetricsFamilies,
3535
pub request_count: RequestCountFamilies,
3636
pub request_body_data: RequestBodyFamilies,
37+
pub request_duration: RequestDurationFamilies,
3738
pub response_body_data: ResponseBodyFamilies,
3839
pub response_duration: ResponseDurationFamilies,
3940
pub status_codes: StatusCodeFamilies,
@@ -48,6 +49,8 @@ impl InboundMetrics {
4849
);
4950
let request_count = RequestCountFamilies::register(reg);
5051
let request_body_data = RequestBodyFamilies::register(reg);
52+
let request_duration =
53+
RequestDurationFamilies::register(reg, Self::REQUEST_BUCKETS.iter().copied());
5154
let response_body_data = ResponseBodyFamilies::register(reg);
5255
let response_duration =
5356
ResponseDurationFamilies::register(reg, Self::RESPONSE_BUCKETS.iter().copied());
@@ -63,24 +66,32 @@ impl InboundMetrics {
6366
direct,
6467
request_count,
6568
request_body_data,
69+
request_duration,
6670
response_body_data,
6771
response_duration,
6872
status_codes,
6973
}
7074
}
7175

7276
// There are two histograms for which we need to register metrics:
73-
// (1) request durations, which are measured on routes. TODO(kate): forthcoming.
77+
// (1) request durations, which are measured on routes.
7478
// (2) response durations, which are measured on route-backends.
7579
//
7680
// Should these change in the future, be sure to consider the outbound proxy's corresponding
7781
// constants measuring request and response latency for *outgoing* traffic.
7882

83+
/// Histogram buckets for request latency.
84+
///
85+
/// Because request duration is the more meaningful metric operationally for the inbound
86+
/// proxy, we opt to preserve higher fidelity for request durations (especially for lower
87+
/// values).
88+
const REQUEST_BUCKETS: &'static [f64] = &[0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 10.0];
89+
7990
/// Histogram buckets for response latency.
8091
///
81-
/// These buckets for this histogram are coarse, eliding several buckets for short response
82-
/// durations to be conservative about the costs of tracking two histograms' respective time
83-
/// series.
92+
/// The buckets for this histogram are coarser than those of [`Self::REQUEST_BUCKETS`],
93+
/// eliding several buckets for short response durations to be conservative about the costs of
94+
/// tracking two histograms' respective time series.
8495
const RESPONSE_BUCKETS: &'static [f64] = &[0.05, 0.5, 1.0, 10.0];
8596
}
8697

0 commit comments

Comments
 (0)