Skip to content

Commit f5e9cea

Browse files
hawkw and tustvold authored
trace: Apache Common Log Format access logging (#1319)
This branch builds on @tustvold's work in #601. The original PR description from that branch:

> Access logging is very important functionality for my team as we wish
> to maintain feature-parity with our existing AWS ALB-based approach.
> This functionality was requested
> [here](linkerd/linkerd2#1913) and was marked
> as help wanted, so thought I'd take a stab at implementing it.
>
> Creating as a draft as still needs more testing and benchmarking, and
> I'm new to tower and so might have made some rookie errors. However, I
> wanted to create a draft as an opportunity to get some early feedback.
>
> The basic design consists of an AccessLogLayer that instruments both
> requests and responses that flow through it, in a similar manner to
> how handle_time is already computed. I'm not a massive fan of this,
> but it was the only way I could easily see to get accurate processing
> time metrics. I've tried to avoid any memory allocation on the hot
> path, although there are possibly more atomic increments than I would
> like. The performance impact with the feature disabled, i.e.
> LINKERD2_PROXY_ACCESS_LOG_FILE, not set should be minimal.
>
> The results of this instrumentation are then sent over a mpsc channel
> to an AccessLogCollector that writes them in a space-delimited format
> to a file specified as an environment variable. It buffers in memory
> and flushes on termination and on write if more than
> FLUSH_TIMEOUT_SECS since the last flush. This makes the access logging
> best effort much like AWS ALBs.
>
> An example deployment scenario using this functionality might deploy a
> fluent-bit sidecar to ship the logs, or write to /dev/stderr and use a
> log shipper deployed as a DaemonSet.

The additional changes in this branch are:

- Updated against the latest state of the `main` branch.
- Changed the `tracing` configuration to use per-layer filtering, so that the access log layer _only_ sees access log spans, while the stdout logging layer does not see the access log spans (although, it _could_ if we wanted it to...)
- Changed the format for outputting the access log to the Apache Common Log Format. Note that this format does *not* include all the data that the access log spans currently collect; I excluded that data so that the output is compatible with tools that ingest the Apache log format. In a follow-up PR, we can add the ability to control what format the access log is written in, and add an alternative format that includes all the access log data that Linkerd's spans can collect (I suggest newline-delimited JSON for this).

Of course, a huge thank you to @tustvold for all their work on this; I only updated the branch with the latest changes and made some minor improvements. :)

Co-authored-by: Raphael Taylor-Davies <[email protected]>
1 parent 8f7be6f commit f5e9cea

File tree

12 files changed

+466
-13
lines changed

12 files changed

+466
-13
lines changed

Cargo.lock

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,12 @@ version = "1.0.1"
537537
source = "registry+https://github.com/rust-lang/crates.io-index"
538538
checksum = "6456b8a6c8f33fee7d958fcd1b60d55b11940a79e63ae87013e6d22e26034440"
539539

540+
[[package]]
541+
name = "humantime"
542+
version = "2.1.0"
543+
source = "registry+https://github.com/rust-lang/crates.io-index"
544+
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
545+
540546
[[package]]
541547
name = "hyper"
542548
version = "0.14.16"
@@ -867,6 +873,7 @@ dependencies = [
867873
"libfuzzer-sys",
868874
"linkerd-app-core",
869875
"linkerd-app-test",
876+
"linkerd-http-access-log",
870877
"linkerd-io",
871878
"linkerd-meshtls",
872879
"linkerd-meshtls-rustls",
@@ -1061,6 +1068,22 @@ dependencies = [
10611068
"tokio",
10621069
]
10631070

1071+
[[package]]
1072+
name = "linkerd-http-access-log"
1073+
version = "0.1.0"
1074+
dependencies = [
1075+
"futures-core",
1076+
"http",
1077+
"humantime",
1078+
"linkerd-identity",
1079+
"linkerd-proxy-transport",
1080+
"linkerd-stack",
1081+
"linkerd-tls",
1082+
"linkerd-tracing",
1083+
"pin-project",
1084+
"tracing",
1085+
]
1086+
10641087
[[package]]
10651088
name = "linkerd-http-box"
10661089
version = "0.1.0"

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ members = [
2424
"linkerd/errno",
2525
"linkerd/error-respond",
2626
"linkerd/exp-backoff",
27+
"linkerd/http-access-log",
2728
"linkerd/http-box",
2829
"linkerd/http-classify",
2930
"linkerd/http-metrics",

linkerd/app/inbound/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ bytes = "1"
1414
http = "0.2"
1515
futures = { version = "0.3", default-features = false }
1616
linkerd-app-core = { path = "../core" }
17+
linkerd-http-access-log = { path = "../../http-access-log" }
1718
linkerd-server-policy = { path = "../../server-policy" }
1819
linkerd-tonic-watch = { path = "../../tonic-watch" }
1920
linkerd2-proxy-api = { version = "0.3", features = ["client", "inbound"] }

linkerd/app/inbound/src/http/server.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ use linkerd_app_core::{
1111
proxy::http,
1212
svc::{self, ExtractParam, Param},
1313
tls,
14-
transport::OrigDstAddr,
14+
transport::{ClientAddr, OrigDstAddr, Remote},
1515
Error, Result,
1616
};
17+
use linkerd_http_access_log::NewAccessLog;
1718
use tracing::debug_span;
1819

1920
#[derive(Copy, Clone, Debug)]
@@ -26,7 +27,8 @@ impl<H> Inbound<H> {
2627
+ Param<http::normalize_uri::DefaultAuthority>
2728
+ Param<tls::ConditionalServerTls>
2829
+ Param<ServerLabel>
29-
+ Param<OrigDstAddr>,
30+
+ Param<OrigDstAddr>
31+
+ Param<Remote<ClientAddr>>,
3032
T: Clone + Send + Unpin + 'static,
3133
I: io::AsyncRead + io::AsyncWrite + io::PeerAddr + Send + Unpin + 'static,
3234
H: svc::NewService<T, Service = HSvc> + Clone + Send + Sync + Unpin + 'static,
@@ -79,6 +81,7 @@ impl<H> Inbound<H> {
7981
.push(http::BoxResponse::layer()),
8082
)
8183
.check_new_service::<T, http::Request<_>>()
84+
.push(NewAccessLog::layer())
8285
.instrument(|t: &T| debug_span!("http", v = %Param::<Version>::param(t)))
8386
.push(http::NewServeHttp::layer(h2_settings, rt.drain.clone()))
8487
.push_on_service(svc::BoxService::layer())

linkerd/http-access-log/Cargo.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[package]
2+
name = "linkerd-http-access-log"
3+
version = "0.1.0"
4+
authors = ["Linkerd Developers <[email protected]>"]
5+
license = "Apache-2.0"
6+
edition = "2018"
7+
publish = false
8+
9+
[dependencies]
10+
futures-core = "0.3"
11+
http = "0.2"
12+
humantime = "2"
13+
pin-project = "1"
14+
linkerd-stack = { path = "../stack" }
15+
linkerd-identity = { path = "../identity" }
16+
linkerd-tls = { path = "../tls" }
17+
linkerd-proxy-transport = { path = "../proxy/transport" }
18+
linkerd-tracing = { path = "../tracing" }
19+
tracing = "0.1.19"

linkerd/http-access-log/src/lib.rs

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
#![deny(warnings, rust_2018_idioms)]
2+
#![forbid(unsafe_code)]
3+
4+
use futures_core::TryFuture;
5+
use linkerd_identity as identity;
6+
use linkerd_proxy_transport::{ClientAddr, Remote};
7+
use linkerd_stack as svc;
8+
use linkerd_tls as tls;
9+
use linkerd_tracing::access_log::TRACE_TARGET;
10+
use pin_project::pin_project;
11+
use std::{
12+
future::Future,
13+
net::SocketAddr,
14+
pin::Pin,
15+
task::{Context, Poll},
16+
time::{Duration, Instant, SystemTime},
17+
};
18+
use svc::{NewService, Param};
19+
use tracing::{field, span, Level, Span};
20+
21+
/// A `NewService` wrapper that adds access logging to the services built by
/// the inner `NewService`.
///
/// Construct it through [`NewAccessLog::layer`].
#[derive(Clone, Debug)]
pub struct NewAccessLog<N> {
    /// The wrapped `NewService`; every service it builds is instrumented.
    inner: N,
}
25+
26+
#[derive(Clone, Debug)]
27+
pub struct AccessLogContext<S> {
28+
inner: S,
29+
client_addr: SocketAddr,
30+
client_id: Option<identity::Name>,
31+
}
32+
33+
struct ResponseFutureInner {
34+
span: Span,
35+
start: Instant,
36+
processing: Duration,
37+
}
38+
39+
#[pin_project]
40+
pub struct AccessLogFuture<F> {
41+
data: Option<ResponseFutureInner>,
42+
43+
#[pin]
44+
inner: F,
45+
}
46+
47+
impl<N> NewAccessLog<N> {
48+
/// Returns a new `NewAccessLog` layer that wraps an inner service with
49+
/// access logging middleware.
50+
///
51+
/// The access log is recorded by adding a `tracing` span to the service's
52+
/// future. If access logging is not enabled by the `tracing` subscriber,
53+
/// this span will never be enabled, and it can be skipped cheaply. When
54+
/// access logging *is* enabled, additional data will be recorded when the
55+
/// response future completes.
56+
///
57+
/// Recording the access log will introduce additional overhead in the
58+
/// request path, but this is largely avoided when access logging is not
59+
/// enabled.
60+
#[inline]
61+
pub fn layer() -> impl svc::layer::Layer<N, Service = Self> {
62+
svc::layer::mk(|inner| NewAccessLog { inner })
63+
}
64+
}
65+
66+
impl<N, T> NewService<T> for NewAccessLog<N>
67+
where
68+
T: Param<tls::ConditionalServerTls> + Param<Remote<ClientAddr>>,
69+
N: NewService<T>,
70+
{
71+
type Service = AccessLogContext<N::Service>;
72+
73+
fn new_service(&self, target: T) -> Self::Service {
74+
let Remote(ClientAddr(client_addr)) = target.param();
75+
let tls: tls::ConditionalServerTls = target.param();
76+
let client_id = tls
77+
.value()
78+
.and_then(|tls| tls.client_id().map(|tls::ClientId(name)| name.clone()));
79+
let inner = self.inner.new_service(target);
80+
AccessLogContext {
81+
inner,
82+
client_addr,
83+
client_id,
84+
}
85+
}
86+
}
87+
88+
impl<S, B1, B2> svc::Service<http::Request<B1>> for AccessLogContext<S>
89+
where
90+
S: svc::Service<http::Request<B1>, Response = http::Response<B2>>,
91+
{
92+
type Response = S::Response;
93+
type Error = S::Error;
94+
type Future = AccessLogFuture<S::Future>;
95+
96+
#[inline]
97+
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), S::Error>> {
98+
self.inner.poll_ready(cx)
99+
}
100+
101+
fn call(&mut self, request: http::Request<B1>) -> Self::Future {
102+
let get_header = |name: http::header::HeaderName| {
103+
request
104+
.headers()
105+
.get(name)
106+
.and_then(|x| x.to_str().ok())
107+
.unwrap_or_default()
108+
};
109+
110+
let trace_id = || {
111+
let headers = request.headers();
112+
headers
113+
.get("x-b3-traceid")
114+
.or_else(|| headers.get("x-request-id"))
115+
.or_else(|| headers.get("x-amzn-trace-id"))
116+
.and_then(|x| x.to_str().ok())
117+
.unwrap_or_default()
118+
};
119+
120+
let span = span!(target: TRACE_TARGET, Level::INFO, "http",
121+
client.addr = %self.client_addr,
122+
client.id = self.client_id.as_ref().map(|n| n.as_str()).unwrap_or("-"),
123+
timestamp = %now(),
124+
method = request.method().as_str(),
125+
uri = %request.uri(),
126+
version = ?request.version(),
127+
trace_id = trace_id(),
128+
request_bytes = get_header(http::header::CONTENT_LENGTH),
129+
status = field::Empty,
130+
response_bytes = field::Empty,
131+
total_ns = field::Empty,
132+
processing_ns = field::Empty,
133+
user_agent = get_header(http::header::USER_AGENT),
134+
host = get_header(http::header::HOST),
135+
);
136+
137+
// The access log span is only enabled by the `tracing` subscriber if
138+
// access logs are being recorded. If it's disabled, we can skip
139+
// recording additional data in the response future.
140+
if span.is_disabled() {
141+
return AccessLogFuture {
142+
data: None,
143+
inner: self.inner.call(request),
144+
};
145+
}
146+
147+
AccessLogFuture {
148+
data: Some(ResponseFutureInner {
149+
span,
150+
start: Instant::now(),
151+
processing: Duration::from_secs(0),
152+
}),
153+
inner: self.inner.call(request),
154+
}
155+
}
156+
}
157+
158+
impl<F, B2> Future for AccessLogFuture<F>
159+
where
160+
F: TryFuture<Ok = http::Response<B2>>,
161+
{
162+
type Output = Result<F::Ok, F::Error>;
163+
164+
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
165+
let mut this = self.project();
166+
167+
let data: &mut ResponseFutureInner = match &mut this.data {
168+
Some(data) => data,
169+
None => return this.inner.try_poll(cx),
170+
};
171+
172+
let _enter = data.span.enter();
173+
let poll_start = Instant::now();
174+
175+
let response: http::Response<B2> = match this.inner.try_poll(cx) {
176+
Poll::Pending => {
177+
data.processing += Instant::now().duration_since(poll_start);
178+
return Poll::Pending;
179+
}
180+
Poll::Ready(Err(e)) => return Poll::Ready(Err(e)),
181+
Poll::Ready(Ok(response)) => response,
182+
};
183+
184+
let now = Instant::now();
185+
let total_ns = now.duration_since(data.start).as_nanos();
186+
let processing_ns = (now.duration_since(poll_start) + data.processing).as_nanos();
187+
188+
let span = &data.span;
189+
190+
response
191+
.headers()
192+
.get(http::header::CONTENT_LENGTH)
193+
.and_then(|x| x.to_str().ok())
194+
.map(|x| span.record("response_bytes", &x));
195+
196+
span.record("status", &response.status().as_u16());
197+
span.record("total_ns", &field::display(total_ns));
198+
span.record("processing_ns", &field::display(processing_ns));
199+
200+
Poll::Ready(Ok(response))
201+
}
202+
}
203+
204+
#[inline]
205+
fn now() -> humantime::Rfc3339Timestamp {
206+
humantime::format_rfc3339(SystemTime::now())
207+
}

linkerd/http-retry/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -938,7 +938,7 @@ mod tests {
938938
tx: Tx(tx),
939939
initial,
940940
replay,
941-
_trace: linkerd_tracing::test::with_default_filter("linkerd_http_retry=debug"),
941+
_trace: linkerd_tracing::test::with_default_filter("linkerd_http_retry=debug").0,
942942
}
943943
}
944944
}

linkerd/tls/src/server.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,17 @@ impl fmt::Display for NoServerTls {
294294
}
295295
}
296296

297+
// === impl ServerTls ===
298+
299+
impl ServerTls {
300+
pub fn client_id(&self) -> Option<&ClientId> {
301+
match self {
302+
ServerTls::Established { ref client_id, .. } => client_id.as_ref(),
303+
_ => None,
304+
}
305+
}
306+
}
307+
297308
#[cfg(test)]
298309
mod tests {
299310
use super::*;

linkerd/tracing/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,6 @@ tracing-log = "0.1.2"
1818

1919
[dependencies.tracing-subscriber]
2020
version = "0.3"
21-
features = ["env-filter","smallvec", "tracing-log", "json", "parking_lot"]
21+
default-features = false
22+
features = ["env-filter", "fmt", "smallvec", "tracing-log", "json", "parking_lot", "registry"]
2223

0 commit comments

Comments
 (0)