Skip to content

Commit d5fe0e6

Browse files
authored
admin: add an endpoint to dump spawned Tokio tasks (#595)
## Motivation When debugging proxy issues, it can be useful to inspect the list of currently spawned Tokio tasks and their states. This can be used similarly to the thread or coroutine dumps provided by other languages' runtimes. ## Solution This branch adds a new endpoint to the proxy's admin server, `/tasks`, that returns a dump of all tasks currently spawned on the Tokio runtime, using the new Tracing instrumentation added in tokio-rs/tokio#2655, and a work-in-progress [`tokio-trace`] crate that provides Tokio-specific Tracing layers. Currently, the `/tasks` admin endpoint records the following information about each task: * Whether it is a normal, local, or blocking task (not relevant to us currently, since Linkerd does not use local or blocking tasks...but we might eventually!) * Whether the task is active (currently being polled) or idle (waiting to be polled) * The type of the future that was spawned * The Tracing span context from which the task was spawned * The total number of times the task has been polled * Timing statistics about the task, including: - The time in nanoseconds between when the task was spawned and when it was first polled (essentially, measuring the Tokio scheduler's latency) - The total time in nanoseconds the task has existed - The task's _busy time_ in nanoseconds (time it was actively being polled) - The task's _idle time_ in nanoseconds (time it was _not_ being polled) In the future, Tokio will likely expose additional Tracing information, which we'll be able to collect as well. The task dump can be accessed either as an HTML table or as JSON. JSON is returned if the request has an `Accept: application/json` header, or whenever the path `/tasks.json` is requested; otherwise, the data is rendered as an HTML table. 
Like the `/proxy-log-level` endpoint, access is denied to requests coming from sources other than localhost, to help restrict access to authorized users (since a high volume of requests for task dumps could be used to starve the proxy). Example JSON output (in Firefox Dev Edition's extremely nice GUI JSON viewer): ![Screenshot_20200715_121938](https://user-images.githubusercontent.com/2796466/87598059-b9f68380-c6a7-11ea-8f21-842b57793baa.png) Zoomed in on the timing data for a single task: ![Screenshot_20200715_122047](https://user-images.githubusercontent.com/2796466/87598089-c4b11880-c6a7-11ea-93ac-895f7ecee0f0.png) And HTML: ![Screenshot_20200715_143155](https://user-images.githubusercontent.com/2796466/87598414-fe821f00-c6a7-11ea-93b8-d18e4837346c.png) Because the task data is generated from Tracing spans emitted by Tokio, the task spans must be enabled for it to be used. This can be done by setting a trace filter that enables the `trace` level for the target `tokio::task`, e.g.: ``` tokio::task=trace ``` or ``` tokio=trace ``` ## Notes * This branch depends on unreleased code from upstream, including a Tokio change that has merged to master but not been published, and my unreleased work-in-progress [`tokio-trace`] crate. Therefore, I've pinned these upstreams to fixed Git SHAs, to guard against dependencies changing under us unexpectedly. * I considered requiring a build-time feature flag to enable this feature, the way we do for the mock SO_ORIG_DST implementation for testing. However, this would make it harder to use task tracking to debug issues in proxies not built with the flag. I'm happy to change this code to be feature flagged if we think that's the right approach. [`tokio-trace`]: https://github.com/hawkw/tokio-trace Closes linkerd/linkerd2#3803 Signed-off-by: Eliza Weisman <[email protected]>
1 parent c234a92 commit d5fe0e6

File tree

12 files changed

+312
-102
lines changed

12 files changed

+312
-102
lines changed

Cargo.lock

Lines changed: 52 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ dependencies = [
2626

2727
[[package]]
2828
name = "aho-corasick"
29-
version = "0.6.4"
29+
version = "0.7.13"
3030
source = "registry+https://github.com/rust-lang/crates.io-index"
31-
checksum = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4"
31+
checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
3232
dependencies = [
3333
"memchr 2.3.3",
3434
]
@@ -541,6 +541,15 @@ dependencies = [
541541
"winapi 0.3.8",
542542
]
543543

544+
[[package]]
545+
name = "html-escape"
546+
version = "0.2.5"
547+
source = "registry+https://github.com/rust-lang/crates.io-index"
548+
checksum = "7c185ca7c57375b4d21eb5a1343a152099b9ea3e7e776971fce80f8d907a7ffb"
549+
dependencies = [
550+
"utf8-width",
551+
]
552+
544553
[[package]]
545554
name = "http"
546555
version = "0.1.21"
@@ -752,6 +761,7 @@ dependencies = [
752761
"bytes 0.5.4",
753762
"futures 0.3.5",
754763
"h2 0.2.6",
764+
"html-escape",
755765
"http 0.2.1",
756766
"http-body",
757767
"hyper",
@@ -767,7 +777,7 @@ dependencies = [
767777
"linkerd2-proxy-api",
768778
"net2",
769779
"quickcheck",
770-
"regex 1.0.0",
780+
"regex 1.2.1",
771781
"ring",
772782
"rustls",
773783
"tokio",
@@ -789,6 +799,7 @@ dependencies = [
789799
"async-trait",
790800
"bytes 0.5.4",
791801
"futures 0.3.5",
802+
"html-escape",
792803
"http 0.2.1",
793804
"http-body",
794805
"hyper",
@@ -838,10 +849,12 @@ dependencies = [
838849
"prost-types",
839850
"quickcheck",
840851
"rand 0.7.2",
841-
"regex 1.0.0",
852+
"regex 1.2.1",
853+
"serde_json",
842854
"tokio",
843855
"tokio-test",
844856
"tokio-timer",
857+
"tokio-trace",
845858
"tonic",
846859
"tower",
847860
"tower-request-modifier",
@@ -1456,7 +1469,7 @@ dependencies = [
14561469
"prost-types",
14571470
"quickcheck",
14581471
"rand 0.7.2",
1459-
"regex 1.0.0",
1472+
"regex 1.2.1",
14601473
"tokio",
14611474
"tonic",
14621475
"tower",
@@ -2155,15 +2168,14 @@ dependencies = [
21552168

21562169
[[package]]
21572170
name = "regex"
2158-
version = "1.0.0"
2171+
version = "1.2.1"
21592172
source = "registry+https://github.com/rust-lang/crates.io-index"
2160-
checksum = "75ecf88252dce580404a22444fc7d626c01815debba56a7f4f536772a5ff19d3"
2173+
checksum = "88c3d9193984285d544df4a30c23a4e62ead42edf70a4452ceb76dac1ce05c26"
21612174
dependencies = [
2162-
"aho-corasick 0.6.4",
2175+
"aho-corasick 0.7.13",
21632176
"memchr 2.3.3",
21642177
"regex-syntax 0.6.11",
2165-
"thread_local 0.3.5",
2166-
"utf8-ranges 1.0.0",
2178+
"thread_local 0.3.6",
21672179
]
21682180

21692181
[[package]]
@@ -2457,12 +2469,11 @@ dependencies = [
24572469

24582470
[[package]]
24592471
name = "thread_local"
2460-
version = "0.3.5"
2472+
version = "0.3.6"
24612473
source = "registry+https://github.com/rust-lang/crates.io-index"
2462-
checksum = "279ef31c19ededf577bfd12dfae728040a21f635b06a24cd670ff510edd38963"
2474+
checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
24632475
dependencies = [
24642476
"lazy_static",
2465-
"unreachable",
24662477
]
24672478

24682479
[[package]]
@@ -2478,9 +2489,9 @@ dependencies = [
24782489

24792490
[[package]]
24802491
name = "tokio"
2481-
version = "0.2.21"
2492+
version = "0.2.22"
24822493
source = "registry+https://github.com/rust-lang/crates.io-index"
2483-
checksum = "d099fa27b9702bed751524694adbe393e18b36b204da91eb1cbbbbb4a5ee2d58"
2494+
checksum = "5d34ca54d84bf2b5b4d7d31e901a8464f7b60ac145a284fba25ceb801f2ddccd"
24842495
dependencies = [
24852496
"bytes 0.5.4",
24862497
"fnv",
@@ -2497,6 +2508,7 @@ dependencies = [
24972508
"signal-hook-registry",
24982509
"slab",
24992510
"tokio-macros",
2511+
"tracing",
25002512
"winapi 0.3.8",
25012513
]
25022514

@@ -2586,6 +2598,18 @@ dependencies = [
25862598
"tokio-executor",
25872599
]
25882600

2601+
[[package]]
2602+
name = "tokio-trace"
2603+
version = "0.1.0"
2604+
source = "git+https://github.com/hawkw/tokio-trace?rev=a8240c5cbb4ff981def84920d4087ef23b5edb93#a8240c5cbb4ff981def84920d4087ef23b5edb93"
2605+
dependencies = [
2606+
"num_cpus",
2607+
"serde",
2608+
"tokio",
2609+
"tracing-core",
2610+
"tracing-subscriber",
2611+
]
2612+
25892613
[[package]]
25902614
name = "tokio-util"
25912615
version = "0.3.1"
@@ -2706,9 +2730,9 @@ dependencies = [
27062730

27072731
[[package]]
27082732
name = "tracing"
2709-
version = "0.1.15"
2733+
version = "0.1.16"
27102734
source = "registry+https://github.com/rust-lang/crates.io-index"
2711-
checksum = "a41f40ed0e162c911ac6fcb53ecdc8134c46905fdbbae8c50add462a538b495f"
2735+
checksum = "c2e2a2de6b0d5cbb13fc21193a2296888eaab62b6044479aafb3c54c01c29fcd"
27122736
dependencies = [
27132737
"cfg-if",
27142738
"log",
@@ -2718,9 +2742,9 @@ dependencies = [
27182742

27192743
[[package]]
27202744
name = "tracing-attributes"
2721-
version = "0.1.8"
2745+
version = "0.1.9"
27222746
source = "registry+https://github.com/rust-lang/crates.io-index"
2723-
checksum = "99bbad0de3fd923c9c3232ead88510b783e5a4d16a6154adffa3d53308de984c"
2747+
checksum = "f0693bf8d6f2bf22c690fc61a9d21ac69efdbb894a17ed596b9af0f01e64b84b"
27242748
dependencies = [
27252749
"proc-macro2 1.0.10",
27262750
"quote 1.0.2",
@@ -2729,9 +2753,9 @@ dependencies = [
27292753

27302754
[[package]]
27312755
name = "tracing-core"
2732-
version = "0.1.10"
2756+
version = "0.1.11"
27332757
source = "registry+https://github.com/rust-lang/crates.io-index"
2734-
checksum = "0aa83a9a47081cd522c09c81b31aec2c9273424976f922ad61c053b58350b715"
2758+
checksum = "94ae75f0d28ae10786f3b1895c55fe72e79928fd5ccdebb5438c75e93fec178f"
27352759
dependencies = [
27362760
"lazy_static",
27372761
]
@@ -2778,7 +2802,7 @@ dependencies = [
27782802
"lazy_static",
27792803
"matchers",
27802804
"parking_lot",
2781-
"regex 1.0.0",
2805+
"regex 1.2.1",
27822806
"serde",
27832807
"serde_json",
27842808
"sharded-slab",
@@ -2867,15 +2891,6 @@ version = "0.2.0"
28672891
source = "registry+https://github.com/rust-lang/crates.io-index"
28682892
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
28692893

2870-
[[package]]
2871-
name = "unreachable"
2872-
version = "1.0.0"
2873-
source = "registry+https://github.com/rust-lang/crates.io-index"
2874-
checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
2875-
dependencies = [
2876-
"void",
2877-
]
2878-
28792894
[[package]]
28802895
name = "untrusted"
28812896
version = "0.7.0"
@@ -2906,16 +2921,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
29062921
checksum = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
29072922

29082923
[[package]]
2909-
name = "version_check"
2910-
version = "0.1.5"
2924+
name = "utf8-width"
2925+
version = "0.1.3"
29112926
source = "registry+https://github.com/rust-lang/crates.io-index"
2912-
checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
2927+
checksum = "6f2c54fe5e8d6907c60dc6fba532cc8529245d97ff4e26cb490cb462de114ba4"
29132928

29142929
[[package]]
2915-
name = "void"
2916-
version = "1.0.2"
2930+
name = "version_check"
2931+
version = "0.1.5"
29172932
source = "registry+https://github.com/rust-lang/crates.io-index"
2918-
checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
2933+
checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
29192934

29202935
[[package]]
29212936
name = "want"

Dockerfile

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,21 +37,21 @@ ARG PROXY_UNOPTIMIZED
3737
ARG PROXY_FEATURES
3838

3939
RUN --mount=type=cache,target=/var/lib/apt/lists \
40-
--mount=type=cache,target=/var/tmp \
41-
apt update && apt install -y time cmake
40+
--mount=type=cache,target=/var/tmp \
41+
apt update && apt install -y time cmake
4242

4343
WORKDIR /usr/src/linkerd2-proxy
4444
COPY . .
4545
RUN --mount=type=cache,target=target \
46-
--mount=type=cache,from=rust:1.44.1-buster,source=/usr/local/cargo,target=/usr/local/cargo \
47-
mkdir -p /out && \
48-
if [ -n "$PROXY_UNOPTIMIZED" ]; then \
49-
(cd linkerd2-proxy && /usr/bin/time -v cargo build --locked --features="$PROXY_FEATURES") && \
50-
mv target/debug/linkerd2-proxy /out/linkerd2-proxy ; \
51-
else \
52-
(cd linkerd2-proxy && /usr/bin/time -v cargo build --locked --release --features="$PROXY_FEATURES") && \
53-
mv target/release/linkerd2-proxy /out/linkerd2-proxy ; \
54-
fi
46+
--mount=type=cache,from=rust:1.44.1-buster,source=/usr/local/cargo,target=/usr/local/cargo \
47+
mkdir -p /out && \
48+
if [ -n "$PROXY_UNOPTIMIZED" ]; then \
49+
(cd linkerd2-proxy && /usr/bin/time -v cargo build --locked --features="$PROXY_FEATURES") && \
50+
mv target/debug/linkerd2-proxy /out/linkerd2-proxy ; \
51+
else \
52+
(cd linkerd2-proxy && /usr/bin/time -v cargo build --locked --release --features="$PROXY_FEATURES") && \
53+
mv target/release/linkerd2-proxy /out/linkerd2-proxy ; \
54+
fi
5555

5656
## Install the proxy binary into the base runtime image.
5757
FROM $RUNTIME_IMAGE as runtime

linkerd/app/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ tonic = { version = "0.2", default-features = false, features = ["prost"] }
3030
tower = "0.3"
3131
tracing = "0.1.9"
3232
tracing-futures = { version = "0.2", features = ["std-future"]}
33+
html-escape = "0.2.5"
3334

3435
[dev-dependencies]
3536
bytes = "0.5"

linkerd/app/core/Cargo.toml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ linkerd2-stack-tracing = { path = "../../stack/tracing" }
6363
linkerd2-trace-context = { path = "../../trace-context" }
6464
rand = { version = "0.7", features = ["small_rng"] }
6565
regex = "1.0.0"
66-
tokio = { version = "0.2", features = ["macros", "sync", "parking_lot"]}
66+
tokio = { version = "0.2.22", features = ["macros", "sync", "parking_lot"]}
6767
tokio-timer = "0.2"
6868
tower-request-modifier = { git = "https://github.com/tower-rs/tower-http" }
6969
tonic = { version = "0.2", default-features = false, features = ["prost"] }
@@ -72,11 +72,16 @@ tracing-futures = { version = "0.2", features = ["std-future"] }
7272
tracing-log = "0.1"
7373
pin-project = "0.4"
7474

75+
# task tracking
76+
html-escape = "0.2.5"
77+
tokio-trace = { git = "https://github.com/hawkw/tokio-trace", rev = "a8240c5cbb4ff981def84920d4087ef23b5edb93", features = ["serde"] }
78+
serde_json = "1"
79+
7580
[dependencies.tracing-subscriber]
7681
version = "0.2.8"
77-
# we don't need `chrono` time formatting
82+
# we don't need `chrono` time formatting or ANSI colored output
7883
default-features = false
79-
features = ["env-filter", "fmt", "smallvec", "tracing-log", "ansi", "json", "parking_lot"]
84+
features = ["env-filter", "fmt", "smallvec", "tracing-log", "json", "parking_lot"]
8085

8186
[dependencies.tower]
8287
version = "0.3"

linkerd/app/core/src/admin/mod.rs

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
//! * `/metrics` -- reports prometheus-formatted metrics.
44
//! * `/ready` -- returns 200 when the proxy is ready to participate in meshed traffic.
55
6-
use crate::{svc, transport::tls::accept::Connection};
6+
use crate::{svc, trace, transport::tls::accept::Connection};
77
use futures::{future, TryFutureExt};
88
use http::StatusCode;
99
use hyper::{Body, Request, Response};
@@ -16,15 +16,17 @@ use std::task::{Context, Poll};
1616
use tower::{service_fn, Service};
1717

1818
mod readiness;
19+
mod tasks;
1920
mod trace_level;
2021

2122
pub use self::readiness::{Latch, Readiness};
22-
use self::trace_level::TraceLevel;
23+
use self::{tasks::Tasks, trace_level::TraceLevel};
2324

2425
#[derive(Debug, Clone)]
2526
pub struct Admin<M: FmtMetrics> {
2627
metrics: metrics::Serve<M>,
2728
trace_level: TraceLevel,
29+
tasks: Tasks,
2830
ready: Readiness,
2931
}
3032

@@ -38,10 +40,18 @@ pub type ResponseFuture =
3840
Pin<Box<dyn Future<Output = Result<Response<Body>, io::Error>> + Send + 'static>>;
3941

4042
impl<M: FmtMetrics> Admin<M> {
41-
pub fn new(m: M, ready: Readiness, trace_level: TraceLevel) -> Self {
43+
pub fn new(
44+
m: M,
45+
ready: Readiness,
46+
trace::Handle {
47+
level: trace_level,
48+
tasks,
49+
}: trace::Handle,
50+
) -> Self {
4251
Self {
4352
metrics: metrics::Serve::new(m),
4453
trace_level,
54+
tasks: tasks.into(),
4555
ready,
4656
}
4757
}
@@ -87,6 +97,7 @@ impl<M: FmtMetrics> Service<Request<Body>> for Admin<M> {
8797
"/proxy-log-level" => self.trace_level.call(req),
8898
"/ready" => Box::pin(future::ok(self.ready_rsp())),
8999
"/live" => Box::pin(future::ok(self.live_rsp())),
100+
path if path.starts_with("/tasks") => Box::pin(self.tasks.call(req)),
90101
_ => Box::pin(future::ok(rsp(StatusCode::NOT_FOUND, Body::empty()))),
91102
}
92103
}
@@ -130,6 +141,25 @@ fn rsp(status: StatusCode, body: impl Into<Body>) -> Response<Body> {
130141
.expect("builder with known status code must not fail")
131142
}
132143

144+
fn check_loopback<B>(req: &Request<B>) -> Result<(), Response<Body>> {
145+
if let Some(addr) = req.extensions().get::<ClientAddr>() {
146+
let addr = addr.addr();
147+
if addr.ip().is_loopback() {
148+
return Ok(());
149+
}
150+
tracing::warn!(%addr, "denying request from non-loopback IP");
151+
Err(rsp(
152+
StatusCode::FORBIDDEN,
153+
"access to /proxy-log-level and /trace only allowed from loopback interface",
154+
))
155+
} else {
156+
// TODO: should we panic if this was unset? It's a bug, but should
157+
// it crash the proxy?
158+
tracing::error!("ClientAddr extension should always be set");
159+
Err(rsp(StatusCode::INTERNAL_SERVER_ERROR, Body::empty()))
160+
}
161+
}
162+
133163
#[cfg(test)]
134164
mod tests {
135165
use super::*;
@@ -144,7 +174,7 @@ mod tests {
144174
let (r, l0) = Readiness::new();
145175
let l1 = l0.clone();
146176

147-
let mut srv = Admin::new((), r, TraceLevel::dangling());
177+
let mut srv = Admin::new((), r, trace::Handle::dangling());
148178
macro_rules! call {
149179
() => {{
150180
let r = Request::builder()

0 commit comments

Comments
 (0)