Skip to content

Commit 9508c5b

Browse files
committed
Add a traitlet to disable recording HTTP request metrics
Since this records a series of metrics for each HTTP handler class, this quickly leads to an explosion of cardinality and makes storing metrics quite difficult. For example, just accessing the metrics endpoint creates the following 17 metrics: ``` http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.005",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.01",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.025",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.05",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.075",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.1",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.25",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.5",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="0.75",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="1.0",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="2.5",method="GET",status_code="200"} 9.0 
http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="5.0",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="7.5",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="10.0",method="GET",status_code="200"} 9.0 http_request_duration_seconds_bucket{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",le="+Inf",method="GET",status_code="200"} 9.0 http_request_duration_seconds_count{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",method="GET",status_code="200"} 9.0 http_request_duration_seconds_sum{handler="jupyter_server.base.handlers.PrometheusMetricsHandler",method="GET",status_code="200"} 0.009019851684570312 ``` This is what has stalled prior attempts at collecting metrics from jupyter_server usefully in multitenant deployments (see berkeley-dsep-infra/datahub#1977). This PR adds a traitlet that allows hub admins to turn these metrics off.
1 parent d3a6c60 commit 9508c5b

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

jupyter_server/log.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,16 @@ def _scrub_uri(uri: str) -> str:
4141
return uri
4242

4343

44-
def log_request(handler):
44+
def log_request(handler, record_prometheus_metrics=True):
4545
"""log a bit more information about each request than tornado's default
4646
4747
- move static file get success to debug-level (reduces noise)
4848
- get proxied IP instead of proxy IP
4949
- log referer for redirect and failed requests
5050
- log user-agent for failed requests
51+
52+
if record_prometheus_metrics is True, will record a histogram prometheus
53+
metric (http_request_duration_seconds) for each request handler
5154
"""
5255
status = handler.get_status()
5356
request = handler.request
@@ -97,4 +100,5 @@ def log_request(handler):
97100
headers[header] = request.headers[header]
98101
log_method(json.dumps(headers, indent=2))
99102
log_method(msg.format(**ns))
100-
prometheus_log_method(handler)
103+
if record_prometheus_metrics:
104+
prometheus_log_method(handler)

jupyter_server/serverapp.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# Distributed under the terms of the Modified BSD License.
55
from __future__ import annotations
66

7+
from functools import partial
78
import datetime
89
import errno
910
import gettext
@@ -410,7 +411,7 @@ def init_settings(
410411

411412
settings = {
412413
# basics
413-
"log_function": log_request,
414+
"log_function": partial(log_request, record_prometheus_metrics=jupyter_app.record_http_request_metrics),
414415
"base_url": base_url,
415416
"default_url": default_url,
416417
"template_path": template_path,
@@ -1993,6 +1994,18 @@ def _default_terminals_enabled(self) -> bool:
19931994
config=True,
19941995
)
19951996

1997+
record_http_request_metrics = Bool(
1998+
True,
1999+
help="""
2000+
Record http_request_duration_seconds metric in the metrics endpoint.
2001+
2002+
Since a histogram is exposed for each request handler, this can create a
2003+
*lot* of metrics, creating operational challenges for multitenant deployments.
2004+
2005+
Set to False to disable recording the http_request_duration_seconds metric.
2006+
"""
2007+
)
2008+
19962009
static_immutable_cache = List(
19972010
Unicode(),
19982011
help="""

0 commit comments

Comments
 (0)