Skip to content

Commit dfcea5b

Browse files
authored
Merge pull request #4036 from Hyaxia/metrics_terminal
Added metrics for currently running terminals and for currently running kernels labeled by type
2 parents fa7b40b + 4c1d62f commit dfcea5b

File tree

7 files changed

+69
-17
lines changed

7 files changed

+69
-17
lines changed

appveyor.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ install:
1717
- cmd: conda config --set show_channel_urls true
1818
- cmd: conda config --add channels conda-forge
1919
#- cmd: conda update --yes --quiet conda
20-
- cmd: conda install -y pyzmq tornado jupyter_client nbformat nbconvert ipykernel pip nodejs nose
20+
- cmd: conda install -y pyzmq tornado jupyter_client nbformat ipykernel pip nodejs nose
21+
# not using `conda install -y` on nbconvert package because there is
22+
# currently a bug with the version that conda installs, so we will just install it with pip
23+
- cmd: pip install nbconvert
2124
- cmd: python setup.py build
2225
- cmd: pip install .[test]
2326

notebook/log.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
import json
99
from tornado.log import access_log
10-
from .metrics import prometheus_log_method
10+
from .prometheus.log_functions import prometheus_log_method
11+
1112

1213
def log_request(handler):
1314
"""log a bit more information about each request than tornado's default

notebook/prometheus/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""
2+
A package containing all the functionality and
3+
configuration connected to the prometheus metrics
4+
"""

notebook/metrics.py renamed to notebook/prometheus/log_functions.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,5 @@
1-
"""
2-
Prometheus metrics exported by Jupyter Notebook Server
1+
from ..prometheus.metrics import HTTP_REQUEST_DURATION_SECONDS
32

4-
Read https://prometheus.io/docs/practices/naming/ for naming
5-
conventions for metrics & labels.
6-
"""
7-
8-
from prometheus_client import Histogram
9-
10-
# This is a fairly standard name for HTTP duration latency reporting
11-
HTTP_REQUEST_DURATION_SECONDS = Histogram(
12-
'http_request_duration_seconds',
13-
'duration in seconds for all HTTP requests',
14-
['method', 'handler', 'status_code'],
15-
)
163

174
def prometheus_log_method(handler):
185
"""

notebook/prometheus/metrics.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""
2+
Prometheus metrics exported by Jupyter Notebook Server
3+
4+
Read https://prometheus.io/docs/practices/naming/ for naming
5+
conventions for metrics & labels.
6+
"""
7+
8+
9+
from prometheus_client import Histogram, Gauge
10+
11+
12+
HTTP_REQUEST_DURATION_SECONDS = Histogram(
13+
'http_request_duration_seconds',
14+
'duration in seconds for all HTTP requests',
15+
['method', 'handler', 'status_code'],
16+
)
17+
18+
TERMINAL_CURRENTLY_RUNNING_TOTAL = Gauge(
19+
'terminal_currently_running_total',
20+
'counter for how many terminals are running',
21+
)
22+
23+
KERNEL_CURRENTLY_RUNNING_TOTAL = Gauge(
24+
'kernel_currently_running_total',
25+
'counter for how many kernels are running labeled by type',
26+
['type']
27+
)

notebook/services/kernels/kernelmanager.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
from notebook._tz import utcnow, isoformat
2727
from ipython_genutils.py3compat import getcwd
2828

29+
from notebook.prometheus.metrics import KERNEL_CURRENTLY_RUNNING_TOTAL
30+
2931

3032
class MappingKernelManager(MultiKernelManager):
3133
"""A KernelManager that handles notebook mapping and HTTP error handling"""
@@ -168,6 +170,13 @@ def start_kernel(self, kernel_id=None, path=None, **kwargs):
168170
lambda : self._handle_kernel_died(kernel_id),
169171
'dead',
170172
)
173+
174+
# Increase the metric of number of kernels running
175+
# for the relevant kernel type by 1
176+
KERNEL_CURRENTLY_RUNNING_TOTAL.labels(
177+
type=self._kernels[kernel_id].kernel_name
178+
).inc()
179+
171180
else:
172181
self._check_kernel_id(kernel_id)
173182
self.log.info("Using existing kernel: %s" % kernel_id)
@@ -278,6 +287,13 @@ def shutdown_kernel(self, kernel_id, now=False):
278287
self.stop_buffering(kernel_id)
279288
self._kernel_connections.pop(kernel_id, None)
280289
self.last_kernel_activity = utcnow()
290+
291+
# Decrease the metric of number of kernels
292+
# running for the relevant kernel type by 1
293+
KERNEL_CURRENTLY_RUNNING_TOTAL.labels(
294+
type=self._kernels[kernel_id].kernel_name
295+
).dec()
296+
281297
return super(MappingKernelManager, self).shutdown_kernel(kernel_id, now=now)
282298

283299
def restart_kernel(self, kernel_id):

notebook/terminal/api_handlers.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import json
22
from tornado import web, gen
33
from ..base.handlers import APIHandler
4-
from ..utils import url_path_join
4+
from ..prometheus.metrics import TERMINAL_CURRENTLY_RUNNING_TOTAL
5+
56

67
class TerminalRootHandler(APIHandler):
78
@web.authenticated
@@ -10,12 +11,20 @@ def get(self):
1011
terms = [{'name': name} for name in tm.terminals]
1112
self.finish(json.dumps(terms))
1213

14+
# Update the metric below to the length of the list 'terms'
15+
TERMINAL_CURRENTLY_RUNNING_TOTAL.set(
16+
len(terms)
17+
)
18+
1319
@web.authenticated
1420
def post(self):
1521
"""POST /terminals creates a new terminal and redirects to it"""
1622
name, _ = self.terminal_manager.new_named_terminal()
1723
self.finish(json.dumps({'name': name}))
1824

25+
# Increase the metric by one because a new terminal was created
26+
TERMINAL_CURRENTLY_RUNNING_TOTAL.inc()
27+
1928

2029
class TerminalHandler(APIHandler):
2130
SUPPORTED_METHODS = ('GET', 'DELETE')
@@ -36,5 +45,10 @@ def delete(self, name):
3645
yield tm.terminate(name, force=True)
3746
self.set_status(204)
3847
self.finish()
48+
49+
# Decrease the metric below by one
50+
# because a terminal has been shut down
51+
TERMINAL_CURRENTLY_RUNNING_TOTAL.dec()
52+
3953
else:
4054
raise web.HTTPError(404, "Terminal not found: %r" % name)

0 commit comments

Comments
 (0)