Skip to content

Commit caacae0

Browse files
committed
Make metric definitions/handling more abstract to prepare for a more highly extensible configuration capable of using (almost) all psutil metrics.
1 parent 7427121 commit caacae0

File tree

3 files changed

+104
-54
lines changed

3 files changed

+104
-54
lines changed

CONTRIBUTING.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@ We recommend using [pipenv](https://docs.pipenv.org/) to make development easier
3232
4. Do a dev install of nbresuse and its dependencies
3333

3434
```bash
35-
pip install --editable .
35+
pip install --editable .[resources]
3636
```
3737

38+
To test the behavior of NBResuse without `psutil` installed, run `pip install --editable .` instead.
39+
3840
5. Install and enable the nbextension for use with Jupyter Classic Notebook.
3941

4042
```bash

nbresuse/metrics.py

Lines changed: 59 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from typing import NamedTuple, Optional
1+
from typing import NamedTuple
2+
from typing import Optional
23

34
try:
45
import psutil
@@ -7,47 +8,79 @@
78

89

910
class MemoryMetrics(NamedTuple):
10-
current_memory: int
11-
max_memory: int
11+
rss: int
12+
virtual_memory: int
1213

1314

1415
class CPUMetrics(NamedTuple):
15-
cpu_max: float
16-
cpu_usage: float
16+
cpu_percent: float
17+
cpu_count: int
1718

1819

19-
def memory_metrics() -> Optional[MemoryMetrics]:
20-
if psutil:
21-
cur_process = psutil.Process()
22-
all_processes = [cur_process] + cur_process.children(recursive=True)
20+
def per_process_metric(metric_name, metric_kwargs={}, metric_attribute=None):
21+
if psutil is None:
22+
return None
23+
else:
24+
current_process = psutil.Process()
25+
all_processes = [current_process] + current_process.children(recursive=True)
2326

24-
rss = sum([p.memory_info().rss for p in all_processes])
25-
virtual_memory = psutil.virtual_memory().total
27+
def get_per_process_metric(
28+
process, metric_name, metric_kwargs, metric_attribute=None
29+
):
30+
try:
31+
metric_value = getattr(process, metric_name)(**metric_kwargs)
32+
if metric_attribute is not None:
33+
return getattr(metric_value, metric_attribute)
34+
return metric_value
35+
# Avoid littering logs with stack traces
36+
# complaining about dead processes
37+
except BaseException:
38+
return 0
2639

40+
per_process_metric_value = lambda process: get_per_process_metric(
41+
process, metric_name, metric_kwargs, metric_attribute
42+
)
43+
44+
return sum([per_process_metric_value(process) for process in all_processes])
45+
46+
47+
def system_metric(metric_name, metric_kwargs=None, metric_attribute=None):
    """Return a system-wide psutil metric, or None when psutil is unavailable.

    Calls ``psutil.<metric_name>(**metric_kwargs)``.

    Args:
        metric_name: Name of the module-level psutil function to call.
        metric_kwargs: Optional keyword arguments forwarded to that function.
        metric_attribute: Optional attribute to read from the call's result.

    Returns:
        ``None`` if psutil is not installed; the named attribute of the
        result when ``metric_attribute`` is given; otherwise the result
        itself.
    """
    if psutil is None:
        return None

    # Build the kwargs per call so no mutable default is shared across calls.
    metric_value = getattr(psutil, metric_name)(**(metric_kwargs or {}))
    if metric_attribute is not None:
        return getattr(metric_value, metric_attribute)
    # No attribute requested: the metric value itself is the result.
    # Returning ``metric_attribute`` here would always yield None.
    return metric_value
55+
56+
57+
def memory_metrics() -> Optional[MemoryMetrics]:
58+
59+
rss = {"metric_name": "memory_info", "metric_attribute": "rss"}
60+
rss_value = per_process_metric(**rss)
61+
62+
virtual_memory = {"metric_name": "virtual_memory", "metric_attribute": "total"}
63+
virtual_memory_value = system_metric(**virtual_memory)
64+
65+
memory_metric_values = {"rss": rss_value, "virtual_memory": virtual_memory_value}
66+
67+
if any(value is None for value in memory_metric_values.values()):
2868
return None
2969

30-
return MemoryMetrics(rss, virtual_memory)
70+
return MemoryMetrics(**memory_metric_values)
3171

3272

3373
def cpu_metrics() -> Optional[CPUMetrics]:
34-
if psutil:
35-
cur_process = psutil.Process()
36-
all_processes = [cur_process] + cur_process.children(recursive=True)
3774

38-
cpu_count = psutil.cpu_count()
75+
cpu_percent = {"metric_name": "cpu_percent", "metric_kwargs": {"interval": 0.05}}
76+
cpu_percent_value = per_process_metric(**cpu_percent)
3977

40-
def get_cpu_percent(p):
41-
try:
42-
return p.cpu_percent(interval=0.05)
43-
# Avoid littering logs with stack traces complaining
44-
# about dead processes having no CPU usage
45-
except BaseException:
46-
return 0
78+
cpu_count = {"metric_name": "cpu_count"}
79+
cpu_count_value = system_metric(**cpu_count)
4780

48-
cpu_percent = sum([get_cpu_percent(p) for p in all_processes])
81+
cpu_metric_values = {"cpu_percent": cpu_percent_value, "cpu_count": cpu_count_value}
4982

50-
else:
83+
if any(value is None for value in cpu_metric_values.values()):
5184
return None
5285

53-
return CPUMetrics(cpu_count * 100.0, cpu_percent)
86+
return CPUMetrics(**cpu_metric_values)

nbresuse/prometheus.py

Lines changed: 42 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
from typing import Optional
2+
13
from notebook.notebookapp import NotebookApp
24
from prometheus_client import Gauge
35
from tornado import gen
4-
from typing import Optional
56

67
from nbresuse.metrics import cpu_metrics
78
from nbresuse.metrics import CPUMetrics
@@ -14,45 +15,59 @@
1415
except ImportError:
1516
from .utils import Callable
1617

17-
TOTAL_MEMORY_USAGE = Gauge("total_memory_usage", "counter for total memory usage", [])
18-
19-
MAX_MEMORY_USAGE = Gauge("max_memory_usage", "counter for max memory usage", [])
20-
21-
TOTAL_CPU_USAGE = Gauge("total_cpu_usage", "counter for total cpu usage", [])
22-
23-
MAX_CPU_USAGE = Gauge("max_cpu_usage", "counter for max cpu usage", [])
24-
2518

2619
class PrometheusHandler(Callable):
2720
def __init__(self, nbapp: NotebookApp):
2821
super().__init__()
2922
self.config = nbapp.web_app.settings["nbresuse_display_config"]
3023
self.session_manager = nbapp.session_manager
3124

25+
self.TOTAL_MEMORY_USAGE = Gauge(
26+
"total_memory_usage", "counter for total memory usage", []
27+
)
28+
self.MAX_MEMORY_USAGE = Gauge(
29+
"max_memory_usage", "counter for max memory usage", []
30+
)
31+
32+
self.TOTAL_CPU_USAGE = Gauge(
33+
"total_cpu_usage", "counter for total cpu usage", []
34+
)
35+
self.MAX_CPU_USAGE = Gauge("max_cpu_usage", "counter for max cpu usage", [])
36+
3237
@gen.coroutine
3338
def __call__(self, *args, **kwargs):
34-
metrics = self.apply_memory_limits(memory_metrics())
35-
if metrics is not None:
36-
TOTAL_MEMORY_USAGE.set(metrics.current_memory)
37-
MAX_MEMORY_USAGE.set(metrics.max_memory)
39+
memory_metric_values = memory_metrics()
40+
if memory_metric_values is not None:
41+
self.TOTAL_MEMORY_USAGE.set(memory_metric_values.rss)
42+
self.MAX_MEMORY_USAGE.set(self.apply_memory_limit(memory_metric_values))
3843
if self.config.track_cpu_percent:
39-
metrics = self.apply_cpu_limits(cpu_metrics())
40-
if metrics is not None:
41-
TOTAL_CPU_USAGE.set(metrics.cpu_usage)
42-
MAX_CPU_USAGE.set(metrics.cpu_max)
44+
cpu_metric_values = cpu_metrics()
45+
if cpu_metric_values is not None:
46+
self.TOTAL_CPU_USAGE.set(cpu_metric_values.cpu_percent)
47+
self.MAX_CPU_USAGE.set(self.apply_cpu_limit(cpu_metric_values))
4348

44-
def apply_memory_limits(self, metrics: Optional[MemoryMetrics]) -> Optional[MemoryMetrics]:
45-
if metrics is not None:
49+
def apply_memory_limit(
50+
self, memory_metric_values: Optional[MemoryMetrics]
51+
) -> Optional[int]:
52+
if memory_metric_values is None:
53+
return None
54+
else:
4655
if callable(self.config.mem_limit):
47-
metrics.max_memory = self.config.mem_limit(rss=metrics.current_memory)
56+
return self.config.mem_limit(rss=memory_metric_values.rss)
4857
elif self.config.mem_limit > 0: # mem_limit is an Int
49-
metrics.max_memory = self.config.mem_limit
50-
return metrics
58+
return self.config.mem_limit
59+
else:
60+
return memory_metric_values.virtual_memory
5161

52-
def apply_cpu_limits(self, metrics: Optional[CPUMetrics]) -> Optional[CPUMetrics]:
53-
if metrics is not None:
62+
def apply_cpu_limit(
63+
self, cpu_metric_values: Optional[CPUMetrics]
64+
) -> Optional[float]:
65+
if cpu_metric_values is None:
66+
return None
67+
else:
5468
if callable(self.config.cpu_limit):
55-
metrics.cpu_max = self.config.cpu_limit(cpu_percent=metrics.cpu_usage)
69+
return self.config.cpu_limit(cpu_percent=cpu_metric_values.cpu_percent)
5670
elif self.config.cpu_limit > 0.0: # cpu_limit is a Float
57-
metrics.cpu_max = self.config.cpu_limit
58-
return metrics
71+
return self.config.cpu_limit
72+
else:
73+
return 100.0 * cpu_metric_values.cpu_count

0 commit comments

Comments
 (0)