Skip to content

Commit 821e71c

Browse files
committed
opentelemetry-instrumentation-system-metrics: add process metrics
Add process metrics as of semconv 1.30.0 to the system metrics instrumentation. The old process.runtime metrics are kept because the semantic conventions advise against breaking current users; the documentation nevertheless discourages their use and explicitly states that they are deprecated.
1 parent d7bc137 commit 821e71c

File tree

2 files changed

+304
-35
lines changed

2 files changed

+304
-35
lines changed

instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py

Lines changed: 172 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,20 @@
3434
"system.network.io": ["transmit", "receive"],
3535
"system.network.connections": ["family", "type"],
3636
"system.thread_count": None
37+
"process.context_switches": ["involuntary", "voluntary"],
38+
"process.count": None,
39+
"process.cpu.time": ["user", "system"],
40+
"process.cpu.utilization": None,
41+
"process.memory.usage": None,
42+
"process.memory.virtual": None,
43+
"process.open_file_descriptor.count": None,
44+
"process.thread.count": None,
3745
"process.runtime.memory": ["rss", "vms"],
3846
"process.runtime.cpu.time": ["user", "system"],
3947
"process.runtime.gc_count": None,
4048
"process.runtime.thread_count": None,
4149
"process.runtime.cpu.utilization": None,
4250
"process.runtime.context_switches": ["involuntary", "voluntary"],
43-
"process.open_file_descriptor.count": None,
4451
}
4552
4653
Usage
@@ -66,12 +73,17 @@
6673
"system.memory.usage": ["used", "free", "cached"],
6774
"system.cpu.time": ["idle", "user", "system", "irq"],
6875
"system.network.io": ["transmit", "receive"],
69-
"process.runtime.memory": ["rss", "vms"],
70-
"process.runtime.cpu.time": ["user", "system"],
71-
"process.runtime.context_switches": ["involuntary", "voluntary"],
76+
"process.memory.usage": None,
77+
"process.memory.virtual": None,
78+
"process.cpu.time": ["user", "system"],
79+
"process.context_switches": ["involuntary", "voluntary"],
7280
}
7381
SystemMetricsInstrumentor(config=configuration).instrument()
7482
83+
84+
Out-of-spec `process.runtime` prefixed metrics are deprecated and will be removed in future versions; users are encouraged to move
85+
to the `process` metrics.
86+
7587
API
7688
---
7789
"""
@@ -92,6 +104,9 @@
92104
from opentelemetry.instrumentation.system_metrics.package import _instruments
93105
from opentelemetry.instrumentation.system_metrics.version import __version__
94106
from opentelemetry.metrics import CallbackOptions, Observation, get_meter
107+
from opentelemetry.semconv._incubating.metrics.process_metrics import (
108+
create_process_cpu_utilization,
109+
)
95110

96111
_logger = logging.getLogger(__name__)
97112

@@ -112,13 +127,20 @@
112127
"system.network.io": ["transmit", "receive"],
113128
"system.network.connections": ["family", "type"],
114129
"system.thread_count": None,
130+
"process.context_switches": ["involuntary", "voluntary"],
131+
"process.count": None,
132+
"process.cpu.time": ["user", "system"],
133+
"process.cpu.utilization": ["user", "system"],
134+
"process.memory.usage": None,
135+
"process.memory.virtual": None,
136+
"process.open_file_descriptor.count": None,
137+
"process.thread.count": None,
115138
"process.runtime.memory": ["rss", "vms"],
116139
"process.runtime.cpu.time": ["user", "system"],
117140
"process.runtime.gc_count": None,
118141
"process.runtime.thread_count": None,
119142
"process.runtime.cpu.utilization": None,
120143
"process.runtime.context_switches": ["involuntary", "voluntary"],
121-
"process.open_file_descriptor.count": None,
122144
}
123145

124146
if sys.platform == "darwin":
@@ -165,13 +187,20 @@ def __init__(
165187

166188
self._system_thread_count_labels = self._labels.copy()
167189

190+
self._context_switches_labels = self._labels.copy()
191+
self._cpu_time_labels = self._labels.copy()
192+
self._cpu_utilization_labels = self._labels.copy()
193+
self._memory_usage_labels = self._labels.copy()
194+
self._memory_virtual_labels = self._labels.copy()
195+
self._open_file_descriptor_count_labels = self._labels.copy()
196+
self._thread_count_labels = self._labels.copy()
197+
168198
self._runtime_memory_labels = self._labels.copy()
169199
self._runtime_cpu_time_labels = self._labels.copy()
170200
self._runtime_gc_count_labels = self._labels.copy()
171201
self._runtime_thread_count_labels = self._labels.copy()
172202
self._runtime_cpu_utilization_labels = self._labels.copy()
173203
self._runtime_context_switches_labels = self._labels.copy()
174-
self._open_file_descriptor_count_labels = self._labels.copy()
175204

176205
def instrumentation_dependencies(self) -> Collection[str]:
177206
return _instruments
@@ -186,27 +215,30 @@ def _instrument(self, **kwargs: Any):
186215
schema_url="https://opentelemetry.io/schemas/1.11.0",
187216
)
188217

218+
# system metrics
219+
189220
if "system.cpu.time" in self._config:
190221
self._meter.create_observable_counter(
191222
name="system.cpu.time",
192223
callbacks=[self._get_system_cpu_time],
193-
description="System CPU time",
224+
description="Seconds each logical CPU spent on each mode",
194225
unit="s",
195226
)
196227

228+
# FIXME: double check this is divided by cpu core
197229
if "system.cpu.utilization" in self._config:
198230
self._meter.create_observable_gauge(
199231
name="system.cpu.utilization",
200232
callbacks=[self._get_system_cpu_utilization],
201-
description="System CPU utilization",
233+
description="Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of logical CPUs",
202234
unit="1",
203235
)
204236

205237
if "system.memory.usage" in self._config:
206238
self._meter.create_observable_gauge(
207239
name="system.memory.usage",
208240
callbacks=[self._get_system_memory_usage],
209-
description="System memory usage",
241+
description="Reports memory in use by state.",
210242
unit="By",
211243
)
212244

@@ -218,6 +250,7 @@ def _instrument(self, **kwargs: Any):
218250
unit="1",
219251
)
220252

253+
# FIXME: system.swap is gone in favour of system.paging
221254
if "system.swap.usage" in self._config:
222255
self._meter.create_observable_gauge(
223256
name="system.swap.usage",
@@ -269,6 +302,7 @@ def _instrument(self, **kwargs: Any):
269302
unit="operations",
270303
)
271304

305+
# FIXME: this has been replaced by system.disk.operation.time
272306
if "system.disk.time" in self._config:
273307
self._meter.create_observable_counter(
274308
name="system.disk.time",
@@ -299,6 +333,7 @@ def _instrument(self, **kwargs: Any):
299333
# TODO Filesystem information can be obtained with os.statvfs in Unix-like
300334
# OSs, how to do the same in Windows?
301335

336+
# FIXME: this is now just system.network.dropped
302337
if "system.network.dropped.packets" in self._config:
303338
self._meter.create_observable_counter(
304339
name="system.network.dropped_packets",
@@ -339,13 +374,72 @@ def _instrument(self, **kwargs: Any):
339374
unit="connections",
340375
)
341376

377+
# FIXME: this is gone
342378
if "system.thread_count" in self._config:
343379
self._meter.create_observable_gauge(
344380
name="system.thread_count",
345381
callbacks=[self._get_system_thread_count],
346382
description="System active threads count",
347383
)
348384

385+
# process metrics
386+
387+
if "process.cpu.time" in self._config:
388+
self._meter.create_observable_counter(
389+
name="process.cpu.time",
390+
callbacks=[self._get_cpu_time],
391+
description="Total CPU seconds broken down by different states.",
392+
unit="s",
393+
)
394+
395+
if "process.cpu.utilization" in self._config:
396+
create_process_cpu_utilization(
397+
self._meter, callbacks=[self._get_cpu_utilization]
398+
)
399+
400+
if "process.context_switches" in self._config:
401+
self._meter.create_observable_counter(
402+
name="process.context_switches",
403+
callbacks=[self._get_context_switches],
404+
description="Number of times the process has been context switched.",
405+
)
406+
407+
if "process.memory.usage" in self._config:
408+
self._meter.create_observable_up_down_counter(
409+
name="process.memory.usage",
410+
callbacks=[self._get_memory_usage],
411+
description="The amount of physical memory in use.",
412+
unit="By",
413+
)
414+
415+
if "process.memory.virtual" in self._config:
416+
self._meter.create_observable_up_down_counter(
417+
name="process.memory.virtual",
418+
callbacks=[self._get_memory_virtual],
419+
description="The amount of committed virtual memory.",
420+
unit="By",
421+
)
422+
423+
if (
424+
sys.platform != "win32"
425+
and "process.open_file_descriptor.count" in self._config
426+
):
427+
self._meter.create_observable_up_down_counter(
428+
name="process.open_file_descriptor.count",
429+
callbacks=[self._get_open_file_descriptors],
430+
description="Number of file descriptors in use by the process.",
431+
)
432+
433+
if "process.thread.count" in self._config:
434+
self._meter.create_observable_up_down_counter(
435+
name="process.thread.count",
436+
callbacks=[self._get_thread_count],
437+
description="Process threads count.",
438+
)
439+
440+
# FIXME: process.runtime keys are deprecated and will be removed in subsequent releases.
441+
# When removing them, remember to clean also the callbacks and labels
442+
349443
if "process.runtime.memory" in self._config:
350444
self._meter.create_observable_up_down_counter(
351445
name=f"process.runtime.{self._python_implementation}.memory",
@@ -398,16 +492,6 @@ def _instrument(self, **kwargs: Any):
398492
unit="switches",
399493
)
400494

401-
if (
402-
sys.platform != "win32"
403-
and "process.open_file_descriptor.count" in self._config
404-
):
405-
self._meter.create_observable_up_down_counter(
406-
name="process.open_file_descriptor.count",
407-
callbacks=[self._get_open_file_descriptors],
408-
description="Number of file descriptors in use by the process.",
409-
)
410-
411495
def _uninstrument(self, **kwargs: Any):
412496
pass
413497

@@ -685,6 +769,75 @@ def _get_system_thread_count(
685769
threading.active_count(), self._system_thread_count_labels
686770
)
687771

772+
# process callbacks
773+
774+
def _get_context_switches(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for process context switches.

    Yields one observation for every entry configured under
    ``process.context_switches`` that exists as an attribute on the value
    returned by ``self._proc.num_ctx_switches()``. The entry name is
    attached to the observation as the ``type`` label.
    """
    missing = object()
    switch_counts = self._proc.num_ctx_switches()
    for switch_type in self._config["process.context_switches"]:
        count = getattr(switch_counts, switch_type, missing)
        if count is missing:
            # The configured type is not reported by the counters object.
            continue
        self._context_switches_labels["type"] = switch_type
        # Hand out a copy so the next iteration's label update does not
        # alter an observation that was already yielded.
        yield Observation(count, self._context_switches_labels.copy())
787+
def _get_cpu_time(self, options: CallbackOptions) -> Iterable[Observation]:
    """Observer callback for process CPU time.

    Yields one observation for every entry configured under
    ``process.cpu.time`` that exists as an attribute on the value returned
    by ``self._proc.cpu_times()``. The entry name is attached to the
    observation as the ``type`` label.
    """
    missing = object()
    cpu_times = self._proc.cpu_times()
    for mode in self._config["process.cpu.time"]:
        seconds = getattr(cpu_times, mode, missing)
        if seconds is missing:
            # The configured mode is not reported by the timings object.
            continue
        self._cpu_time_labels["type"] = mode
        # Copy the labels so each observation keeps its own ``type`` value.
        yield Observation(seconds, self._cpu_time_labels.copy())
797+
798+
def _get_cpu_utilization(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for process CPU utilization.

    Yields a single observation: the value of ``self._proc.cpu_percent()``
    converted from a percentage to a fraction and divided by the CPU count
    reported by ``psutil.cpu_count()``.
    """
    proc_cpu_percent = self._proc.cpu_percent()
    # psutil.cpu_count() returns None when the count cannot be determined;
    # fall back to 1 so the division below cannot raise TypeError.
    num_cpus = psutil.cpu_count() or 1
    yield Observation(
        proc_cpu_percent / 100 / num_cpus,
        self._cpu_utilization_labels.copy(),
    )
808+
809+
def _get_memory_usage(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for process memory usage.

    Yields the ``rss`` field of ``self._proc.memory_info()`` as a single
    observation, or nothing when that field is absent.
    """
    memory_info = self._proc.memory_info()
    if not hasattr(memory_info, "rss"):
        # No resident-set figure on this memory_info object: report nothing.
        return
    labels = self._memory_usage_labels.copy()
    yield Observation(memory_info.rss, labels)
819+
820+
def _get_memory_virtual(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for process virtual memory.

    Yields the ``vms`` field of ``self._proc.memory_info()`` as a single
    observation, or nothing when that field is absent.
    """
    memory_info = self._proc.memory_info()
    if not hasattr(memory_info, "vms"):
        # No virtual-memory figure on this memory_info object: report nothing.
        return
    labels = self._memory_virtual_labels.copy()
    yield Observation(memory_info.vms, labels)
830+
831+
def _get_thread_count(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for the process thread count.

    Yields a single observation holding ``self._proc.num_threads()``.
    """
    thread_total = self._proc.num_threads()
    labels = self._thread_count_labels.copy()
    yield Observation(thread_total, labels)
838+
839+
# runtime callbacks
840+
688841
def _get_runtime_memory(
689842
self, options: CallbackOptions
690843
) -> Iterable[Observation]:

0 commit comments

Comments
 (0)