Skip to content

Commit a7c0acd

Browse files
ct2034RichardvdK
andauthored
Port cpu_monitor to ROS2 (#326) (#385)
* add cpu monitor to the common diagnostics * add 3 unittests * add psutil to package xml * add debug print for cpu percentages and change checking percentage to -1 * update code with review comments. Added a SetUp() method in the test and move the sleep to there to make sure this is done every test in a systemactic way --------- Co-authored-by: Richardvdketterij <[email protected]> (cherry picked from commit f89beba) Co-authored-by: Richard <[email protected]>
1 parent de345ff commit a7c0acd

File tree

6 files changed

+271
-10
lines changed

6 files changed

+271
-10
lines changed

diagnostic_common_diagnostics/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ find_package(ament_cmake_python REQUIRED)
88
ament_python_install_package(${PROJECT_NAME})
99

1010
install(PROGRAMS
11+
${PROJECT_NAME}/cpu_monitor.py
1112
${PROJECT_NAME}/ntp_monitor.py
1213
${PROJECT_NAME}/ram_monitor.py
1314
${PROJECT_NAME}/sensors_monitor.py
@@ -19,10 +20,14 @@ if(BUILD_TESTING)
1920
ament_lint_auto_find_test_dependencies()
2021

2122
find_package(ament_cmake_pytest REQUIRED)
23+
ament_add_pytest_test(
24+
test_cpu_monitor
25+
test/systemtest/test_cpu_monitor.py
26+
TIMEOUT 10)
2227
ament_add_pytest_test(
2328
test_ntp_monitor
2429
test/systemtest/test_ntp_monitor.py
2530
TIMEOUT 10)
2631
endif()
2732

28-
ament_package()
33+
ament_package()

diagnostic_common_diagnostics/README.md

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,31 @@ Currently only the NTP monitor is ported to ROS2.
77

88
# Nodes
99

10+
## cpu_monitor.py
11+
The `cpu_monitor` module allows users to monitor the CPU usage of their system in real-time.
12+
It publishes the usage percentage in a diagnostic message.
13+
14+
* Name of the node is "cpu_monitor_" + hostname.
15+
* Uses the following args:
16+
* warning_percentage: If the CPU usage is > warning_percentage, a WARN status will be publised.
17+
* window: the maximum length of the used collections.deque for queuing CPU readings.
18+
19+
### Published Topics
20+
#### /diagnostics
21+
diagnostic_msgs/DiagnosticArray
22+
The diagnostics information.
23+
24+
### Parameters
25+
#### warning_percentage
26+
(default: 90)
27+
warning percentage threshold.
28+
29+
#### window
30+
(default: 1)
31+
Length of CPU readings queue.
32+
1033
## ntp_monitor.py
11-
Runs 'ntpdate' to check if the system clock is synchronized with the NTP server.
34+
Runs 'ntpdate' to check if the system clock is synchronized with the NTP server.
1235
* If the offset is smaller than `offset-tolerance`, an `OK` status will be published.
1336
* If the offset is larger than the configured `offset-tolerance`, a `WARN` status will be published,
1437
* if it is bigger than `error-offset-tolerance`, an `ERROR` status will be published.
@@ -20,7 +43,7 @@ diagnostic_msgs/DiagnosticArray
2043
The diagnostics information.
2144

2245
### Parameters
23-
#### ntp_hostname
46+
#### ntp_hostname
2447
(default: "pool.ntp.org")
2548
Hostname of NTP server.
2649

@@ -46,9 +69,6 @@ Disable self test.
4669
## hd_monitor.py
4770
**To be ported**
4871

49-
## cpu_monitor.py
50-
**To be ported**
51-
5272
## ram_monitor.py
5373
The `ram_monitor` module allows users to monitor the RAM usage of their system in real-time.
5474
It publishes the usage percentage in a diagnostic message.
@@ -89,4 +109,4 @@ The diagnostics information.
89109
Whether to ignore the fan speed.
90110

91111
## tf_monitor.py
92-
**To be ported**
112+
**To be ported**
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Software License Agreement (BSD License)
5+
#
6+
# Copyright (c) 2017, TNO IVS, Helmond, Netherlands
7+
# All rights reserved.
8+
#
9+
# Redistribution and use in source and binary forms, with or without
10+
# modification, are permitted provided that the following conditions
11+
# are met:
12+
#
13+
# * Redistributions of source code must retain the above copyright
14+
# notice, this list of conditions and the following disclaimer.
15+
# * Redistributions in binary form must reproduce the above
16+
# copyright notice, this list of conditions and the following
17+
# disclaimer in the documentation and/or other materials provided
18+
# with the distribution.
19+
# * Neither the name of the TNO IVS nor the names of its
20+
# contributors may be used to endorse or promote products derived
21+
# from this software without specific prior written permission.
22+
#
23+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26+
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27+
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29+
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34+
# POSSIBILITY OF SUCH DAMAGE.
35+
36+
# \author Rein Appeldoorn
37+
38+
from collections import deque
39+
import socket
40+
import traceback
41+
42+
from diagnostic_msgs.msg import DiagnosticStatus
43+
44+
from diagnostic_updater import DiagnosticTask, Updater
45+
46+
import psutil
47+
48+
import rclpy
49+
from rclpy.node import Node
50+
51+
52+
class CpuTask(DiagnosticTask):
53+
54+
def __init__(self, warning_percentage=90, window=1):
55+
DiagnosticTask.__init__(self, 'CPU Information')
56+
57+
self._warning_percentage = int(warning_percentage)
58+
self._readings = deque(maxlen=window)
59+
60+
def _get_average_reading(self):
61+
def avg(lst):
62+
return float(sum(lst)) / len(lst) if lst else float('nan')
63+
64+
return [avg(cpu_percentages)
65+
for cpu_percentages in zip(*self._readings)]
66+
67+
def run(self, stat):
68+
self._readings.append(psutil.cpu_percent(percpu=True))
69+
cpu_percentages = self._get_average_reading()
70+
cpu_average = sum(cpu_percentages) / len(cpu_percentages)
71+
72+
stat.add('CPU Load Average', f'{cpu_average:.2f}')
73+
74+
warn = False
75+
for idx, cpu_percentage in enumerate(cpu_percentages):
76+
stat.add(f'CPU {idx} Load', f'{cpu_percentage:.2f}')
77+
if cpu_percentage > self._warning_percentage:
78+
warn = True
79+
80+
if warn:
81+
stat.summary(DiagnosticStatus.WARN,
82+
f'At least one CPU exceeds {self._warning_percentage} percent')
83+
else:
84+
stat.summary(DiagnosticStatus.OK,
85+
f'CPU Average {cpu_average:.2f} percent')
86+
87+
return stat
88+
89+
90+
def main(args=None):
91+
rclpy.init(args=args)
92+
93+
# Create the node
94+
hostname = socket.gethostname()
95+
node = Node(f'cpu_monitor_{hostname.replace("-", "_")}')
96+
97+
# Declare and get parameters
98+
node.declare_parameter('warning_percentage', 90)
99+
node.declare_parameter('window', 1)
100+
101+
warning_percentage = node.get_parameter(
102+
'warning_percentage').get_parameter_value().integer_value
103+
window = node.get_parameter('window').get_parameter_value().integer_value
104+
105+
# Create diagnostic updater with default updater rate of 1 hz
106+
updater = Updater(node)
107+
updater.setHardwareID(hostname)
108+
updater.add(CpuTask(warning_percentage=warning_percentage, window=window))
109+
110+
rclpy.spin(node)
111+
112+
113+
if __name__ == '__main__':
114+
try:
115+
main()
116+
except KeyboardInterrupt:
117+
pass
118+
except Exception:
119+
traceback.print_exc()

diagnostic_common_diagnostics/mainpage.dox

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
\b diagnostic_common_diagnostics contains a few common diagnostic nodes
66

7+
- cpu_monitor publishes diagnostic messages with the CPU usage of the system.
78
- ntp_monitor publishes diagnostic messages for how well the NTP time sync is working.
89
- tf_monitor used to publish diagnostic messages reporting on the health of
910
the TF tree. It is based on tfwtf. It is not ported to ROS2.

diagnostic_common_diagnostics/package.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@
1818
<buildtool_depend>ament_cmake</buildtool_depend>
1919
<buildtool_depend>ament_cmake_python</buildtool_depend>
2020

21-
<exec_depend>rclpy</exec_depend>
2221
<exec_depend>diagnostic_updater</exec_depend>
2322
<exec_depend>lm-sensors</exec_depend>
2423
<exec_depend>python3-ntplib</exec_depend>
24+
<exec_depend>python3-psutil</exec_depend>
2525

2626
<test_depend>ament_lint_auto</test_depend>
2727
<!-- Usage of ament_lint_common is locked by https://github.com/ament/ament_lint/issues/423
2828
For now, enable the linters that do support exlusions.
29-
Once all files are migrated to ROS2, all linters can be
29+
Once all files are migrated to ROS2, all linters can be
3030
enabled vi ament_lint_common as the files are cleaned up. -->
3131
<!-- <test_depend>ament_cmake_flake8</test_depend> -->
3232
<!-- <test_depend>ament_cmake_pep257</test_depend> -->
3333
<test_depend>ament_cmake_xmllint</test_depend>
3434
<test_depend>ament_cmake_lint_cmake</test_depend>
35-
35+
3636
<test_depend>ament_cmake_pytest</test_depend>
3737

3838
<export>
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# -*- coding: utf-8 -*-
2+
# Software License Agreement (BSD License)
3+
#
4+
# Copyright (c) 2023, Robert Bosch GmbH
5+
# All rights reserved.
6+
#
7+
# Redistribution and use in source and binary forms, with or without
8+
# modification, are permitted provided that the following conditions
9+
# are met:
10+
#
11+
# * Redistributions of source code must retain the above copyright
12+
# notice, this list of conditions and the following disclaimer.
13+
# * Redistributions in binary form must reproduce the above
14+
# copyright notice, this list of conditions and the following
15+
# disclaimer in the documentation and/or other materials provided
16+
# with the distribution.
17+
# * Neither the name of the Willow Garage nor the names of its
18+
# contributors may be used to endorse or promote products derived
19+
# from this software without specific prior written permission.
20+
#
21+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24+
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25+
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27+
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32+
# POSSIBILITY OF SUCH DAMAGE.
33+
34+
import time
35+
import unittest
36+
37+
from diagnostic_common_diagnostics.cpu_monitor import CpuTask
38+
39+
from diagnostic_msgs.msg import DiagnosticStatus
40+
41+
from diagnostic_updater import DiagnosticArray, Updater
42+
from diagnostic_updater import DiagnosticStatusWrapper
43+
44+
import rclpy
45+
from rclpy.node import Node
46+
47+
48+
class TestCPUMonitor(unittest.TestCase):
49+
50+
@classmethod
51+
def setUpClass(cls):
52+
rclpy.init(args=None)
53+
54+
@classmethod
55+
def tearDownClass(cls):
56+
if rclpy.ok():
57+
rclpy.shutdown()
58+
59+
def setUp(self):
60+
# In this case is recommended for accuracy that psutil.cpu_percent()
61+
# function be called with at least 0.1 seconds between calls.
62+
time.sleep(0.1)
63+
64+
def diagnostics_callback(self, msg):
65+
self.message_recieved = True
66+
self.assertEqual(len(msg.status), 1)
67+
68+
def test_ok(self):
69+
warning_percentage = 100
70+
task = CpuTask(warning_percentage)
71+
stat = DiagnosticStatusWrapper()
72+
task.run(stat)
73+
self.assertEqual(task.name, 'CPU Information')
74+
self.assertEqual(stat.level, DiagnosticStatus.OK)
75+
self.assertIn(str('CPU Average'), stat.message)
76+
77+
# Check for at least 1 CPU Load Average and 1 CPU Load
78+
self.assertGreaterEqual(len(stat.values), 2)
79+
80+
def test_warn(self):
81+
warning_percentage = -1
82+
task = CpuTask(warning_percentage)
83+
stat = DiagnosticStatusWrapper()
84+
task.run(stat)
85+
print(f'Raw readings: {task._readings}')
86+
self.assertEqual(task.name, 'CPU Information')
87+
self.assertEqual(stat.level, DiagnosticStatus.WARN)
88+
self.assertIn(str('At least one CPU exceeds'), stat.message)
89+
90+
# Check for at least 1 CPU Load Average and 1 CPU Load
91+
self.assertGreaterEqual(len(stat.values), 2)
92+
93+
def test_updater(self):
94+
self.message_recieved = False
95+
96+
node = Node('cpu_monitor_test')
97+
updater = Updater(node)
98+
updater.setHardwareID('test_id')
99+
updater.add(CpuTask())
100+
101+
node.create_subscription(
102+
DiagnosticArray, '/diagnostics', self.diagnostics_callback, 10)
103+
104+
start_time = time.time()
105+
timeout = 5.0 # Timeout in seconds
106+
107+
while not self.message_recieved:
108+
rclpy.spin_once(node)
109+
time.sleep(0.1)
110+
elapsed_time = time.time() - start_time
111+
if elapsed_time >= timeout:
112+
self.fail('No diagnostics received')
113+
114+
115+
if __name__ == '__main__':
116+
unittest.main()

0 commit comments

Comments
 (0)