Skip to content

Commit f8710be

Browse files
ct2034Timple
andauthored
Aggregator: publish diagnostics_toplevel_state immediately on every degradation (#324) (#357)
* Aggregator: publish diagnostics_toplevel_state immediately on every degradation * Update docs * Re-use the publishData function such that also the actual diagnostics are available * Expand test with more degradation cases (cherry picked from commit dbaec04) Co-authored-by: Tim Clephas <[email protected]>
1 parent c373e26 commit f8710be

File tree

2 files changed

+40
-33
lines changed

2 files changed

+40
-33
lines changed

diagnostic_aggregator/src/aggregator.cpp

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -151,28 +151,11 @@ void Aggregator::diagCallback(const DiagnosticArray::SharedPtr diag_msg)
151151
checkTimestamp(diag_msg);
152152

153153
bool analyzed = false;
154+
bool immediate_report = false;
154155
{ // lock the whole loop to ensure nothing in the analyzer group changes during it.
155156
std::lock_guard<std::mutex> lock(mutex_);
156157
for (auto j = 0u; j < diag_msg->status.size(); ++j) {
157158
analyzed = false;
158-
159-
const bool top_level_state_transition_to_error =
160-
(last_top_level_state_ != DiagnosticStatus::ERROR) &&
161-
(diag_msg->status[j].level == DiagnosticStatus::ERROR);
162-
163-
if (critical_ && top_level_state_transition_to_error) {
164-
RCLCPP_DEBUG(
165-
logger_, "Received error message: %s, publishing error immediately",
166-
diag_msg->status[j].name.c_str());
167-
DiagnosticStatus diag_toplevel_state;
168-
diag_toplevel_state.name = "toplevel_state_critical";
169-
diag_toplevel_state.level = diag_msg->status[j].level;
170-
toplevel_state_pub_->publish(diag_toplevel_state);
171-
172-
// store the last published state
173-
last_top_level_state_ = diag_toplevel_state.level;
174-
}
175-
176159
auto item = std::make_shared<StatusItem>(&diag_msg->status[j]);
177160

178161
if (analyzer_group_->match(item->getName())) {
@@ -182,8 +165,17 @@ void Aggregator::diagCallback(const DiagnosticArray::SharedPtr diag_msg)
182165
if (!analyzed) {
183166
other_analyzer_->analyze(item);
184167
}
168+
169+
// In case there is a degraded state, publish immediately
170+
if (critical_ && item->getLevel() > last_top_level_state_) {
171+
immediate_report = true;
172+
}
185173
}
186174
}
175+
176+
if (immediate_report) {
177+
publishData();
178+
}
187179
}
188180

189181
Aggregator::~Aggregator()

diagnostic_aggregator/test/test_critical_pub.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -67,41 +67,56 @@ def publish_message(self, level):
6767
rclpy.spin_once(self.node)
6868
return self.node.get_clock().now()
6969

70-
def test_critical_publisher(self):
70+
def critical_publisher_test(
71+
self, initial_state=DiagnosticStatus.OK, new_state=DiagnosticStatus.ERROR
72+
):
7173
# Publish the ok message and wait till the toplevel state is received
72-
state = DiagnosticStatus.OK
73-
time_0 = self.publish_message(state)
74+
time_0 = self.publish_message(initial_state)
7475

75-
assert (self.received_state[0] == state), \
76+
assert (self.received_state[0] == initial_state), \
7677
('Received state is not the same as the sent state:'
77-
+ f"'{self.received_state[0]}' != '{state}'")
78+
+ f"'{self.received_state[0]}' != '{initial_state}'")
7879
self.received_state.clear()
7980

8081
# Publish the ok message and expect the toplevel state after 1 second period
81-
time_1 = self.publish_message(state)
82+
time_1 = self.publish_message(initial_state)
8283
assert (time_1 - time_0 > rclpy.duration.Duration(seconds=0.99)), \
8384
'OK message received too early'
84-
assert (self.received_state[0] == state), \
85+
assert (self.received_state[0] == initial_state), \
8586
('Received state is not the same as the sent state:'
86-
+ f"'{self.received_state[0]}' != '{state}'")
87+
+ f"'{self.received_state[0]}' != '{initial_state}'")
8788
self.received_state.clear()
8889

8990
# Publish the message and expect the critical error message immediately
90-
state = DiagnosticStatus.ERROR
91-
time_2 = self.publish_message(state)
91+
time_2 = self.publish_message(new_state)
9292

9393
assert (time_2 - time_1 < rclpy.duration.Duration(seconds=0.1)), \
9494
'Critical error message not received within 0.1 second'
95-
assert (self.received_state[0] == state), \
95+
assert (self.received_state[0] == new_state), \
9696
('Received state is not the same as the sent state:'
97-
+ f"'{self.received_state[0]}' != '{state}'")
97+
+ f"'{self.received_state[0]}' != '{new_state}'")
9898
self.received_state.clear()
9999

100100
# Next error message should be sent at standard 1 second rate
101-
time_3 = self.publish_message(state)
101+
time_3 = self.publish_message(new_state)
102102

103103
assert (time_3 - time_1 > rclpy.duration.Duration(seconds=0.99)), \
104104
'Periodic error message received too early'
105-
assert (self.received_state[0] == state), \
105+
assert (self.received_state[0] == new_state), \
106106
('Received state is not the same as the sent state:'
107-
+ f"'{self.received_state[0]}' != '{state}'")
107+
+ f"'{self.received_state[0]}' != '{new_state}'")
108+
109+
def test_critical_publisher_ok_error(self):
110+
self.critical_publisher_test(
111+
initial_state=DiagnosticStatus.OK, new_state=DiagnosticStatus.ERROR
112+
)
113+
114+
def test_critical_publisher_ok_warn(self):
115+
self.critical_publisher_test(
116+
initial_state=DiagnosticStatus.OK, new_state=DiagnosticStatus.WARN
117+
)
118+
119+
def test_critical_publisher_warn_error(self):
120+
self.critical_publisher_test(
121+
initial_state=DiagnosticStatus.WARN, new_state=DiagnosticStatus.ERROR
122+
)

0 commit comments

Comments
 (0)