Skip to content

Commit a01f0d5

Browse files
committed
Better messaging for alerts unavailability
1 parent f80df37 commit a01f0d5

File tree

3 files changed

+215
-178
lines changed

3 files changed

+215
-178
lines changed

src/gort/overwatcher/alerts.py

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class ActiveAlert(enum.Flag):
6767
CAMERA_TEMPERATURE = enum.auto()
6868
O2 = enum.auto()
6969
E_STOPS = enum.auto()
70-
ALERTS_UNAVAILABLE = enum.auto()
70+
ALERTS_DATA_UNAVAILABLE = enum.auto()
7171
DISCONNECTED = enum.auto()
7272
DOME_LOCKED = enum.auto()
7373
IDLE = enum.auto()
@@ -78,37 +78,75 @@ class ActiveAlert(enum.Flag):
7878
NO_CLOSE = DOOR | E_STOPS | ENGINEERING_OVERRIDE
7979

8080

81+
ACTIVE_ALARM_DESCRIPTIONS: dict[ActiveAlert, str] = {
82+
ActiveAlert.HUMIDITY: "Humidity is above the threshold",
83+
ActiveAlert.DEW_POINT: "Ambient temperature is below dew point temperature",
84+
ActiveAlert.WIND: "Wind speed is above the threshold",
85+
ActiveAlert.RAIN: "Rain detected",
86+
ActiveAlert.DOOR: "Enclosure door is open",
87+
ActiveAlert.CAMERA_TEMPERATURE: "Spec camera temperature is above the threshold",
88+
ActiveAlert.O2: "O2 levels are below the safe range",
89+
ActiveAlert.E_STOPS: "Emergency stops have been triggered",
90+
ActiveAlert.ALERTS_DATA_UNAVAILABLE: "Alerts data is unavailable",
91+
ActiveAlert.DISCONNECTED: "Connectivity lost",
92+
ActiveAlert.DOME_LOCKED: "Dome is locked",
93+
ActiveAlert.IDLE: "Overwatcher has been idle for too long",
94+
ActiveAlert.ENGINEERING_OVERRIDE: "Engineering mode is enabled",
95+
ActiveAlert.UNKNOWN: "Unknown alert",
96+
}
97+
98+
8199
class AlertsMonitorTask(OverwatcherModuleTask["AlertsOverwatcher"]):
82100
"""Monitors the alerts state."""
83101

84102
name = "alerts_monitor"
85103
keep_alive = True
86104
restart_on_error = True
87105

88-
INTERVAL: float = 20
106+
INTERVAL: float = 30
107+
N_FAILURES: int = 5
89108

90109
async def task(self):
91110
"""Updates the alerts data."""
92111

93112
n_failures: int = 0
113+
last_error: str = "Undefined error"
94114

95115
while True:
96116
try:
97117
await self.update_alerts()
118+
98119
except Exception as err:
99-
self.log.error(f"Failed to get alerts data: {decap(err)}")
120+
self.log.error(f"Failed retrieving alerts data: {decap(err)}")
100121
n_failures += 1
122+
last_error = str(err)
123+
101124
else:
102125
self.module.last_updated = time()
103-
self.module.unavailable = False
126+
127+
# Send a resolution message if needed.
128+
if self.module.alerts_data_unavailable:
129+
await self.module.notify(
130+
"[RESOLVED]: Alerts data is now available.",
131+
level="critical",
132+
)
133+
134+
self.module.alerts_data_unavailable = False
135+
104136
n_failures = 0
137+
last_error = "Undefined error"
138+
105139
finally:
106-
if self.module.unavailable is False and n_failures >= 5:
140+
if (
141+
self.module.alerts_data_unavailable is False
142+
and n_failures >= self.N_FAILURES
143+
):
107144
await self.module.notify(
108-
"Alerts data is unavailable.",
145+
"Failed to retrieve alerts data multiple times: "
146+
f"{decap(last_error, add_period=True)}",
109147
level="critical",
110148
)
111-
self.module.unavailable = True
149+
self.module.alerts_data_unavailable = True
112150

113151
await asyncio.sleep(self.INTERVAL)
114152

@@ -118,12 +156,12 @@ async def update_alerts(self):
118156
data = await self.module.update_status()
119157

120158
if data is None:
121-
raise ValueError("No alerts data available.")
159+
raise ValueError("API /alerts response failed or returned no data.")
122160

123161
# For some very critical alerts, we require them to be not null (null here
124162
# means no data was available or the API failed getting the alert data).
125163
if data.rain is None or data.humidity_alert is None or data.wind_alert is None:
126-
raise ValueError("Incomplete alerts data.")
164+
raise ValueError("Incomplete weather data in API /alerts response.")
127165

128166

129167
class AlertsOverwatcher(OverwatcherModule):
@@ -141,7 +179,8 @@ def __init__(self, *args, **kwargs):
141179

142180
self.last_updated: float = 0.0
143181
self.idle_since: float = 0.0
144-
self.unavailable: bool = False
182+
183+
self.alerts_data_unavailable: bool = False
145184

146185
async def is_safe(self) -> tuple[bool, ActiveAlert]:
147186
"""Determines whether it is safe to open."""
@@ -159,17 +198,17 @@ async def is_safe(self) -> tuple[bool, ActiveAlert]:
159198
else:
160199
self.idle_since = 0
161200

162-
if self.unavailable:
201+
if self.alerts_data_unavailable:
163202
self.log.warning("Alerts data is unavailable.")
164-
active_alerts |= ActiveAlert.ALERTS_UNAVAILABLE
203+
active_alerts |= ActiveAlert.ALERTS_DATA_UNAVAILABLE
165204
return False, active_alerts
166205

167-
if self.unavailable is False and time() - self.last_updated > 300:
206+
if self.alerts_data_unavailable is False and time() - self.last_updated > 300:
168207
# If the data is not unavailable but it has not been updated
169208
# in the last 5 minutes, something is wrong. We mark it as unavailable.
170209
self.log.warning("Alerts data has not been updated in the last 5 minutes.")
171-
self.unavailable = True
172-
active_alerts |= ActiveAlert.ALERTS_UNAVAILABLE
210+
self.alerts_data_unavailable = True
211+
active_alerts |= ActiveAlert.ALERTS_DATA_UNAVAILABLE
173212
return False, active_alerts
174213

175214
if self.state.rain:

src/gort/overwatcher/overwatcher.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ async def handle_unsafe(self):
241241

242242
try:
243243
await ow.shutdown(
244-
reason=f"Unsafe conditions detected: {alert_names}",
244+
reason=f"Unsafe conditions detected - {alert_names}",
245245
close_dome=close_dome,
246246
disable_overwatcher=disable_overwatcher,
247247
)
@@ -572,9 +572,7 @@ async def shutdown(
572572
if not reason:
573573
message = "Triggering shutdown."
574574
else:
575-
if not reason.endswith("."):
576-
reason += "."
577-
message = f"Triggering shutdown. Reason: {decap(reason)}"
575+
message = f"Triggering shutdown. Reason: {decap(reason, add_period=True)}"
578576

579577
await self.notify(message, level=level)
580578

0 commit comments

Comments
 (0)