Skip to content

Commit b31d618

Browse files
fix(modules): auto-restart crashed watchers and fix toggle for dead processes
Three bugs fixed: 1. **Module toggle required two clicks for crashed modules**: `toggle()` checked `self.started` instead of `self.is_alive()`, so clicking a crashed module first called `stop()` (just cleaning state), then required a second click to actually start it. 2. **Crashed modules never auto-restarted**: `check_module_status()` ran once at startup, then incorrectly scheduled `rebuild_modules_menu` instead of rescheduling itself. After the first 2-second check, module crashes were never detected again. 3. **Dialog restart button missing testing arg**: `restart_button.clicked.connect(module.start)` didn't pass the `testing` parameter. Changes: - `Module.toggle()` now uses `is_alive()` to decide stop vs start - `check_module_status()` reschedules itself every 5s (was one-shot) - Crashed modules are auto-restarted up to 3 times with tray notifications - After max restarts, shows dialog with manual restart option - Manual toggle/restart resets the auto-restart counter - Added unit tests for toggle behavior with crashed processes Refs: ActivityWatch/aw-watcher-window#101, #103
1 parent 7967ff8 commit b31d618

File tree

3 files changed

+173
-11
lines changed

3 files changed

+173
-11
lines changed

aw_qt/manager.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,12 @@ def stop(self) -> None:
200200
self.started = False
201201

202202
def toggle(self, testing: bool) -> None:
203-
if self.started:
203+
if self.is_alive():
204204
self.stop()
205205
else:
206+
if self.started:
207+
# Process died unexpectedly, clean up state
208+
self.stop()
206209
self.start(testing)
207210

208211
def is_alive(self) -> bool:

aw_qt/trayicon.py

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ def open_dir(d: str) -> None:
7373

7474

7575
class TrayIcon(QSystemTrayIcon):
76+
MAX_AUTO_RESTARTS = 3
77+
7678
def __init__(
7779
self,
7880
manager: Manager,
@@ -86,6 +88,7 @@ def __init__(
8688

8789
self.manager = manager
8890
self.testing = testing
91+
self._restart_counts: Dict[str, int] = {}
8992

9093
self.root_url = f"http://localhost:{5666 if self.testing else 5600}"
9194
self.activated.connect(self.on_activated)
@@ -137,11 +140,24 @@ def _build_rootmenu(self) -> None:
137140
def show_module_failed_dialog(module: Module) -> None:
138141
box = QMessageBox(self._parent)
139142
box.setIcon(QMessageBox.Icon.Warning)
140-
box.setText(f"Module {module.name} quit unexpectedly")
143+
box.setText(
144+
f"Module {module.name} quit unexpectedly"
145+
+ (
146+
f" after {self.MAX_AUTO_RESTARTS} auto-restart attempts"
147+
if self._restart_counts.get(module.name, 0)
148+
>= self.MAX_AUTO_RESTARTS
149+
else ""
150+
)
151+
)
141152
box.setDetailedText(module.read_log(self.testing))
142153

143154
restart_button = QPushButton("Restart", box)
144-
restart_button.clicked.connect(module.start)
155+
156+
def on_manual_restart() -> None:
157+
self._restart_counts.pop(module.name, None)
158+
module.start(self.testing)
159+
160+
restart_button.clicked.connect(on_manual_restart)
145161
box.addButton(restart_button, QMessageBox.ButtonRole.AcceptRole)
146162
box.setStandardButtons(QMessageBox.StandardButton.Cancel)
147163

@@ -153,31 +169,53 @@ def rebuild_modules_menu() -> None:
153169
module: Module = action.data()
154170
alive = module.is_alive()
155171
action.setChecked(alive)
156-
# print(module.text(), alive)
157172

158-
# TODO: Do it in a better way, singleShot isn't pretty...
159173
QtCore.QTimer.singleShot(2000, rebuild_modules_menu)
160174

161175
QtCore.QTimer.singleShot(2000, rebuild_modules_menu)
162176

163177
def check_module_status() -> None:
164178
unexpected_exits = self.manager.get_unexpected_stops()
165-
if unexpected_exits:
166-
for module in unexpected_exits:
179+
for module in unexpected_exits:
180+
count = self._restart_counts.get(module.name, 0)
181+
if count < self.MAX_AUTO_RESTARTS:
182+
logger.info(
183+
f"Auto-restarting crashed module {module.name} "
184+
f"(attempt {count + 1}/{self.MAX_AUTO_RESTARTS})"
185+
)
186+
module.stop() # Clean up state
187+
module.start(self.testing)
188+
self._restart_counts[module.name] = count + 1
189+
self.showMessage(
190+
"ActivityWatch",
191+
f"Module {module.name} crashed and was auto-restarted",
192+
QSystemTrayIcon.MessageIcon.Warning,
193+
5000,
194+
)
195+
else:
196+
logger.warning(
197+
f"Module {module.name} exceeded max auto-restarts "
198+
f"({self.MAX_AUTO_RESTARTS})"
199+
)
167200
show_module_failed_dialog(module)
168201
module.stop()
169202

170-
# TODO: Do it in a better way, singleShot isn't pretty...
171-
QtCore.QTimer.singleShot(2000, rebuild_modules_menu)
203+
QtCore.QTimer.singleShot(5000, check_module_status)
172204

173-
QtCore.QTimer.singleShot(2000, check_module_status)
205+
QtCore.QTimer.singleShot(5000, check_module_status)
174206

175207
def _build_modulemenu(self, moduleMenu: QMenu) -> None:
176208
moduleMenu.clear()
177209

178210
def add_module_menuitem(module: Module) -> None:
179211
title = module.name
180-
ac = moduleMenu.addAction(title, lambda: module.toggle(self.testing))
212+
213+
def on_toggle(m: Module = module) -> None:
214+
m.toggle(self.testing)
215+
# Reset auto-restart count on manual toggle
216+
self._restart_counts.pop(m.name, None)
217+
218+
ac = moduleMenu.addAction(title, on_toggle)
181219

182220
ac.setData(module)
183221
ac.setCheckable(True)

tests/test_manager.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
"""Unit tests for the Module manager."""
2+
3+
import subprocess
4+
from pathlib import Path
5+
from unittest.mock import MagicMock, patch
6+
7+
import pytest
8+
9+
from aw_qt.manager import Module
10+
11+
12+
@pytest.fixture
13+
def module():
14+
"""Create a test module with a mock path."""
15+
return Module("aw-test-module", Path("/usr/bin/true"), "system")
16+
17+
18+
class TestModuleToggle:
19+
"""Tests for Module.toggle() behavior, especially with crashed processes."""
20+
21+
def test_toggle_starts_stopped_module(self, module):
22+
"""Toggle should start a module that was never started."""
23+
assert not module.started
24+
assert not module.is_alive()
25+
26+
with patch.object(module, "start") as mock_start:
27+
module.toggle(testing=True)
28+
mock_start.assert_called_once_with(True)
29+
30+
def test_toggle_stops_running_module(self, module):
31+
"""Toggle should stop a module that is running."""
32+
# Simulate a running process
33+
mock_proc = MagicMock(spec=subprocess.Popen)
34+
mock_proc.returncode = None # None means still running
35+
36+
# After terminate+wait, process should report as dead
37+
def fake_wait():
38+
mock_proc.returncode = -15 # SIGTERM
39+
40+
mock_proc.wait.side_effect = fake_wait
41+
module._process = mock_proc
42+
module.started = True
43+
44+
module.toggle(testing=True)
45+
assert not module.started
46+
assert module._process is None
47+
48+
def test_toggle_restarts_crashed_module(self, module):
49+
"""Toggle should restart a module that crashed (started=True, process dead).
50+
51+
This is the key fix: previously, toggle checked `self.started` instead of
52+
`self.is_alive()`, so the first click would only call stop() (cleaning up
53+
state) without starting, requiring a second click to actually start.
54+
"""
55+
# Simulate a crashed process: started=True but process exited
56+
mock_proc = MagicMock(spec=subprocess.Popen)
57+
mock_proc.returncode = 1 # Non-zero = exited
58+
module._process = mock_proc
59+
module.started = True
60+
61+
assert not module.is_alive() # Process is dead
62+
assert module.started # But flag says started
63+
64+
with patch.object(module, "start") as mock_start:
65+
module.toggle(testing=True)
66+
# Should have cleaned up and started in one toggle
67+
mock_start.assert_called_once_with(True)
68+
assert not module.started # stop() was called to clean up
69+
70+
71+
class TestModuleIsAlive:
72+
"""Tests for Module.is_alive() behavior."""
73+
74+
def test_is_alive_no_process(self, module):
75+
assert not module.is_alive()
76+
77+
def test_is_alive_running(self, module):
78+
mock_proc = MagicMock(spec=subprocess.Popen)
79+
mock_proc.returncode = None
80+
module._process = mock_proc
81+
assert module.is_alive()
82+
83+
def test_is_alive_exited(self, module):
84+
mock_proc = MagicMock(spec=subprocess.Popen)
85+
mock_proc.returncode = 0
86+
module._process = mock_proc
87+
assert not module.is_alive()
88+
89+
90+
class TestGetUnexpectedStops:
91+
"""Tests for Manager.get_unexpected_stops()."""
92+
93+
def test_detects_crashed_module(self):
94+
from aw_qt.manager import Manager
95+
96+
with patch.object(Manager, "discover_modules"):
97+
mgr = Manager(testing=True)
98+
99+
mod = Module("aw-test", Path("/usr/bin/true"), "system")
100+
mock_proc = MagicMock(spec=subprocess.Popen)
101+
mock_proc.returncode = 1
102+
mod._process = mock_proc
103+
mod.started = True
104+
mgr.modules = [mod]
105+
106+
unexpected = mgr.get_unexpected_stops()
107+
assert len(unexpected) == 1
108+
assert unexpected[0].name == "aw-test"
109+
110+
def test_ignores_intentionally_stopped(self):
111+
from aw_qt.manager import Manager
112+
113+
with patch.object(Manager, "discover_modules"):
114+
mgr = Manager(testing=True)
115+
116+
mod = Module("aw-test", Path("/usr/bin/true"), "system")
117+
mod.started = False # Intentionally stopped
118+
mgr.modules = [mod]
119+
120+
unexpected = mgr.get_unexpected_stops()
121+
assert len(unexpected) == 0

0 commit comments

Comments
 (0)