Skip to content

Commit 5a9bea4

Browse files
authored
Merge pull request #67 from syseleven/acunin/task-timeout-sigterm
Handle task timeout to send SIGTERM
2 parents f0133f2 + dd7f07a commit 5a9bea4

File tree

8 files changed

+147
-19
lines changed

8 files changed

+147
-19
lines changed

rebootmgr/main.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,15 @@ def run_tasks(tasktype, con, hostname, dryrun, task_timeout):
7070
for task in sorted(os.listdir("/etc/rebootmgr/%s_tasks/" % tasktype)):
7171
task = os.path.join("/etc/rebootmgr/%s_tasks" % tasktype, task)
7272
LOG.info("Run task %s" % task)
73+
p = subprocess.Popen(task, env=env)
7374
try:
74-
subprocess.run(task, check=True, env=env, timeout=(task_timeout * 60))
75-
except subprocess.CalledProcessError as e:
76-
LOG.error("Task %s failed with return code %s. Exit" % (task, e.returncode))
77-
sys.exit(EXIT_TASK_FAILED)
75+
ret = p.wait(timeout=(task_timeout * 60))
7876
except subprocess.TimeoutExpired:
77+
p.terminate()
78+
try:
79+
p.wait(timeout=10)
80+
except subprocess.TimeoutExpired:
81+
p.kill()
7982
LOG.error("Could not finish task %s in %i minutes. Exit" % (task, task_timeout))
8083
LOG.error("Disable rebootmgr in consul for this node")
8184
data = get_config(con, hostname)
@@ -84,6 +87,9 @@ def run_tasks(tasktype, con, hostname, dryrun, task_timeout):
8487
put_config(con, hostname, data)
8588
con.kv.delete("service/rebootmgr/reboot_in_progress")
8689
sys.exit(EXIT_TASK_FAILED)
90+
if ret != 0:
91+
LOG.error("Task %s failed with return code %s. Exit" % (task, ret))
92+
sys.exit(EXIT_TASK_FAILED)
8793
LOG.info("task %s finished" % task)
8894

8995

tests/conftest.py

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import subprocess
77

88
from unittest.mock import DEFAULT
9+
from unittest.mock import MagicMock
910

1011
import pytest
1112
import consul
@@ -40,6 +41,57 @@ def fake_gethostname():
4041
c.agent.service.deregister(name)
4142

4243

44+
@pytest.fixture
45+
def mock_subprocess_popen(mocker):
46+
"""
47+
Fixture for testing with mocked `subprocess.Popen`.
48+
49+
Returns a configured `MagicMock` instance.
50+
51+
You can optionally pass a `side_effect` as a second argument
52+
which will be used as a side_effect for Popen.wait.
53+
54+
`side_effect` can be an Exception and will then be raised;
55+
see the `MagicMock.side_effect` documentation for more information.
56+
57+
Example:
58+
59+
mocked_popen = mock_subprocess_popen(["reboot"])
60+
61+
call_your_tested_code()
62+
63+
mocked_popen.assert_any_call(["reboot"])
64+
mocked_popen.wait.assert_called()
65+
"""
66+
wait_results = {}
67+
68+
def get_wait_result(command):
69+
if isinstance(command, str):
70+
command = [command]
71+
elif isinstance(command, list):
72+
pass
73+
else:
74+
raise ValueError("command must be either string or list")
75+
76+
return wait_results[json.dumps(command)]
77+
78+
def get_mocked_popen(command, *args, **kwargs):
79+
mock = MagicMock()
80+
return_value, side_effect = get_wait_result(command)
81+
mock.wait.return_value = return_value
82+
mock.wait.side_effect = side_effect
83+
return mock
84+
85+
mocked_popen = mocker.patch("subprocess.Popen")
86+
mocked_popen.side_effect = get_mocked_popen
87+
88+
def add(command, wait_return_value=None, wait_side_effect=None):
89+
wait_results[json.dumps(command)] = wait_return_value, wait_side_effect
90+
return mocked_popen
91+
92+
return add
93+
94+
4395
@pytest.fixture
4496
def mock_subprocess_run(mocker):
4597
"""
@@ -101,7 +153,7 @@ def run(*args, catch_exceptions=False, **kwargs):
101153

102154

103155
@pytest.fixture
104-
def reboot_task(mocker, mock_subprocess_run):
156+
def reboot_task(mocker, mock_subprocess_popen):
105157
tasks = {"pre_boot": [], "post_boot": []}
106158

107159
def listdir(directory):
@@ -120,15 +172,17 @@ def create_task(tasktype, filename, exit_code=0, raise_timeout_expired=False):
120172

121173
tasks[tasktype] += [filename]
122174

123-
side_effect = None
124-
if exit_code != 0:
125-
side_effect = subprocess.CalledProcessError(exit_code, filename)
126-
elif raise_timeout_expired:
175+
if raise_timeout_expired:
176+
return_value = None
127177
side_effect = subprocess.TimeoutExpired(filename, 1234)
178+
else:
179+
return_value = exit_code
180+
side_effect = None
128181

129-
mock_subprocess_run(
182+
return mock_subprocess_popen(
130183
["/etc/rebootmgr/{}_tasks/{}".format(tasktype, filename)],
131-
side_effect)
184+
wait_return_value=return_value,
185+
wait_side_effect=side_effect)
132186

133187
return create_task
134188

tests/test_post_reboot.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,12 @@ def test_post_reboot_consul_checks_passing(
3030
"""
3131
mocker.patch("time.sleep")
3232
mocked_run = mocker.patch("subprocess.run")
33+
mocked_popen = mocker.patch("subprocess.Popen")
3334

3435
result = run_cli(rebootmgr, ["-v"])
3536

3637
mocked_run.assert_not_called()
38+
mocked_popen.assert_not_called()
3739
assert result.exit_code == 0
3840

3941

@@ -50,10 +52,12 @@ def test_post_reboot_consul_checks_failing(
5052

5153
mocker.patch("time.sleep")
5254
mocked_run = mocker.patch("subprocess.run")
55+
mocked_popen = mocker.patch("subprocess.Popen")
5356

5457
result = run_cli(rebootmgr, ["-v"])
5558

5659
mocked_run.assert_not_called()
60+
mocked_popen.assert_not_called()
5761
assert result.exit_code == EXIT_CONSUL_CHECKS_FAILED
5862

5963

@@ -66,10 +70,12 @@ def test_post_reboot_wait_until_healthy_and_are_healthy(
6670
"""
6771
mocker.patch("time.sleep")
6872
mocked_run = mocker.patch("subprocess.run")
73+
mocked_popen = mocker.patch("subprocess.Popen")
6974

7075
result = run_cli(rebootmgr, ["-v", "--post-reboot-wait-until-healthy"])
7176

7277
mocked_run.assert_not_called()
78+
mocked_popen.assert_not_called()
7379
assert result.exit_code == 0
7480

7581

@@ -103,10 +109,12 @@ def fake_sleep(seconds):
103109

104110
mocker.patch("time.sleep", new=fake_sleep)
105111
mocked_run = mocker.patch("subprocess.run")
112+
mocked_popen = mocker.patch("subprocess.Popen")
106113

107114
result = run_cli(rebootmgr, ["-v", "--post-reboot-wait-until-healthy"])
108115

109116
mocked_run.assert_not_called()
117+
mocked_popen.assert_not_called()
110118
assert sleep_counter == 0
111119
assert result.exit_code == 0
112120

@@ -135,6 +143,7 @@ def test_post_reboot_phase_fails_with_uptime(
135143
run_cli, forward_consul_port, default_config, reboot_in_progress,
136144
reboot_task, mocker):
137145
mocker.patch('rebootmgr.main.open', new=mock_open(read_data='99999999.9 99999999.9'))
146+
mocker.patch("subprocess.run")
138147
reboot_task("post_boot", "50_another_task.sh")
139148

140149
result = run_cli(rebootmgr, ["-v", "--check-uptime"])
@@ -146,6 +155,7 @@ def test_post_reboot_phase_fails_with_uptime(
146155
def test_post_reboot_succeeds_with_current_node_in_maintenance(
147156
run_cli, consul_cluster, reboot_in_progress, forward_consul_port,
148157
default_config, reboot_task, mocker):
158+
mocker.patch("subprocess.run")
149159
consul_cluster[0].agent.service.register("A", tags=["rebootmgr"])
150160
consul_cluster[1].agent.service.register("A", tags=["rebootmgr"])
151161
consul_cluster[2].agent.service.register("A", tags=["rebootmgr"])
@@ -163,6 +173,7 @@ def test_post_reboot_succeeds_with_current_node_in_maintenance(
163173
def test_post_reboot_fails_with_other_node_in_maintenance(
164174
run_cli, consul_cluster, reboot_in_progress, forward_consul_port,
165175
default_config, reboot_task, mocker):
176+
mocker.patch("subprocess.run")
166177
consul_cluster[0].agent.service.register("A", tags=["rebootmgr"])
167178
consul_cluster[1].agent.service.register("A", tags=["rebootmgr"])
168179
consul_cluster[2].agent.service.register("A", tags=["rebootmgr"])
@@ -181,6 +192,7 @@ def test_post_reboot_succeeds_with_other_node_in_maintenance_but_ignoring(
181192
run_cli, consul_cluster, reboot_in_progress, forward_consul_port,
182193
default_config, reboot_task, mocker):
183194

195+
mocker.patch("subprocess.run")
184196
consul_cluster[0].agent.service.register("A", tags=["rebootmgr"])
185197
consul_cluster[1].agent.service.register("A", tags=["rebootmgr", "ignore_maintenance"])
186198
consul_cluster[2].agent.service.register("A", tags=["rebootmgr"])
@@ -228,10 +240,12 @@ def fake_sleep(seconds):
228240

229241
mocker.patch("time.sleep", new=fake_sleep)
230242
mocked_run = mocker.patch("subprocess.run")
243+
mocked_popen = mocker.patch("subprocess.Popen")
231244

232245
result = run_cli(rebootmgr, ["-v", "--post-reboot-wait-until-healthy"])
233246

234247
mocked_run.assert_not_called()
248+
mocked_popen.assert_not_called()
235249
assert sleep_counter == 0
236250
assert 'There were failed consul checks' in result.output
237251
assert '_node_maintenance on consul2' in result.output

tests/test_reboot.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def test_dryrun_reboot_succeeds_with_tasks(run_cli, forward_consul_port,
4949
reboot_task, mock_subprocess_run,
5050
mocker):
5151
mocked_sleep = mocker.patch("time.sleep")
52-
reboot_task("pre_boot", "00_some_task.sh")
52+
mocked_popen = reboot_task("pre_boot", "00_some_task.sh")
5353
mocked_run = mock_subprocess_run(["shutdown", "-r", "+1"])
5454

5555
result = run_cli(rebootmgr, ["-vv", "--dryrun"])
@@ -58,8 +58,11 @@ def test_dryrun_reboot_succeeds_with_tasks(run_cli, forward_consul_port,
5858
assert "in key service/rebootmgr/reboot_in_progress" in result.output
5959
assert result.exit_code == 0
6060

61-
assert mocked_run.call_count == 1
62-
args, kwargs = mocked_run.call_args
61+
# shutdown must not be called
62+
mocked_run.assert_not_called()
63+
# task should be called
64+
assert mocked_popen.call_count == 1
65+
args, kwargs = mocked_popen.call_args
6366
assert args[0] == "/etc/rebootmgr/pre_boot_tasks/00_some_task.sh"
6467
assert 'env' in kwargs
6568
assert 'REBOOTMGR_DRY_RUN' in kwargs['env']
@@ -80,6 +83,7 @@ def test_reboot_fail(
8083
mock_subprocess_run, mocker):
8184
mocked_sleep = mocker.patch("time.sleep")
8285

86+
mocked_popen = mocker.patch("subprocess.Popen")
8387
mocked_run = mock_subprocess_run(
8488
["shutdown", "-r", "+1"],
8589
side_effect=Exception("Failed to run reboot command"))
@@ -88,6 +92,7 @@ def test_reboot_fail(
8892

8993
assert result.exit_code == 1
9094

95+
mocked_popen.assert_not_called()
9196
mocked_run.assert_any_call(["shutdown", "-r", "+1"], check=True)
9297

9398
# We want rebootmgr to sleep for 2 minutes after running the pre boot tasks,
@@ -112,10 +117,12 @@ def test_reboot_succeeds_if_this_node_is_in_maintenance(
112117
consul_cluster[0].agent.maintenance(True)
113118

114119
mocker.patch("time.sleep")
120+
mocked_popen = mocker.patch("subprocess.Popen")
115121
mocked_run = mock_subprocess_run(["shutdown", "-r", "+1"])
116122

117123
result = run_cli(rebootmgr, ["-v"])
118124

125+
mocked_popen.assert_not_called()
119126
mocked_run.assert_any_call(["shutdown", "-r", "+1"], check=True)
120127
assert result.exit_code == 0
121128

@@ -129,10 +136,12 @@ def test_reboot_fails_if_another_node_is_in_maintenance(
129136
consul_cluster[1].agent.maintenance(True)
130137

131138
mocker.patch("time.sleep")
139+
mocked_popen = mocker.patch("subprocess.Popen")
132140
mocked_run = mock_subprocess_run(["shutdown", "-r", "+1"])
133141

134142
result = run_cli(rebootmgr, ["-v"])
135143

144+
mocked_popen.assert_not_called()
136145
mocked_run.assert_not_called()
137146
assert 'There were failed consul checks' in result.output
138147
assert '_node_maintenance on consul2' in result.output
@@ -149,9 +158,11 @@ def test_reboot_succeeds_if_another_node_is_in_maintenance_but_ignoring(
149158
consul_cluster[1].agent.maintenance(True)
150159

151160
mocker.patch("time.sleep")
161+
mocked_popen = mocker.patch("subprocess.Popen")
152162
mocked_run = mock_subprocess_run(["shutdown", "-r", "+1"])
153163

154164
result = run_cli(rebootmgr, ["-v"])
155165

166+
mocked_popen.assert_not_called()
156167
mocked_run.assert_any_call(["shutdown", "-r", "+1"], check=True)
157168
assert result.exit_code == 0

tests/test_stopflag.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,14 @@ def test_set_global_stop_flag(
3333
mock_subprocess_run, mocker):
3434
mocked_sleep = mocker.patch("time.sleep")
3535
mocked_run = mock_subprocess_run(["shutdown", "-r", "+1"])
36+
mocked_popen = mocker.patch("subprocess.Popen")
3637
datacenter = "test"
3738

3839
result = run_cli(rebootmgr, ["-v", "--set-global-stop-flag", datacenter])
3940

4041
mocked_sleep.assert_not_called()
4142
mocked_run.assert_not_called()
43+
mocked_popen.assert_not_called()
4244
assert "Set "+datacenter+" global stop flag:" in result.output
4345
idx, data = consul_cluster[0].kv.get("service/rebootmgr/stop", dc=datacenter)
4446
assert idx is not None
@@ -96,12 +98,14 @@ def test_set_local_stop_flag(
9698
mock_subprocess_run, mocker):
9799
mocked_sleep = mocker.patch("time.sleep")
98100
mocked_run = mock_subprocess_run(["shutdown", "-r", "+1"])
101+
mocked_popen = mocker.patch("subprocess.Popen")
99102
hostname = socket.gethostname().split(".")[0]
100103

101104
result = run_cli(rebootmgr, ["-v", "--set-local-stop-flag"])
102105

103106
mocked_sleep.assert_not_called()
104107
mocked_run.assert_not_called()
108+
mocked_popen.assert_not_called()
105109
assert "Set "+hostname+" local stop flag:" in result.output
106110
idx, data = consul_cluster[0].kv.get("service/rebootmgr/nodes/{}/config".format(
107111
hostname))

tests/test_tasks.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ def test_reboot_task_timeout(run_cli, consul_cluster, forward_consul_port, defau
1818

1919
def test_reboot_preboot_task_fails(run_cli, consul_cluster, forward_consul_port, default_config, reboot_task, mocker):
2020
mocker.patch("time.sleep")
21-
reboot_task("pre_boot", "00_some_task.sh", exit_code=1)
21+
mocked_run = mocker.patch("subprocess.run")
22+
mocked_popen = reboot_task("pre_boot", "00_some_task.sh", exit_code=1)
2223

2324
result = run_cli(rebootmgr)
2425

@@ -29,13 +30,15 @@ def test_reboot_preboot_task_fails(run_cli, consul_cluster, forward_consul_port,
2930
assert json.loads(data["Value"].decode()) == {
3031
"enabled": True,
3132
}
32-
# TODO(oseibert): check that shutdown is NOT called.
33+
assert mocked_popen.call_count == 1
34+
mocked_run.assert_not_called()
3335

3436

3537
def test_reboot_task_timeout_with_preexisting_config(run_cli, consul_cluster, forward_consul_port, reboot_task, mocker):
3638
consul_cluster[0].kv.put("service/rebootmgr/nodes/{}/config".format(socket.gethostname()), '{"enabled": true, "test_preserved": true}')
3739
mocker.patch("time.sleep")
38-
reboot_task("pre_boot", "00_some_task.sh", raise_timeout_expired=True)
40+
mocked_run = mocker.patch("subprocess.run")
41+
mocked_popen = reboot_task("pre_boot", "00_some_task.sh", raise_timeout_expired=True)
3942

4043
result = run_cli(rebootmgr)
4144

@@ -48,11 +51,13 @@ def test_reboot_task_timeout_with_preexisting_config(run_cli, consul_cluster, fo
4851
"enabled": False,
4952
"message": "Could not finish task /etc/rebootmgr/pre_boot_tasks/00_some_task.sh in 120 minutes"
5053
}
51-
# TODO(oseibert): check that shutdown is NOT called.
54+
assert mocked_popen.call_count == 1
55+
mocked_run.assert_not_called()
5256

5357

5458
def test_post_reboot_phase_task_timeout(run_cli, consul_cluster, forward_consul_port, default_config, reboot_task, mocker):
55-
reboot_task("post_boot", "50_another_task.sh", raise_timeout_expired=True)
59+
mocked_run = mocker.patch("subprocess.run")
60+
mocked_popen = reboot_task("post_boot", "50_another_task.sh", raise_timeout_expired=True)
5661

5762
mocker.patch("time.sleep")
5863
consul_cluster[0].kv.put("service/rebootmgr/reboot_in_progress", socket.gethostname())
@@ -67,3 +72,5 @@ def test_post_reboot_phase_task_timeout(run_cli, consul_cluster, forward_consul_
6772
"enabled": False,
6873
"message": "Could not finish task /etc/rebootmgr/post_boot_tasks/50_another_task.sh in 120 minutes"
6974
}
75+
assert mocked_popen.call_count == 1
76+
mocked_run.assert_not_called()

0 commit comments

Comments
 (0)