Skip to content

Commit 7314eb5

Browse files
cmcmarrow and Ch3LL authored
[3003.2] Add server alive (#60573)
* add server alive * rename log * change default alive time * add requested changes * format string * reformat string again * run pre * customize * space * remove EOF dead space * fix pre-commit * run pre Co-authored-by: Megan Wilhite <megan.wilhite@gmail.com>
1 parent c64ba92 commit 7314eb5

File tree

3 files changed

+186
-14
lines changed

3 files changed

+186
-14
lines changed

changelog/60216.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Stop SSH from hanging if connection is lost. Also added args to customize grace period.

salt/utils/cloud.py

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@
124124
r"(?:.*sudo)(?:.*)[Pp]assword(?: for .*)?:", re.M
125125
)
126126

127+
SERVER_ALIVE_INTERVAL = 60
128+
SERVER_ALIVE_COUNT_MAX = 3
129+
127130
# Get logging started
128131
log = logging.getLogger(__name__)
129132

@@ -183,18 +186,24 @@ def __ssh_gateway_arguments(kwargs):
183186
ssh_gateway_user = kwargs.get("ssh_gateway_user", "root")
184187

185188
# Setup ProxyCommand
186-
extended_arguments = '-oProxyCommand="ssh {} {} {} {} {}@{} -p {} {}"'.format(
187-
# Don't add new hosts to the host key database
188-
"-oStrictHostKeyChecking=no",
189-
# Set hosts key database path to /dev/null, i.e., non-existing
190-
"-oUserKnownHostsFile=/dev/null",
191-
# Don't re-use the SSH connection. Less failures.
192-
"-oControlPath=none",
193-
ssh_gateway_key,
194-
ssh_gateway_user,
195-
ssh_gateway,
196-
ssh_gateway_port,
197-
ssh_gateway_command,
189+
extended_arguments = " ".join(
190+
(
191+
"ssh",
192+
"-oStrictHostKeyChecking=no",
193+
"-oServerAliveInterval={}".format(
194+
kwargs.get("server_alive_interval", SERVER_ALIVE_INTERVAL)
195+
),
196+
"-oServerAliveCountMax={}".format(
197+
kwargs.get("server_alive_count_max", SERVER_ALIVE_COUNT_MAX)
198+
),
199+
"-oUserKnownHostsFile=/dev/null",
200+
"-oControlPath=none",
201+
str(ssh_gateway_key),
202+
"{}@{}".format(ssh_gateway_user, ssh_gateway),
203+
"-p",
204+
str(ssh_gateway_port),
205+
str(ssh_gateway_command),
206+
)
198207
)
199208

200209
log.info(
@@ -605,7 +614,7 @@ def bootstrap(vm_, opts=None):
605614
"event",
606615
"executing deploy script",
607616
"salt/cloud/{}/deploying".format(vm_["name"]),
608-
args={"kwargs": event_kwargs},
617+
args={"kwargs": salt.utils.data.simple_types_filter(event_kwargs)},
609618
sock_dir=opts.get("sock_dir", os.path.join(__opts__["sock_dir"], "master")),
610619
transport=opts.get("transport", "zeromq"),
611620
)
@@ -691,7 +700,14 @@ def wait_for_fun(fun, timeout=900, **kwargs):
691700
return False
692701

693702

694-
def wait_for_port(host, port=22, timeout=900, gateway=None):
703+
def wait_for_port(
704+
host,
705+
port=22,
706+
timeout=900,
707+
gateway=None,
708+
server_alive_interval=SERVER_ALIVE_INTERVAL,
709+
server_alive_count_max=SERVER_ALIVE_COUNT_MAX,
710+
):
695711
"""
696712
Wait until a connection to the specified port can be made on a specified
697713
host. This is usually port 22 (for SSH), but in the case of Windows
@@ -765,6 +781,9 @@ def wait_for_port(host, port=22, timeout=900, gateway=None):
765781
[
766782
# Don't add new hosts to the host key database
767783
"-oStrictHostKeyChecking=no",
784+
# Make sure ssh can time out when the connection is lost
785+
"-oServerAliveInterval={}".format(server_alive_interval),
786+
"-oServerAliveCountMax={}".format(server_alive_count_max),
768787
# Set hosts key database path to /dev/null, i.e., non-existing
769788
"-oUserKnownHostsFile=/dev/null",
770789
# Don't re-use the SSH connection. Less failures.
@@ -2146,6 +2165,13 @@ def scp_file(dest_path, contents=None, kwargs=None, local_file=None):
21462165
ssh_args = [
21472166
# Don't add new hosts to the host key database
21482167
"-oStrictHostKeyChecking=no",
2168+
# Make sure ssh can time out when the connection is lost
2169+
"-oServerAliveInterval={}".format(
2170+
kwargs.get("server_alive_interval", SERVER_ALIVE_INTERVAL)
2171+
),
2172+
"-oServerAliveCountMax={}".format(
2173+
kwargs.get("server_alive_count_max", SERVER_ALIVE_COUNT_MAX)
2174+
),
21492175
# Set hosts key database path to /dev/null, i.e., non-existing
21502176
"-oUserKnownHostsFile=/dev/null",
21512177
# Don't re-use the SSH connection. Less failures.
@@ -2263,6 +2289,13 @@ def sftp_file(dest_path, contents=None, kwargs=None, local_file=None):
22632289
ssh_args = [
22642290
# Don't add new hosts to the host key database
22652291
"-oStrictHostKeyChecking=no",
2292+
# Make sure ssh can time out when the connection is lost
2293+
"-oServerAliveInterval={}".format(
2294+
kwargs.get("server_alive_interval", SERVER_ALIVE_INTERVAL)
2295+
),
2296+
"-oServerAliveCountMax={}".format(
2297+
kwargs.get("server_alive_count_max", SERVER_ALIVE_COUNT_MAX)
2298+
),
22662299
# Set hosts key database path to /dev/null, i.e., non-existing
22672300
"-oUserKnownHostsFile=/dev/null",
22682301
# Don't re-use the SSH connection. Less failures.

tests/pytests/unit/utils/test_cloud.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import pytest
1414
import salt.utils.cloud as cloud
15+
from salt.utils.cloud import __ssh_gateway_arguments as ssh_gateway_arguments
1516
from tests.support.mock import MagicMock, patch
1617

1718

@@ -222,6 +223,143 @@ def test_winrm_pinnned_version():
222223
# fmt: on
223224

224225

226+
def test_ssh_gateway_arguments_default_alive_args():
227+
server_alive_interval = 60
228+
server_alive_count_max = 3
229+
arguments = ssh_gateway_arguments({"ssh_gateway": "host"})
230+
assert "-oServerAliveInterval={}".format(server_alive_interval) in arguments
231+
assert "-oServerAliveCountMax={}".format(server_alive_count_max) in arguments
232+
233+
234+
def test_ssh_gateway_arguments_alive_args():
235+
server_alive_interval = 10
236+
server_alive_count_max = 8
237+
arguments = ssh_gateway_arguments(
238+
{
239+
"ssh_gateway": "host",
240+
"server_alive_interval": server_alive_interval,
241+
"server_alive_count_max": server_alive_count_max,
242+
}
243+
)
244+
assert "-oServerAliveInterval={}".format(server_alive_interval) in arguments
245+
assert "-oServerAliveCountMax={}".format(server_alive_count_max) in arguments
246+
247+
248+
def test_wait_for_port_default_alive_args():
249+
server_alive_interval = 60
250+
server_alive_count_max = 3
251+
with patch("salt.utils.cloud.socket", autospec=True), patch(
252+
"salt.utils.cloud._exec_ssh_cmd", autospec=True, return_value=0
253+
) as exec_ssh_cmd:
254+
cloud.wait_for_port(
255+
"127.0.0.1", gateway={"ssh_gateway": "host", "ssh_gateway_user": "user"},
256+
)
257+
assert exec_ssh_cmd.call_count == 2
258+
ssh_call = exec_ssh_cmd.call_args[0][0]
259+
assert "-oServerAliveInterval={}".format(server_alive_interval) in ssh_call
260+
assert "-oServerAliveCountMax={}".format(server_alive_count_max) in ssh_call
261+
262+
263+
def test_wait_for_port_alive_args():
264+
server_alive_interval = 66
265+
server_alive_count_max = 1
266+
with patch("salt.utils.cloud.socket", autospec=True), patch(
267+
"salt.utils.cloud._exec_ssh_cmd", autospec=True, return_value=0
268+
) as exec_ssh_cmd:
269+
cloud.wait_for_port(
270+
"127.0.0.1",
271+
server_alive_interval=server_alive_interval,
272+
server_alive_count_max=server_alive_count_max,
273+
gateway={"ssh_gateway": "host", "ssh_gateway_user": "user"},
274+
)
275+
assert exec_ssh_cmd.call_count == 2
276+
ssh_call = exec_ssh_cmd.call_args[0][0]
277+
assert "-oServerAliveInterval={}".format(server_alive_interval) in ssh_call
278+
assert "-oServerAliveCountMax={}".format(server_alive_count_max) in ssh_call
279+
280+
281+
def test_scp_file_default_alive_args():
282+
server_alive_interval = 60
283+
server_alive_count_max = 3
284+
with patch("salt.utils.cloud.socket", autospec=True), patch(
285+
"salt.utils.cloud._exec_ssh_cmd", autospec=True, return_value=0
286+
) as exec_ssh_cmd:
287+
cloud.scp_file(
288+
"/salt.txt",
289+
contents=None,
290+
kwargs={"hostname": "127.0.0.1", "username": "user"},
291+
local_file="/salt.txt",
292+
)
293+
assert exec_ssh_cmd.call_count == 1
294+
ssh_call = exec_ssh_cmd.call_args[0][0]
295+
assert "-oServerAliveInterval={}".format(server_alive_interval) in ssh_call
296+
assert "-oServerAliveCountMax={}".format(server_alive_count_max) in ssh_call
297+
298+
299+
def test_scp_file_alive_args():
300+
server_alive_interval = 64
301+
server_alive_count_max = 4
302+
with patch("salt.utils.cloud.socket", autospec=True), patch(
303+
"salt.utils.cloud._exec_ssh_cmd", autospec=True, return_value=0
304+
) as exec_ssh_cmd:
305+
cloud.scp_file(
306+
"/salt.txt",
307+
contents=None,
308+
kwargs={
309+
"hostname": "127.0.0.1",
310+
"username": "user",
311+
"server_alive_interval": server_alive_interval,
312+
"server_alive_count_max": server_alive_count_max,
313+
},
314+
local_file="/salt.txt",
315+
)
316+
assert exec_ssh_cmd.call_count == 1
317+
ssh_call = exec_ssh_cmd.call_args[0][0]
318+
assert "-oServerAliveInterval={}".format(server_alive_interval) in ssh_call
319+
assert "-oServerAliveCountMax={}".format(server_alive_count_max) in ssh_call
320+
321+
322+
def test_sftp_file_default_alive_args():
323+
server_alive_interval = 60
324+
server_alive_count_max = 3
325+
with patch("salt.utils.cloud.socket", autospec=True), patch(
326+
"salt.utils.cloud._exec_ssh_cmd", autospec=True, return_value=0
327+
) as exec_ssh_cmd:
328+
cloud.sftp_file(
329+
"/salt.txt",
330+
contents=None,
331+
kwargs={"hostname": "127.0.0.1", "username": "user"},
332+
local_file="/salt.txt",
333+
)
334+
assert exec_ssh_cmd.call_count == 1
335+
ssh_call = exec_ssh_cmd.call_args[0][0]
336+
assert "-oServerAliveInterval={}".format(server_alive_interval) in ssh_call
337+
assert "-oServerAliveCountMax={}".format(server_alive_count_max) in ssh_call
338+
339+
340+
def test_sftp_file_alive_args():
341+
server_alive_interval = 62
342+
server_alive_count_max = 6
343+
with patch("salt.utils.cloud.socket", autospec=True), patch(
344+
"salt.utils.cloud._exec_ssh_cmd", autospec=True, return_value=0
345+
) as exec_ssh_cmd:
346+
cloud.sftp_file(
347+
"/salt.txt",
348+
contents=None,
349+
kwargs={
350+
"hostname": "127.0.0.1",
351+
"username": "user",
352+
"server_alive_interval": server_alive_interval,
353+
"server_alive_count_max": server_alive_count_max,
354+
},
355+
local_file="/salt.txt",
356+
)
357+
assert exec_ssh_cmd.call_count == 1
358+
ssh_call = exec_ssh_cmd.call_args[0][0]
359+
assert "-oServerAliveInterval={}".format(server_alive_interval) in ssh_call
360+
assert "-oServerAliveCountMax={}".format(server_alive_count_max) in ssh_call
361+
362+
225363
def test_deploy_script_ssh_timeout():
226364
with patch("salt.utils.cloud.root_cmd", return_value=False) as root_cmd, patch(
227365
"salt.utils.cloud.wait_for_port", return_value=True

0 commit comments

Comments
 (0)