Skip to content

Commit b239d6c

Browse files
authored
Fix: Check whether a tool was successfully started (#2500)
This fix tracks the status of the 'tool.up' command(s) as well as the output generated by Docker. If the output contains a 'warning...platform...match' message, we might be dealing with a container started on an incompatible platform, in which case we wait half a second (because such a container will crash very soon), check its status with 'docker inspect', and report the outcome. The fix also adds better error reporting on tool start/stop failures and partial-success reporting (the number of tools started out of the number attempted). Closes #2456
1 parent d484547 commit b239d6c

File tree

2 files changed

+52
-7
lines changed

2 files changed

+52
-7
lines changed

netsim/cli/external_commands.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,15 +251,30 @@ def get_local_addr() -> str:
251251
#
252252
# Execute external tool commands
253253
#
254-
def execute_tool_commands(cmds: list, topology: Box) -> None:
254+
def execute_tool_commands(cmds: list, topology: Box) -> typing.Optional[str]:
255255
topology.sys.docker_net = ""
256256
topology.sys.ipaddr = get_local_addr()
257257
if docker_is_used(topology):
258258
topology.sys.docker_net = f"--network={topology.addressing.mgmt.get('_network',None) or 'netlab_mgmt'}"
259259

260+
output = ''
260261
for cmd in cmds:
261262
cmd = strings.eval_format(cmd,topology)
262-
run_command(cmd = [ 'bash', '-c', cmd ],check_result=True)
263+
status = run_command(
264+
cmd = [ 'bash', '-c', cmd + " 2>&1"], # Redirect STDERR to STDOUT to collect it
265+
check_result=True, # I want to get the status
266+
ignore_errors=True, # ... returned but not reported
267+
return_stdout=True) # ... and we need STDOUT content to look for the warnings
268+
if not isinstance(status,str):
269+
log.error(
270+
f'Failed to execute {cmd}',module='tools',
271+
category=log.ErrorAbort,
272+
skip_header=True)
273+
return None
274+
else:
275+
output += status
276+
277+
return output
263278

264279
#
265280
# Get the "how to connect to the tool" message

netsim/cli/up.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
import typing
99
import argparse
1010
import os
11+
import re
1112
import sys
13+
import time
1214

1315
from box import Box
1416
from pathlib import Path
@@ -259,6 +261,24 @@ def reload_saved_config(args: argparse.Namespace, topology: Box) -> None:
259261
log.status_success()
260262
print("Saved configurations reloaded")
261263

264+
"""
265+
Check the state of the external tool container
266+
"""
267+
def check_tool_status(tool: str, status: typing.Optional[str], topology: Box) -> bool:
268+
if status is None:
269+
return False
270+
271+
if not re.search('(?i)warning.*platform.*match',status):
272+
return True
273+
274+
log.info(f'Platform mismatch between {tool} container image and your hardware, checking tool status')
275+
time.sleep(0.5)
276+
c_stat = external_commands.run_command(
277+
f'docker inspect {topology.name}_{tool}',
278+
ignore_errors=True,return_stdout=True,check_result=True)
279+
280+
return bool(c_stat)
281+
262282
"""
263283
Deploy external tools
264284
"""
@@ -273,21 +293,31 @@ def start_external_tools(args: argparse.Namespace, topology: Box) -> None:
273293

274294
lab_status_change(topology,f'starting external tools')
275295
log.section_header('Starting','external tools')
296+
t_count = 0
297+
t_success = 0
276298
for tool in topology.tools.keys():
277299
cmds = external_commands.get_tool_command(tool,'up',topology)
278300
if cmds is None:
279301
continue
280-
external_commands.execute_tool_commands(cmds,topology)
302+
303+
t_count += 1
304+
status = external_commands.execute_tool_commands(cmds,topology)
305+
if not is_dry_run() and not check_tool_status(tool,status,topology):
306+
log.error(f'Failed to start {tool}',module='tools',category=Warning,skip_header=True)
307+
continue
281308
msg = external_commands.get_tool_message(tool,topology)
282309
if not is_dry_run():
283-
print(f"... {tool} tool started")
310+
t_success += 1
311+
log.status_success()
312+
print(f"{tool} tool started")
284313

285314
if msg:
286315
print(("DRY_RUN: " if is_dry_run() else "") + msg + "\n")
287316

288-
lab_status_change(topology,f'external tools started')
289-
log.status_success()
290-
print("External tools started")
317+
lab_status_change(topology,f'{t_success}/{t_count} external tools started')
318+
if not is_dry_run():
319+
log.partial_success(t_success,t_count)
320+
print(f"{t_success}/{t_count} external tools started")
291321

292322
"""
293323
Main "lab start" process

0 commit comments

Comments
 (0)