Skip to content

Commit 3281aef

Browse files
committed
Fix: Check whether a tool was successfully started
This fix tracks the status of the 'tool.up' command(s) as well as the output generated by Docker. If the output contains a 'warning ... platform ... match' message, the container was probably started on an incompatible platform; in that case we wait briefly (such a container crashes almost immediately), check the container status, and report the outcome. The fix also adds better error reporting on tool start/stop failures and partial-success reporting. Closes #2456
1 parent d484547 commit 3281aef

File tree

2 files changed

+52
-7
lines changed

2 files changed

+52
-7
lines changed

netsim/cli/external_commands.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,15 +251,30 @@ def get_local_addr() -> str:
251251
#
252252
# Execute external tool commands
253253
#
254-
def execute_tool_commands(cmds: list, topology: Box) -> None:
254+
def execute_tool_commands(cmds: list, topology: Box) -> typing.Optional[str]:
255255
topology.sys.docker_net = ""
256256
topology.sys.ipaddr = get_local_addr()
257257
if docker_is_used(topology):
258258
topology.sys.docker_net = f"--network={topology.addressing.mgmt.get('_network',None) or 'netlab_mgmt'}"
259259

260+
output = ''
260261
for cmd in cmds:
261262
cmd = strings.eval_format(cmd,topology)
262-
run_command(cmd = [ 'bash', '-c', cmd ],check_result=True)
263+
status = run_command(
264+
cmd = [ 'bash', '-c', cmd + " 2>&1"], # Redirect STDERR to STDOUT to collect it
265+
check_result=True, # I want to get the status
266+
ignore_errors=True, # ... returned but not reported
267+
return_stdout=True) # ... and we need STDOUT content to look for the warnings
268+
if not isinstance(status,str):
269+
log.error(
270+
f'Failed to execute {cmd}',module='tools',
271+
category=log.ErrorAbort,
272+
skip_header=True)
273+
return None
274+
else:
275+
output += status
276+
277+
return output
263278

264279
#
265280
# Get the "how to connect to the tool" message

netsim/cli/up.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
import typing
99
import argparse
1010
import os
11+
import re
1112
import sys
13+
import time
1214

1315
from box import Box
1416
from pathlib import Path
@@ -259,6 +261,24 @@ def reload_saved_config(args: argparse.Namespace, topology: Box) -> None:
259261
log.status_success()
260262
print("Saved configurations reloaded")
261263

264+
"""
265+
Check the state of the external tool container
266+
"""
267+
def check_tool_status(tool: str, status: typing.Optional[str], topology: Box) -> bool:
268+
if status is None:
269+
return False
270+
271+
if not re.search('(?i)warning.*platform.*match',status):
272+
return True
273+
274+
log.info(f'Platform mismatch between {tool} container image and your hardware, checking tool status')
275+
time.sleep(0.5)
276+
c_stat = external_commands.run_command(
277+
f'docker inspect {topology.name}_{tool}',
278+
ignore_errors=True,return_stdout=True,check_result=True)
279+
280+
return bool(c_stat)
281+
262282
"""
263283
Deploy external tools
264284
"""
@@ -273,21 +293,31 @@ def start_external_tools(args: argparse.Namespace, topology: Box) -> None:
273293

274294
lab_status_change(topology,f'starting external tools')
275295
log.section_header('Starting','external tools')
296+
t_count = 0
297+
t_success = 0
276298
for tool in topology.tools.keys():
277299
cmds = external_commands.get_tool_command(tool,'up',topology)
278300
if cmds is None:
279301
continue
280-
external_commands.execute_tool_commands(cmds,topology)
302+
303+
t_count += 1
304+
status = external_commands.execute_tool_commands(cmds,topology)
305+
if not is_dry_run() and not check_tool_status(tool,status,topology):
306+
log.error(f'Failed to start {tool}',module='tools',category=Warning,skip_header=True)
307+
continue
281308
msg = external_commands.get_tool_message(tool,topology)
282309
if not is_dry_run():
283-
print(f"... {tool} tool started")
310+
t_success += 1
311+
log.status_success()
312+
print(f"{tool} tool started")
284313

285314
if msg:
286315
print(("DRY_RUN: " if is_dry_run() else "") + msg + "\n")
287316

288-
lab_status_change(topology,f'external tools started')
289-
log.status_success()
290-
print("External tools started")
317+
lab_status_change(topology,f'{t_success}/{t_count} external tools started')
318+
if not is_dry_run():
319+
log.partial_success(t_success,t_count)
320+
print(f"{t_success}/{t_count} external tools started")
291321

292322
"""
293323
Main "lab start" process

0 commit comments

Comments
 (0)