Skip to content

Commit 807cdd4

Browse files
authored
Dev mode (#637)
* Refactoring for error messages and security fix for path echoing * Added dev_no_sleeps, dev_no_metrics, dev_no_build * Added healthcheck * Normalization and extra error check * Tests; improved test speed by 6x * Forcing no_build = False * Healthcheck now handles lists in a conformant way and is stricter on the health status string * Test-Fix * Test-Fix
1 parent 6c59bd5 commit 807cdd4

File tree

12 files changed

+276
-92
lines changed

12 files changed

+276
-92
lines changed

lib/schema_checker.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,15 @@ def check_usage_scenario(self, usage_scenario):
9999
Optional("environment"): self.single_or_list(Or(dict,str)),
100100
Optional("ports"): self.single_or_list(Or(str, int)),
101101
Optional("depends_on"): Or([str],dict),
102+
Optional("healthcheck"): {
103+
Optional('test'): Or(list, str),
104+
Optional('interval'): str,
105+
Optional('timeout'): str,
106+
Optional('retries'): int,
107+
Optional('start_period'): str,
108+
# Optional('start_interval'): str, docker CLI does not support this atm
109+
Optional('disable'): bool,
110+
},
102111
Optional("setup-commands"): [str],
103112
Optional("volumes"): self.single_or_list(str),
104113
Optional("folder-destination"):str,

runner.py

Lines changed: 97 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def __init__(self,
8989
name, uri, uri_type, filename='usage_scenario.yml', branch=None,
9090
debug_mode=False, allow_unsafe=False, no_file_cleanup=False, skip_system_checks=False,
9191
skip_unsafe=False, verbose_provider_boot=False, full_docker_prune=False,
92-
dry_run=False, dev_repeat_run=False, docker_prune=False, job_id=None):
92+
dev_no_sleeps=False, dev_no_build=False, dev_no_metrics=False, docker_prune=False, job_id=None):
9393

9494
if skip_unsafe is True and allow_unsafe is True:
9595
raise RuntimeError('Cannot specify both --skip-unsafe and --allow-unsafe')
@@ -104,8 +104,9 @@ def __init__(self,
104104
self._verbose_provider_boot = verbose_provider_boot
105105
self._full_docker_prune = full_docker_prune
106106
self._docker_prune = docker_prune
107-
self._dry_run = dry_run
108-
self._dev_repeat_run = dev_repeat_run
107+
self._dev_no_sleeps = dev_no_sleeps
108+
self._dev_no_build = dev_no_build
109+
self._dev_no_metrics = dev_no_metrics
109110
self._uri = uri
110111
self._uri_type = uri_type
111112
self._original_filename = filename
@@ -145,7 +146,7 @@ def __init__(self,
145146
# self.__filename = self._original_filename # this can be changed later if working directory changes
146147

147148
def custom_sleep(self, sleep_time):
148-
if not self._dry_run:
149+
if not self._dev_no_sleeps:
149150
print(TerminalColors.HEADER, '\nSleeping for : ', sleep_time, TerminalColors.ENDC)
150151
time.sleep(sleep_time)
151152

@@ -387,13 +388,13 @@ def check_running_containers(self):
387388
def populate_image_names(self):
388389
for service_name, service in self._usage_scenario.get('services', {}).items():
389390
if not service.get('image', None): # image is a non-mandatory field. But we need it, so we tmp it
390-
if self._dev_repeat_run:
391+
if self._dev_no_build:
391392
service['image'] = f"{service_name}"
392393
else:
393394
service['image'] = f"{service_name}_{random.randint(500000,10000000)}"
394395

395396
def remove_docker_images(self):
396-
if self._dev_repeat_run:
397+
if self._dev_no_build:
397398
return
398399

399400
print(TerminalColors.HEADER, '\nRemoving all temporary GMT images', TerminalColors.ENDC)
@@ -477,6 +478,10 @@ def update_and_insert_specs(self):
477478
)
478479

479480
def import_metric_providers(self):
481+
if self._dev_no_metrics:
482+
print(TerminalColors.HEADER, '\nSkipping import of metric providers', TerminalColors.ENDC)
483+
return
484+
480485
config = GlobalConfig().config
481486

482487
print(TerminalColors.HEADER, '\nImporting metric providers', TerminalColors.ENDC)
@@ -520,6 +525,10 @@ def import_metric_providers(self):
520525
self.__metric_providers.sort(key=lambda item: 'rapl' not in item.__class__.__name__.lower())
521526

522527
def download_dependencies(self):
528+
if self._dev_no_build:
529+
print(TerminalColors.HEADER, '\nSkipping downloading dependencies', TerminalColors.ENDC)
530+
return
531+
523532
print(TerminalColors.HEADER, '\nDownloading dependencies', TerminalColors.ENDC)
524533
subprocess.run(['docker', 'pull', 'gcr.io/kaniko-project/executor:latest'], check=True)
525534

@@ -567,6 +576,7 @@ def build_docker_images(self):
567576
encoding='UTF-8',
568577
check=True)
569578
# The image exists so exit and don't build
579+
print(f"Image {service['image']} exists in build cache. Skipping build ...")
570580
continue
571581
except subprocess.CalledProcessError:
572582
pass
@@ -656,10 +666,11 @@ def order_service_names(service_name, visited=None):
656666
raise RuntimeError(f"Cycle found in depends_on definition with service '{service_name}'!")
657667
visited.add(service_name)
658668

669+
if service_name not in services:
670+
raise RuntimeError(f"Dependent service '{service_name}' defined in 'depends_on' does not exist in usage_scenario!")
671+
659672
service = services[service_name]
660673
if 'depends_on' in service:
661-
if isinstance(service['depends_on'], dict):
662-
raise RuntimeError(f"Service definition of {service_name} uses the long form of 'depends_on', however, GMT only supports the short form!")
663674
for dep in service['depends_on']:
664675
if dep not in names_ordered:
665676
order_service_names(dep, visited)
@@ -834,6 +845,35 @@ def setup_services(self):
834845
if 'pause-after-phase' in service:
835846
self.__services_to_pause_phase[service['pause-after-phase']] = self.__services_to_pause_phase.get(service['pause-after-phase'], []) + [container_name]
836847

848+
if 'healthcheck' in service: # must come last
849+
if 'disable' in service['healthcheck'] and service['healthcheck']['disable'] is True:
850+
docker_run_string.append('--no-healthcheck')
851+
else:
852+
if 'test' in service['healthcheck']:
853+
docker_run_string.append('--health-cmd')
854+
health_string = service['healthcheck']['test']
855+
if isinstance(service['healthcheck']['test'], list):
856+
health_string_copy = service['healthcheck']['test'].copy()
857+
health_string_command = health_string_copy.pop(0)
858+
if health_string_command not in ['CMD', 'CMD-SHELL']:
859+
raise RuntimeError(f"Healthcheck starts with {health_string_command}. Please use 'CMD' or 'CMD-SHELL' when supplying as list. For disabling do not use 'NONE' but the disable argument.")
860+
health_string = ' '.join(health_string_copy)
861+
docker_run_string.append(health_string)
862+
if 'interval' in service['healthcheck']:
863+
docker_run_string.append('--health-interval')
864+
docker_run_string.append(service['healthcheck']['interval'])
865+
if 'timeout' in service['healthcheck']:
866+
docker_run_string.append('--health-timeout')
867+
docker_run_string.append(service['healthcheck']['timeout'])
868+
if 'retries' in service['healthcheck']:
869+
docker_run_string.append('--health-retries')
870+
docker_run_string.append(service['healthcheck']['retries'])
871+
if 'start_period' in service['healthcheck']:
872+
docker_run_string.append('--health-start-period')
873+
docker_run_string.append(service['healthcheck']['start_period'])
874+
if 'start_interval' in service['healthcheck']:
875+
raise RuntimeError('start_interval is not supported atm in healthcheck')
876+
837877
docker_run_string.append(self.clean_image_name(service['image']))
838878

839879
# Before starting the container, check if the dependent containers are "ready".
@@ -842,29 +882,55 @@ def setup_services(self):
842882
# In the future we want to implement an health check to know if dependent containers are actually ready.
843883
if 'depends_on' in service:
844884
for dependent_container in service['depends_on']:
885+
print(f"Waiting for dependent container {dependent_container}")
845886
time_waited = 0
846-
state = ""
887+
state = ''
888+
health = 'healthy' # default because some containers have no health
847889
max_waiting_time = config['measurement']['boot']['wait_time_dependencies']
848890
while time_waited < max_waiting_time:
849-
# TODO: Check health status instead if `healthcheck` is enabled (https://github.com/green-coding-berlin/green-metrics-tool/issues/423)
850-
# This waiting loop is actually a pre-work for the upcoming health check. For the check if the container is "running", as implemented here, the waiting loop is not needed.
851891
status_output = subprocess.check_output(
852892
["docker", "container", "inspect", "-f", "{{.State.Status}}", dependent_container],
853893
stderr=subprocess.STDOUT,
854-
encoding='utf-8'
894+
encoding='UTF-8',
855895
)
856896

857897
state = status_output.strip()
858-
859-
if state == "running":
898+
print(f"State of container '{dependent_container}': {state}")
899+
900+
if isinstance(service['depends_on'], dict) \
901+
and 'condition' in service['depends_on'][dependent_container]:
902+
903+
condition = service['depends_on'][dependent_container]['condition']
904+
if condition == 'service_healthy':
905+
ps = subprocess.run(
906+
["docker", "container", "inspect", "-f", "{{.State.Health.Status}}", dependent_container],
907+
check=False,
908+
stdout=subprocess.PIPE,
909+
stderr=subprocess.STDOUT, # put both in one stream
910+
encoding='UTF-8'
911+
)
912+
health = ps.stdout.strip()
913+
if ps.returncode != 0 or health == '<nil>':
914+
raise RuntimeError(f"Health check for dependent_container '{dependent_container}' was requested, but container has no healthcheck implemented! (Output was: {health})")
915+
if health == 'unhealthy':
916+
raise RuntimeError('ontainer healthcheck failed terminally with status "unhealthy")')
917+
print(f"Health of container '{dependent_container}': {health}")
918+
elif condition == 'service_started':
919+
pass
920+
else:
921+
raise RuntimeError(f"Unsupported condition in healthcheck for service '{service_name}': {condition}")
922+
923+
if state == 'running' and health == 'healthy':
860924
break
861925

862-
print(f"State of container '{dependent_container}': {state}. Waiting for 1 second")
863-
self.custom_sleep(1)
926+
print('Waiting for 1 second')
927+
time.sleep(1)
864928
time_waited += 1
865929

866-
if state != "running":
867-
raise RuntimeError(f"Dependent container '{dependent_container}' of '{container_name}' is not running after waiting for {time_waited} sec! Consider checking your service configuration, the entrypoint of the container or the logs of the container.")
930+
if state != 'running':
931+
raise RuntimeError(f"Dependent container '{dependent_container}' of '{container_name}' is not running but {state} after waiting for {time_waited} sec! Consider checking your service configuration, the entrypoint of the container or the logs of the container.")
932+
if health != 'healthy':
933+
raise RuntimeError(f"Dependent container '{dependent_container}' of '{container_name}' is not healthy but '{health}' after waiting for {time_waited} sec! Consider checking your service configuration, the entrypoint of the container or the logs of the container.")
868934

869935
if 'command' in service: # must come last
870936
for cmd in service['command'].split():
@@ -946,6 +1012,9 @@ def add_to_log(self, container_name, message, cmd=''):
9461012

9471013

9481014
def start_metric_providers(self, allow_container=True, allow_other=True):
1015+
if self._dev_no_metrics:
1016+
return
1017+
9491018
print(TerminalColors.HEADER, '\nStarting metric providers', TerminalColors.ENDC)
9501019

9511020
for metric_provider in self.__metric_providers:
@@ -1099,6 +1168,9 @@ def run_flows(self):
10991168

11001169
# this function should never be called twice to avoid double logging of metrics
11011170
def stop_metric_providers(self):
1171+
if self._dev_no_metrics:
1172+
return
1173+
11021174
print(TerminalColors.HEADER, 'Stopping metric providers and parsing measurements', TerminalColors.ENDC)
11031175
errors = []
11041176
for metric_provider in self.__metric_providers:
@@ -1454,8 +1526,9 @@ def run(self):
14541526
parser.add_argument('--verbose-provider-boot', action='store_true', help='Boot metric providers gradually')
14551527
parser.add_argument('--full-docker-prune', action='store_true', help='Stop and remove all containers, build caches, volumes and images on the system')
14561528
parser.add_argument('--docker-prune', action='store_true', help='Prune all unassociated build caches, networks volumes and stopped containers on the system')
1457-
parser.add_argument('--dry-run', action='store_true', help='Removes all sleeps. Resulting measurement data will be skewed.')
1458-
parser.add_argument('--dev-repeat-run', action='store_true', help='Checks if a docker image is already in the local cache and will then not build it. Also doesn\'t clear the images after a run')
1529+
parser.add_argument('--dev-no-metrics', action='store_true', help='Skips loading the metric providers. Runs will be faster, but you will have no metric')
1530+
parser.add_argument('--dev-no-sleeps', action='store_true', help='Removes all sleeps. Resulting measurement data will be skewed.')
1531+
parser.add_argument('--dev-no-build', action='store_true', help='Checks if a container images are already in the local cache and will then not build it. Also doesn\'t clear the images after a run. Please note that skipping builds only works the second time you make a run.')
14591532
parser.add_argument('--print-logs', action='store_true', help='Prints the container and process logs to stdout')
14601533

14611534
args = parser.parse_args()
@@ -1470,9 +1543,9 @@ def run(self):
14701543
error_helpers.log_error('--allow-unsafe and skip--unsafe in conjuction is not possible')
14711544
sys.exit(1)
14721545

1473-
if args.dev_repeat_run and (args.docker_prune or args.full_docker_prune):
1546+
if args.dev_no_build and (args.docker_prune or args.full_docker_prune):
14741547
parser.print_help()
1475-
error_helpers.log_error('--dev-repeat-run blocks pruning docker images. Combination is not allowed')
1548+
error_helpers.log_error('--dev-no-build blocks pruning docker images. Combination is not allowed')
14761549
sys.exit(1)
14771550

14781551
if args.full_docker_prune and GlobalConfig().config['postgresql']['host'] == 'green-coding-postgres-container':
@@ -1515,8 +1588,8 @@ def run(self):
15151588
branch=args.branch, debug_mode=args.debug, allow_unsafe=args.allow_unsafe,
15161589
no_file_cleanup=args.no_file_cleanup, skip_system_checks=args.skip_system_checks,
15171590
skip_unsafe=args.skip_unsafe,verbose_provider_boot=args.verbose_provider_boot,
1518-
full_docker_prune=args.full_docker_prune, dry_run=args.dry_run,
1519-
dev_repeat_run=args.dev_repeat_run, docker_prune=args.docker_prune)
1591+
full_docker_prune=args.full_docker_prune, dev_no_sleeps=args.dev_no_sleeps,
1592+
dev_no_build=args.dev_no_build, dev_no_metrics=args.dev_no_metrics, docker_prune=args.docker_prune)
15201593

15211594
# Using a very broad exception makes sense in this case as we have excepted all the specific ones before
15221595
#pylint: disable=broad-except
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
name: Test depends_on
3+
author: Arne Tarara <[email protected]>
4+
description: test
5+
6+
services:
7+
test-container-1:
8+
image: alpine
9+
depends_on:
10+
test-container-2:
11+
condition: service_completed_successfully
12+
test-container-2:
13+
image: alpine
14+
15+
flow:
16+
- name: dummy
17+
container: test-container-1
18+
commands:
19+
- type: console
20+
command: pwd

tests/data/usage_scenarios/depends_on_error_unsupported_long_form.yml renamed to tests/data/usage_scenarios/depends_on_long_form.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
name: Test depends_on
2+
name: Test depends_on long_form
33
author: David Kopp
44
description: test
55

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
2+
name: Test depends_on
3+
author: David Kopp
4+
description: test
5+
6+
services:
7+
test-container-1:
8+
image: alpine
9+
depends_on:
10+
test-container-2:
11+
condition: service_healthy
12+
test-container-2:
13+
image: alpine
14+
healthcheck:
15+
test: ls
16+
interval: 1s
17+
18+
flow:
19+
- name: dummy
20+
container: test-container-1
21+
commands:
22+
- type: console
23+
command: pwd
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
name: Test depends_on
3+
author: David Kopp
4+
description: test
5+
6+
services:
7+
test-container-1:
8+
image: alpine
9+
depends_on:
10+
test-container-2:
11+
condition: service_healthy
12+
test-container-2:
13+
image: alpine
14+
15+
flow:
16+
- name: dummy
17+
container: test-container-1
18+
commands:
19+
- type: console
20+
command: pwd

tests/smoke_test.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,11 @@ def setup_module(module):
3939
err = io.StringIO()
4040
GlobalConfig(config_name='test-config.yml').config
4141
with redirect_stdout(out), redirect_stderr(err):
42-
uri = os.path.abspath(os.path.join(
43-
CURRENT_DIR, 'stress-application/'))
42+
uri = os.path.abspath(os.path.join(CURRENT_DIR, 'stress-application/'))
4443
subprocess.run(['docker', 'compose', '-f', uri+'/compose.yml', 'build'], check=True)
4544

4645
# Run the application
47-
runner = Runner(name=RUN_NAME, uri=uri, uri_type='folder', dev_repeat_run=True, skip_system_checks=False)
46+
runner = Runner(name=RUN_NAME, uri=uri, uri_type='folder', dev_no_build=True, dev_no_sleeps=True, dev_no_metrics=False, skip_system_checks=False)
4847
runner.run()
4948

5049
#pylint: disable=global-statement

0 commit comments

Comments
 (0)