Use depends_on for container startup order (refactored) (#593)

davidkopp · web-flow · commit 36228cef71dc · 2023-12-20T11:45:06.000+01:00
* Use depends_on for container startup order

* Simplify test

* Improve logging, comments and variable names

* change subprocess.run to subprocess.check_output

* Remove waiting loop and raise error instead of warning

* simplify usage scenarios for tests

* Prevent cycle dependencies

* Add waiting loop again and make max waiting time configurable

* Add check if unsupported depends_on long form is used

* Change location of wait_time_dependencies in config

* Fix depends_on cycle detection

* Add depends_on to schema for check

* Refactor ordering of services
diff --git a/config.yml.example b/config.yml.example
@@ -44,6 +44,8 @@ measurement:
   idle-time-end: 5
   flow-process-runtime: 3800
   phase-transition-time: 1
+  boot:
+    wait_time_dependencies: 20
   metric-providers:
 
   # Please select the needed providers according to the working ones on your system
diff --git a/lib/schema_checker.py b/lib/schema_checker.py
@@ -98,6 +98,7 @@ def check_usage_scenario(self, usage_scenario):
                     Optional("networks"): self.single_or_list(Use(self.contains_no_invalid_chars)),
                     Optional("environment"): self.single_or_list(Or(dict,str)),
                     Optional("ports"): self.single_or_list(Or(str, int)),
+                    Optional("depends_on"): Or([str],dict),
                     Optional("setup-commands"): [str],
                     Optional("volumes"): self.single_or_list(str),
                     Optional("folder-destination"):str,
diff --git a/runner.py b/runner.py
@@ -21,6 +21,7 @@
 import random
 import shutil
 import yaml
+from collections import OrderedDict
 
 
 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -643,18 +644,50 @@ def setup_networks(self):
             self.__networks.append(network)
             self.__join_default_network = True
 
+    def order_services(self, services):
+        """Return `services` as an OrderedDict in startup order, i.e. every service comes after the services in its 'depends_on' list.
+        Raises RuntimeError on a depends_on cycle, on the unsupported long (dict) form, or on a dependency that is not a defined service."""
+        names_ordered = []
+        def order_service_names(service_name, visited=None):
+            visited = set() if visited is None else visited
+            if service_name in visited:
+                raise RuntimeError(f"Cycle found in depends_on definition with service '{service_name}'!")
+            visited.add(service_name)
+            depends_on = services[service_name].get('depends_on') or []
+            if isinstance(depends_on, dict):
+                raise RuntimeError(f"Service definition of {service_name} uses the long form of 'depends_on', however, GMT only supports the short form!")
+            for dep in depends_on:
+                if dep not in services:  # report the unknown name instead of a bare KeyError on the lookup
+                    raise RuntimeError(f"Service '{dep}' listed in depends_on of '{service_name}' is not defined in the usage scenario!")
+                if dep not in names_ordered:  # dependencies that are already placed need no second visit
+                    order_service_names(dep, visited)
+            if service_name not in names_ordered:
+                names_ordered.append(service_name)
+
+        # Start from every service so that services nobody depends on are placed as well
+        for service_name in services:
+            order_service_names(service_name)
+        print("Startup order: ", names_ordered)
+        return OrderedDict((key, services[key]) for key in names_ordered)
+
     def setup_services(self):
+        config = GlobalConfig().config
+        print(TerminalColors.HEADER, '\nSetting up services', TerminalColors.ENDC)
         # technically the usage_scenario needs no services and can also operate on an empty list
         # This use case is when you have running containers on your host and want to benchmark some code running in them
-        for service_name in self._usage_scenario.get('services', []):
-            print(TerminalColors.HEADER, '\nSetting up containers', TerminalColors.ENDC)
+        services = self._usage_scenario.get('services', {})
 
-            if 'container_name' in self._usage_scenario['services'][service_name]:
-                container_name = self._usage_scenario['services'][service_name]['container_name']
+        # Check if there are service dependencies defined with 'depends_on'.
+        # If so, change the order of the services accordingly.
+        services_ordered = self.order_services(services)
+        for service_name, service in services_ordered.items():
+
+            if 'container_name' in service:
+                container_name = service['container_name']
             else:
                 container_name = service_name
 
-            service = self._usage_scenario['services'][service_name]
+            print(TerminalColors.HEADER, '\nSetting up container: ', container_name, TerminalColors.ENDC)
 
             print('Resetting container')
             # By using the -f we return with 0 if no container is found
@@ -801,10 +834,38 @@ def setup_services(self):
             if 'cmd' in service:  # must come last
                 docker_run_string.append(service['cmd'])
 
+            # Before starting the container, check if the dependent containers are "ready". 
+            # If not, wait for some time. If the container is not ready after a certain time, throw an error.
+            # Currently we consider "ready" only as "running".
+            # In the future we want to implement a health check to know if dependent containers are actually ready.
+            if 'depends_on' in service:
+                for dependent_container in service['depends_on']:
+                    time_waited = 0
+                    state = ""
+                    max_waiting_time = config['measurement']['boot']['wait_time_dependencies']
+                    while time_waited < max_waiting_time:
+                        # TODO: Check health status instead if `healthcheck` is enabled (https://github.com/green-coding-berlin/green-metrics-tool/issues/423)
+                        # This waiting loop is actually a pre-work for the upcoming health check. For the check if the container is "running", as implemented here, the waiting loop is not needed.
+                        status_output = subprocess.check_output(
+                            ["docker", "container", "inspect", "-f", "{{.State.Status}}", dependent_container],
+                            stderr=subprocess.STDOUT,
+                            text=True
+                        )
+                        state = status_output.strip()
+                        if state == "running":
+                            break;
+                        else:
+                            print(f"State of container '{dependent_container}': {state}. Waiting for 1 second")
+                            self.custom_sleep(1)
+                            time_waited += 1
+
+                    if state != "running":
+                        raise RuntimeError(f"Dependent container '{dependent_container}' of '{container_name}' is not running after waiting for {time_waited} sec! Consider checking your service configuration, the entrypoint of the container or the logs of the container.")
+
             print(f"Running docker run with: {' '.join(docker_run_string)}")
 
             # docker_run_string must stay as list, cause this forces items to be quoted and escaped and prevents
-            # injection of unwawnted params
+            # injection of unwanted params
 
             ps = subprocess.run(
                 docker_run_string,
diff --git a/tests/data/usage_scenarios/depends_on.yml b/tests/data/usage_scenarios/depends_on.yml
@@ -0,0 +1,28 @@
+---
+name: Test depends_on
+author: David Kopp
+description: test
+
+services:
+  test-container-1:
+    image: alpine
+    depends_on:
+      - test-container-2
+      - test-container-3
+  test-container-2:
+    image: alpine
+  test-container-3:
+    image: alpine
+    depends_on:
+      - test-container-4
+  test-container-4:
+    image: alpine
+    depends_on:
+      - test-container-2
+
+flow:
+  - name: dummy
+    container: test-container-1
+    commands:
+      - type: console
+        command: pwd
diff --git a/tests/data/usage_scenarios/depends_on_error_cycle.yml b/tests/data/usage_scenarios/depends_on_error_cycle.yml
@@ -0,0 +1,22 @@
+---
+name: Test depends_on
+author: David Kopp
+description: test
+
+services:
+  test-container-1:
+    image: alpine
+    depends_on:
+      - test-container-2
+  test-container-2:
+    image: alpine
+    depends_on:
+      - test-container-1
+
+flow:
+  - name: Stress
+    container: test-container-1
+    commands:
+      - type: console
+        command: stress-ng -c 1 -t 1 -q
+        note: Starting Stress
diff --git a/tests/data/usage_scenarios/depends_on_error_not_running.yml b/tests/data/usage_scenarios/depends_on_error_not_running.yml
@@ -0,0 +1,20 @@
+---
+name: Test depends_on
+author: David Kopp
+description: test
+
+services:
+  test-container-1:
+    image: alpine
+    depends_on:
+      - test-container-2
+  test-container-2:
+    image: hello-world # Container exits immediately after start
+
+flow:
+  - name: Stress
+    container: test-container-1
+    commands:
+      - type: console
+        command: stress-ng -c 1 -t 1 -q
+        note: Starting Stress
diff --git a/tests/data/usage_scenarios/depends_on_error_unsupported_long_form.yml b/tests/data/usage_scenarios/depends_on_error_unsupported_long_form.yml
@@ -0,0 +1,20 @@
+---
+name: Test depends_on
+author: David Kopp
+description: test
+
+services:
+  test-container-1:
+    image: alpine
+    depends_on:
+      test-container-2:
+        condition: service_started
+  test-container-2:
+    image: alpine
+
+flow:
+  - name: dummy
+    container: test-container-1
+    commands:
+      - type: console
+        command: pwd
diff --git a/tests/test_usage_scenario.py b/tests/test_usage_scenario.py
@@ -222,6 +222,56 @@ def get_contents_of_bound_volume(runner):
         Tests.cleanup(runner)
     return ls
 
+# depends_on: [array] (optional)
+# Array of container names to express dependencies
+def test_depends_on_order():
+    """Run depends_on.yml up to setup_services and check that stdout names each dependency before its dependent."""
+    captured_out, captured_err = io.StringIO(), io.StringIO()
+    runner = Tests.setup_runner(usage_scenario='depends_on.yml', dry_run=True)
+    with redirect_stdout(captured_out), redirect_stderr(captured_err):
+        try:
+            Tests.run_until(runner, 'setup_services')
+        finally:
+            runner.cleanup()
+    stdout_text = captured_out.getvalue()
+
+    # Expected order: test-container-2, test-container-4, test-container-3, test-container-1
+    expected = ["test-container-2", "test-container-4", "test-container-3", "test-container-1"]
+    for earlier, later in zip(expected, expected[1:]):
+        assert_order(stdout_text, earlier, later)
+
+def assert_order(text, first, second):
+    """Assert that `first` and `second` both occur in `text` and that the first occurrence of `first` comes earlier."""
+    index1 = text.find(first)
+    index2 = text.find(second)
+    assert index1 != -1 and index2 != -1, \
+        Tests.assertion_info(f"stdout contain the container names '{first}' and '{second}'.", \
+                             f"stdout doesn't contain '{first}' and/or '{second}'.")
+    # Adjacent literals: a backslash continuation inside the string would embed this line's indentation in the message
+    assert index1 < index2, Tests.assertion_info(f'{first} should start first, '
+                             f'because it is a dependency of {second}.', f'{second} started first')
+
+def test_depends_on_error_not_running():
+    """setup_services must raise a RuntimeError reporting that dependency test-container-2 is not running."""
+    runner = Tests.setup_runner(usage_scenario='depends_on_error_not_running.yml', dry_run=True)
+    with pytest.raises(RuntimeError) as exc_info:
+        Tests.run_until(runner, 'setup_services')
+    assert "Dependent container 'test-container-2' of 'test-container-1' is not running" in str(exc_info.value), Tests.assertion_info('test-container-2 is not running', str(exc_info.value))
+
+def test_depends_on_error_cyclic_dependency():
+    """A depends_on cycle between two services must raise the cycle RuntimeError naming test-container-1."""
+    runner = Tests.setup_runner(usage_scenario='depends_on_error_cycle.yml', dry_run=True)
+    with pytest.raises(RuntimeError) as exc_info:
+        Tests.run_until(runner, 'setup_services')
+    assert "Cycle found in depends_on definition with service 'test-container-1'" in str(exc_info.value), Tests.assertion_info('cycle in depends_on with test-container-1', str(exc_info.value))
+
+def test_depends_on_error_unsupported_long_form():
+    """The dict ('long') form of depends_on must be rejected with a RuntimeError that mentions 'long form'."""
+    runner = Tests.setup_runner(usage_scenario='depends_on_error_unsupported_long_form.yml', dry_run=True)
+    with pytest.raises(RuntimeError) as exc_info:
+        Tests.run_until(runner, 'setup_services')
+    assert "long form" in str(exc_info.value), Tests.assertion_info('long form is not supported', str(exc_info.value))
+
 #volumes: [array] (optional)
 #Array of volumes to be mapped. Only read of runner.py is executed with --allow-unsafe flag
 def test_volume_bindings_allow_unsafe_true():