Skip to content

Commit 9ff731b

Browse files
feat(deployment)!: Migrate package orchestration to Docker Compose (resolves #1177); Temporarily remove support for multi-node deployments. (#1178)
Co-authored-by: kirkrodrigues <[email protected]>
1 parent 237ba42 commit 9ff731b

File tree

23 files changed

+1891
-1783
lines changed

23 files changed

+1891
-1783
lines changed

components/clp-package-utils/clp_package_utils/controller.py

Lines changed: 813 additions & 0 deletions
Large diffs are not rendered by default.

components/clp-package-utils/clp_package_utils/general.py

Lines changed: 149 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import enum
22
import errno
3+
import json
34
import os
45
import pathlib
56
import re
@@ -15,6 +16,9 @@
1516
CLP_DEFAULT_CREDENTIALS_FILE_PATH,
1617
CLP_SHARED_CONFIG_FILENAME,
1718
CLPConfig,
19+
CONTAINER_AWS_CONFIG_DIRECTORY,
20+
CONTAINER_CLP_HOME,
21+
CONTAINER_INPUT_LOGS_ROOT_DIR,
1822
DB_COMPONENT_NAME,
1923
QueryEngine,
2024
QUEUE_COMPONENT_NAME,
@@ -42,15 +46,50 @@
4246
EXTRACT_IR_CMD = "i"
4347
EXTRACT_JSON_CMD = "j"
4448

45-
# Paths
46-
CONTAINER_AWS_CONFIG_DIRECTORY = pathlib.Path("/") / ".aws"
47-
CONTAINER_CLP_HOME = pathlib.Path("/") / "opt" / "clp"
48-
CONTAINER_INPUT_LOGS_ROOT_DIR = pathlib.Path("/") / "mnt" / "logs"
49-
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH = pathlib.Path("etc") / "clp-config.yml"
50-
5149
DOCKER_MOUNT_TYPE_STRINGS = ["bind"]
5250

5351

52+
class DockerDependencyError(OSError):
53+
"""Base class for errors related to Docker dependencies."""
54+
55+
56+
class DockerNotAvailableError(DockerDependencyError):
57+
"""Raised when Docker or Docker Compose is unavailable."""
58+
59+
def __init__(self, base_message: str, process_error: subprocess.CalledProcessError) -> None:
60+
message = base_message
61+
output_chunks: list[str] = []
62+
for stream in (process_error.stdout, process_error.stderr):
63+
if stream is None:
64+
continue
65+
if isinstance(stream, bytes):
66+
text = stream.decode(errors="replace")
67+
else:
68+
text = str(stream)
69+
text = text.strip()
70+
if text:
71+
output_chunks.append(text)
72+
if len(output_chunks) > 0:
73+
message = "\n".join([base_message, *output_chunks])
74+
super().__init__(errno.ENOENT, message)
75+
76+
77+
class DockerComposeProjectNotRunningError(DockerDependencyError):
78+
"""Raised when a Docker Compose project is not running but should be."""
79+
80+
def __init__(self, project_name: str) -> None:
81+
super().__init__(errno.ESRCH, f"Docker Compose project '{project_name}' is not running.")
82+
83+
84+
class DockerComposeProjectAlreadyRunningError(DockerDependencyError):
85+
"""Raised when a Docker Compose project is already running but should not be."""
86+
87+
def __init__(self, project_name: str) -> None:
88+
super().__init__(
89+
errno.EEXIST, f"Docker Compose project '{project_name}' is already running."
90+
)
91+
92+
5493
class DockerMountType(enum.IntEnum):
5594
BIND = 0
5695

@@ -98,13 +137,6 @@ def __init__(self, clp_home: pathlib.Path, docker_clp_home: pathlib.Path):
98137
self.generated_config_file: Optional[DockerMount] = None
99138

100139

101-
def _validate_data_directory(data_dir: pathlib.Path, component_name: str) -> None:
102-
try:
103-
validate_path_could_be_dir(data_dir)
104-
except ValueError as ex:
105-
raise ValueError(f"{component_name} data directory is invalid: {ex}")
106-
107-
108140
def get_clp_home():
109141
# Determine CLP_HOME from an environment variable or this script's path
110142
clp_home = None
@@ -132,63 +164,30 @@ def generate_container_name(job_type: str) -> str:
132164
return f"clp-{job_type}-{str(uuid.uuid4())[-4:]}"
133165

134166

135-
def check_dependencies():
167+
def check_docker_dependencies(should_compose_project_be_running: bool, project_name: str):
168+
"""
169+
Checks if Docker and Docker Compose are installed, and whether a Docker Compose project is
170+
running.
171+
172+
:param should_compose_project_be_running:
173+
:param project_name: The Docker Compose project name to check.
174+
:raise DockerNotAvailableError: If any Docker dependency is not installed.
175+
:raise DockerComposeProjectNotRunningError: If the project isn't running when it should be.
176+
:raise DockerComposeProjectAlreadyRunningError: If the project is running when it shouldn't be.
177+
"""
136178
try:
137-
subprocess.run(
138-
"command -v docker",
139-
shell=True,
140-
stdout=subprocess.PIPE,
179+
subprocess.check_output(
180+
["docker", "--version"],
141181
stderr=subprocess.STDOUT,
142-
check=True,
143-
)
144-
except subprocess.CalledProcessError:
145-
raise EnvironmentError("docker is not installed or available on the path")
146-
try:
147-
subprocess.run(
148-
["docker", "ps"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True
149182
)
150-
except subprocess.CalledProcessError:
151-
raise EnvironmentError("docker cannot run without superuser privileges (sudo).")
183+
except subprocess.CalledProcessError as e:
184+
raise DockerNotAvailableError("docker is not installed or available on the path", e) from e
152185

153-
154-
def is_container_running(container_name):
155-
# fmt: off
156-
cmd = [
157-
"docker", "ps",
158-
# Only return container IDs
159-
"--quiet",
160-
"--filter", f"name={container_name}"
161-
]
162-
# fmt: on
163-
proc = subprocess.run(cmd, stdout=subprocess.PIPE)
164-
if proc.stdout.decode("utf-8"):
165-
return True
166-
167-
return False
168-
169-
170-
def is_container_exited(container_name):
171-
# fmt: off
172-
cmd = [
173-
"docker", "ps",
174-
# Only return container IDs
175-
"--quiet",
176-
"--filter", f"name={container_name}",
177-
"--filter", "status=exited"
178-
]
179-
# fmt: on
180-
proc = subprocess.run(cmd, stdout=subprocess.PIPE)
181-
if proc.stdout.decode("utf-8"):
182-
return True
183-
184-
return False
185-
186-
187-
def validate_log_directory(logs_dir: pathlib.Path, component_name: str) -> None:
188-
try:
189-
validate_path_could_be_dir(logs_dir)
190-
except ValueError as ex:
191-
raise ValueError(f"{component_name} logs directory is invalid: {ex}")
186+
is_running = _is_docker_compose_project_running(project_name)
187+
if should_compose_project_be_running and not is_running:
188+
raise DockerComposeProjectNotRunningError(project_name)
189+
if not should_compose_project_be_running and is_running:
190+
raise DockerComposeProjectAlreadyRunningError(project_name)
192191

193192

194193
def validate_port(port_name: str, hostname: str, port: int):
@@ -309,6 +308,19 @@ def generate_container_config(
309308
return container_clp_config, docker_mounts
310309

311310

311+
def generate_docker_compose_container_config(clp_config: CLPConfig) -> CLPConfig:
312+
"""
313+
Copies the given config and transforms mount paths and hosts for Docker Compose.
314+
315+
:param clp_config:
316+
:return: The container config.
317+
"""
318+
container_clp_config = clp_config.model_copy(deep=True)
319+
container_clp_config.transform_for_container()
320+
321+
return container_clp_config
322+
323+
312324
def generate_worker_config(clp_config: CLPConfig) -> WorkerConfig:
313325
worker_config = WorkerConfig()
314326
worker_config.package = clp_config.package.model_copy(deep=True)
@@ -345,17 +357,15 @@ def dump_container_config(
345357
return config_file_path_on_container, config_file_path_on_host
346358

347359

348-
def dump_shared_container_config(
349-
container_clp_config: CLPConfig, clp_config: CLPConfig
350-
) -> Tuple[pathlib.Path, pathlib.Path]:
360+
def dump_shared_container_config(container_clp_config: CLPConfig, clp_config: CLPConfig):
351361
"""
352362
Dumps the given container config to `CLP_SHARED_CONFIG_FILENAME` in the logs directory, so that
353363
it's accessible in the container.
354364
355365
:param container_clp_config:
356366
:param clp_config:
357367
"""
358-
return dump_container_config(container_clp_config, clp_config, CLP_SHARED_CONFIG_FILENAME)
368+
dump_container_config(container_clp_config, clp_config, CLP_SHARED_CONFIG_FILENAME)
359369

360370

361371
def generate_container_start_cmd(
@@ -431,11 +441,6 @@ def load_config_file(
431441
validate_path_for_container_mount(clp_config.data_directory)
432442
validate_path_for_container_mount(clp_config.logs_directory)
433443

434-
# Make data and logs directories node-specific
435-
hostname = socket.gethostname()
436-
clp_config.data_directory /= hostname
437-
clp_config.logs_directory /= hostname
438-
439444
return clp_config
440445

441446

@@ -488,35 +493,44 @@ def validate_and_load_redis_credentials_file(
488493
clp_config.redis.load_credentials_from_file(clp_config.credentials_file_path)
489494

490495

491-
def validate_db_config(clp_config: CLPConfig, data_dir: pathlib.Path, logs_dir: pathlib.Path):
496+
def validate_db_config(
497+
clp_config: CLPConfig,
498+
component_config: pathlib.Path,
499+
data_dir: pathlib.Path,
500+
logs_dir: pathlib.Path,
501+
):
502+
if not component_config.exists():
503+
raise ValueError(f"{DB_COMPONENT_NAME} configuration file missing: '{component_config}'.")
492504
_validate_data_directory(data_dir, DB_COMPONENT_NAME)
493-
validate_log_directory(logs_dir, DB_COMPONENT_NAME)
505+
_validate_log_directory(logs_dir, DB_COMPONENT_NAME)
494506

495507
validate_port(f"{DB_COMPONENT_NAME}.port", clp_config.database.host, clp_config.database.port)
496508

497509

498510
def validate_queue_config(clp_config: CLPConfig, logs_dir: pathlib.Path):
499-
validate_log_directory(logs_dir, QUEUE_COMPONENT_NAME)
511+
_validate_log_directory(logs_dir, QUEUE_COMPONENT_NAME)
500512

501513
validate_port(f"{QUEUE_COMPONENT_NAME}.port", clp_config.queue.host, clp_config.queue.port)
502514

503515

504516
def validate_redis_config(
505-
clp_config: CLPConfig, data_dir: pathlib.Path, logs_dir: pathlib.Path, base_config: pathlib.Path
517+
clp_config: CLPConfig,
518+
component_config: pathlib.Path,
519+
data_dir: pathlib.Path,
520+
logs_dir: pathlib.Path,
506521
):
507-
_validate_data_directory(data_dir, REDIS_COMPONENT_NAME)
508-
validate_log_directory(logs_dir, REDIS_COMPONENT_NAME)
509-
510-
if not base_config.exists():
522+
if not component_config.exists():
511523
raise ValueError(
512-
f"{REDIS_COMPONENT_NAME} base configuration at {str(base_config)} is missing."
524+
f"{REDIS_COMPONENT_NAME} configuration file missing: '{component_config}'."
513525
)
526+
_validate_data_directory(data_dir, REDIS_COMPONENT_NAME)
527+
_validate_log_directory(logs_dir, REDIS_COMPONENT_NAME)
514528

515529
validate_port(f"{REDIS_COMPONENT_NAME}.port", clp_config.redis.host, clp_config.redis.port)
516530

517531

518532
def validate_reducer_config(clp_config: CLPConfig, logs_dir: pathlib.Path, num_workers: int):
519-
validate_log_directory(logs_dir, REDUCER_COMPONENT_NAME)
533+
_validate_log_directory(logs_dir, REDUCER_COMPONENT_NAME)
520534

521535
for i in range(0, num_workers):
522536
validate_port(
@@ -527,10 +541,17 @@ def validate_reducer_config(clp_config: CLPConfig, logs_dir: pathlib.Path, num_w
527541

528542

529543
def validate_results_cache_config(
530-
clp_config: CLPConfig, data_dir: pathlib.Path, logs_dir: pathlib.Path
544+
clp_config: CLPConfig,
545+
component_config: pathlib.Path,
546+
data_dir: pathlib.Path,
547+
logs_dir: pathlib.Path,
531548
):
549+
if not component_config.exists():
550+
raise ValueError(
551+
f"{RESULTS_CACHE_COMPONENT_NAME} configuration file missing: '{component_config}'."
552+
)
532553
_validate_data_directory(data_dir, RESULTS_CACHE_COMPONENT_NAME)
533-
validate_log_directory(logs_dir, RESULTS_CACHE_COMPONENT_NAME)
554+
_validate_log_directory(logs_dir, RESULTS_CACHE_COMPONENT_NAME)
534555

535556
validate_port(
536557
f"{RESULTS_CACHE_COMPONENT_NAME}.port",
@@ -707,3 +728,44 @@ def get_celery_connection_env_vars_list(container_clp_config: CLPConfig) -> List
707728
]
708729

709730
return env_vars
731+
732+
733+
def _is_docker_compose_project_running(project_name: str) -> bool:
734+
"""
735+
Checks if a Docker Compose project is running.
736+
737+
:param project_name:
738+
:return: Whether at least one instance is running.
739+
:raise DockerNotAvailableError: If Docker Compose is not installed or fails. The error message
740+
includes the Docker command's output when available.
741+
"""
742+
cmd = ["docker", "compose", "ls", "--format", "json", "--filter", f"name={project_name}"]
743+
try:
744+
output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
745+
running_instances = json.loads(output)
746+
return len(running_instances) >= 1
747+
except subprocess.CalledProcessError as e:
748+
raise DockerNotAvailableError(
749+
"Docker Compose is not installed or not functioning properly.", e
750+
) from e
751+
752+
753+
def _validate_data_directory(data_dir: pathlib.Path, component_name: str) -> None:
754+
try:
755+
validate_path_could_be_dir(data_dir)
756+
except ValueError as ex:
757+
raise ValueError(f"{component_name} data directory is invalid: {ex}")
758+
759+
760+
def _validate_log_directory(logs_dir: pathlib.Path, component_name: str):
761+
"""
762+
Validates that the logs directory path for a component is valid.
763+
764+
:param logs_dir:
765+
:param component_name:
766+
:raise ValueError: If the path is invalid or can't be a directory.
767+
"""
768+
try:
769+
validate_path_could_be_dir(logs_dir)
770+
except ValueError as ex:
771+
raise ValueError(f"{component_name} logs directory is invalid: {ex}")

components/clp-package-utils/clp_package_utils/scripts/archive_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
from clp_py_utils.clp_config import (
1010
CLP_DB_PASS_ENV_VAR_NAME,
1111
CLP_DB_USER_ENV_VAR_NAME,
12+
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
1213
CLP_DEFAULT_DATASET_NAME,
1314
StorageEngine,
1415
StorageType,
1516
)
1617

1718
from clp_package_utils.general import (
18-
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
1919
CLPConfig,
2020
DockerMount,
2121
dump_container_config,

components/clp-package-utils/clp_package_utils/scripts/compress.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010
from clp_py_utils.clp_config import (
1111
CLP_DB_PASS_ENV_VAR_NAME,
1212
CLP_DB_USER_ENV_VAR_NAME,
13+
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
1314
CLP_DEFAULT_DATASET_NAME,
1415
StorageEngine,
1516
)
1617
from job_orchestration.scheduler.job_config import InputType
1718

1819
from clp_package_utils.general import (
19-
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
2020
CONTAINER_INPUT_LOGS_ROOT_DIR,
2121
dump_container_config,
2222
generate_container_config,

components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010
ARCHIVE_MANAGER_ACTION_NAME,
1111
CLP_DB_PASS_ENV_VAR_NAME,
1212
CLP_DB_USER_ENV_VAR_NAME,
13+
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
1314
StorageEngine,
1415
StorageType,
1516
)
1617
from clp_py_utils.s3_utils import generate_container_auth_options
1718

1819
from clp_package_utils.general import (
19-
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
2020
dump_container_config,
2121
generate_container_config,
2222
generate_container_name,

0 commit comments

Comments
 (0)