Skip to content

Commit dab1010

Browse files
authored
refactor(package): Standardize how configuration and credentials are passed to containers (resolves #1149). (#1152)
1 parent 76db060 commit dab1010

File tree

19 files changed

+379
-186
lines changed

19 files changed

+379
-186
lines changed

components/clp-package-utils/clp_package_utils/general.py

Lines changed: 122 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99
import typing
1010
import uuid
1111
from enum import auto
12-
from typing import List, Optional, Tuple
12+
from typing import Dict, List, Optional, Tuple
1313

1414
import yaml
1515
from clp_py_utils.clp_config import (
1616
CLP_DEFAULT_CREDENTIALS_FILE_PATH,
17+
CLP_SHARED_CONFIG_FILENAME,
1718
CLPConfig,
1819
DB_COMPONENT_NAME,
1920
QueryEngine,
@@ -95,6 +96,7 @@ def __init__(self, clp_home: pathlib.Path, docker_clp_home: pathlib.Path):
9596
self.archives_output_dir: typing.Optional[DockerMount] = None
9697
self.stream_output_dir: typing.Optional[DockerMount] = None
9798
self.aws_config_dir: typing.Optional[DockerMount] = None
99+
self.generated_config_file: typing.Optional[DockerMount] = None
98100

99101

100102
def _validate_data_directory(data_dir: pathlib.Path, component_name: str) -> None:
@@ -285,6 +287,18 @@ def generate_container_config(
285287
container_clp_config.stream_output.get_directory(),
286288
)
287289

290+
if not is_path_already_mounted(
291+
clp_home,
292+
CONTAINER_CLP_HOME,
293+
clp_config.get_shared_config_file_path(),
294+
container_clp_config.get_shared_config_file_path(),
295+
):
296+
docker_mounts.generated_config_file = DockerMount(
297+
DockerMountType.BIND,
298+
clp_config.get_shared_config_file_path(),
299+
container_clp_config.get_shared_config_file_path(),
300+
)
301+
288302
# Only create the mount if the directory exists
289303
if clp_config.aws_config_directory is not None:
290304
container_clp_config.aws_config_directory = CONTAINER_AWS_CONFIG_DIRECTORY
@@ -308,33 +322,57 @@ def generate_worker_config(clp_config: CLPConfig) -> WorkerConfig:
308322
return worker_config
309323

310324

325+
def get_container_config_filename(container_name: str) -> str:
    """
    :param container_name:
    :return: The dot-prefixed name of the YAML config file generated for the given container.
    """
    return ".{}-config.yml".format(container_name)
327+
328+
311329
def dump_container_config(
    container_clp_config: CLPConfig, clp_config: CLPConfig, config_filename: str
) -> Tuple[pathlib.Path, pathlib.Path]:
    """
    Writes the given container config to the logs directory, so that it's accessible in the
    container.

    :param container_clp_config: The config to write.
    :param clp_config: The corresponding config on the host (used to determine the logs directory).
    :param config_filename: Name of the file to create inside the logs directory.
    :return: A tuple of the path to the config file in the container and the path to it on the
    host, in that order.
    """
    config_file_path_on_host = clp_config.logs_directory / config_filename
    config_file_path_on_container = container_clp_config.logs_directory / config_filename
    # The file is written through the host path; the container path is only computed so callers
    # can refer to the same file from inside the container.
    with open(config_file_path_on_host, "w") as f:
        yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)

    return config_file_path_on_container, config_file_path_on_host
328347

329348

349+
def dump_shared_container_config(
    container_clp_config: CLPConfig, clp_config: CLPConfig
) -> Tuple[pathlib.Path, pathlib.Path]:
    """
    Dumps the given container config to `CLP_SHARED_CONFIG_FILENAME` in the logs directory by
    delegating to `dump_container_config`.

    :param container_clp_config:
    :param clp_config:
    :return: Whatever `dump_container_config` returns for the shared config file.
    """
    shared_config_paths = dump_container_config(
        container_clp_config, clp_config, CLP_SHARED_CONFIG_FILENAME
    )
    return shared_config_paths
360+
361+
330362
def generate_container_start_cmd(
331-
container_name: str, container_mounts: List[Optional[DockerMount]], container_image: str
363+
container_name: str,
364+
container_mounts: List[Optional[DockerMount]],
365+
container_image: str,
366+
extra_env_vars: Optional[Dict[str, str]] = None,
332367
) -> List[str]:
333368
"""
334-
Generates the command to start a container with the given mounts and name.
369+
Generates the command to start a container with the given mounts, environment variables, and
370+
name.
371+
335372
:param container_name:
336373
:param container_mounts:
337374
:param container_image:
375+
:param extra_env_vars: Environment variables to set on top of the predefined ones.
338376
:return: The command.
339377
"""
340378
clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
@@ -350,6 +388,12 @@ def generate_container_start_cmd(
350388
"--name", container_name,
351389
"--log-driver", "local"
352390
]
391+
env_vars = {
392+
"PYTHONPATH": clp_site_packages_dir,
393+
**(extra_env_vars if extra_env_vars is not None else {}),
394+
}
395+
for key, value in env_vars.items():
396+
container_start_cmd.extend(["-e", f"{key}={value}"])
353397
for mount in container_mounts:
354398
if mount:
355399
container_start_cmd.append("--mount")
@@ -428,21 +472,21 @@ def validate_and_load_db_credentials_file(
428472
clp_config: CLPConfig, clp_home: pathlib.Path, generate_default_file: bool
429473
):
430474
validate_credentials_file_path(clp_config, clp_home, generate_default_file)
431-
clp_config.load_database_credentials_from_file()
475+
clp_config.database.load_credentials_from_file(clp_config.credentials_file_path)
432476

433477

434478
def validate_and_load_queue_credentials_file(
    clp_config: CLPConfig, clp_home: pathlib.Path, generate_default_file: bool
):
    """
    Runs `validate_credentials_file_path` and then loads the queue credentials from the
    credentials file configured in `clp_config`.

    :param clp_config:
    :param clp_home: Forwarded to `validate_credentials_file_path`.
    :param generate_default_file: Forwarded to `validate_credentials_file_path`.
    """
    validate_credentials_file_path(clp_config, clp_home, generate_default_file)

    queue_config = clp_config.queue
    queue_config.load_credentials_from_file(clp_config.credentials_file_path)
439483

440484

441485
def validate_and_load_redis_credentials_file(
    clp_config: CLPConfig, clp_home: pathlib.Path, generate_default_file: bool
):
    """
    Runs `validate_credentials_file_path` and then loads the Redis credentials from the
    credentials file configured in `clp_config`.

    :param clp_config:
    :param clp_home: Forwarded to `validate_credentials_file_path`.
    :param generate_default_file: Forwarded to `validate_credentials_file_path`.
    """
    validate_credentials_file_path(clp_config, clp_home, generate_default_file)

    redis_config = clp_config.redis
    redis_config.load_credentials_from_file(clp_config.credentials_file_path)
446490

447491

448492
def validate_db_config(clp_config: CLPConfig, data_dir: pathlib.Path, logs_dir: pathlib.Path):
@@ -599,3 +643,68 @@ def is_retention_period_configured(clp_config: CLPConfig) -> bool:
599643
return True
600644

601645
return False
646+
647+
648+
def get_common_env_vars_list(
    include_clp_home_env_var=True,
) -> List[str]:
    """
    Builds the environment variables shared by the package's Docker containers: `PYTHONPATH`
    (pointing at the site-packages directory under `CONTAINER_CLP_HOME`) and, optionally,
    `CLP_HOME`.

    :param include_clp_home_env_var: Whether to include the `CLP_HOME` entry.
    :return: A list of common environment variables for Docker containers, in the format
    "KEY=VALUE".
    """
    site_packages_path = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
    python_path_entry = f"PYTHONPATH={site_packages_path}"

    if not include_clp_home_env_var:
        return [python_path_entry]

    return [python_path_entry, f"CLP_HOME={CONTAINER_CLP_HOME}"]
663+
664+
665+
def get_credential_env_vars_list(
    container_clp_config: CLPConfig,
    include_db_credentials=False,
    include_queue_credentials=False,
    include_redis_credentials=False,
) -> List[str]:
    """
    Collects the requested credentials from the given config as environment variable
    assignments. Entries are emitted in a fixed order: database, then queue, then Redis.

    :param container_clp_config: Config whose `database`, `queue`, and `redis` sections supply the
    credentials. A section is only read when its flag is set.
    :param include_db_credentials: Whether to emit `CLP_DB_USER` and `CLP_DB_PASS`.
    :param include_queue_credentials: Whether to emit `CLP_QUEUE_USER` and `CLP_QUEUE_PASS`.
    :param include_redis_credentials: Whether to emit `CLP_REDIS_PASS`.
    :return: A list of credential environment variables for Docker containers, in the format
    "KEY=VALUE".
    """
    credential_env_vars: List[str] = []

    if include_db_credentials:
        db_config = container_clp_config.database
        credential_env_vars += [
            f"CLP_DB_USER={db_config.username}",
            f"CLP_DB_PASS={db_config.password}",
        ]

    if include_queue_credentials:
        queue_config = container_clp_config.queue
        credential_env_vars += [
            f"CLP_QUEUE_USER={queue_config.username}",
            f"CLP_QUEUE_PASS={queue_config.password}",
        ]

    if include_redis_credentials:
        credential_env_vars += [f"CLP_REDIS_PASS={container_clp_config.redis.password}"]

    return credential_env_vars
693+
694+
695+
def get_celery_connection_env_vars_list(container_clp_config: CLPConfig) -> List[str]:
    """
    Builds the Celery broker (`BROKER_URL`) and result-backend (`RESULT_BACKEND`) connection URLs
    from the queue and Redis settings in the given config.

    The queue username/password and the Redis password are percent-encoded before being embedded
    in the URLs, so that reserved characters (e.g., "@", ":", "/", "#") in a credential can't be
    mistaken for URL delimiters. Credentials made up only of unreserved characters are emitted
    unchanged.

    :param container_clp_config:
    :return: A list of Celery connection environment variables for Docker containers, in the format
    "KEY=VALUE".
    """
    # Imported locally so this function doesn't depend on the module's import block.
    from urllib.parse import quote

    queue_config = container_clp_config.queue
    redis_config = container_clp_config.redis

    # `safe=""` also encodes "/", which `quote` leaves untouched by default. `str()` keeps the
    # previous lenient formatting for non-string values.
    queue_username = quote(str(queue_config.username), safe="")
    queue_password = quote(str(queue_config.password), safe="")
    redis_password = quote(str(redis_config.password), safe="")

    env_vars = [
        f"BROKER_URL=amqp://{queue_username}:{queue_password}"
        f"@{queue_config.host}:{queue_config.port}",
        f"RESULT_BACKEND=redis://default:{redis_password}"
        f"@{redis_config.host}:{redis_config.port}/{redis_config.query_backend_database}",
    ]

    return env_vars

components/clp-package-utils/clp_package_utils/scripts/archive_manager.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from typing import Final, List, Optional
77

88
from clp_py_utils.clp_config import (
9+
CLP_DB_PASS_ENV_VAR_NAME,
10+
CLP_DB_USER_ENV_VAR_NAME,
911
CLP_DEFAULT_DATASET_NAME,
1012
StorageEngine,
1113
StorageType,
@@ -20,6 +22,7 @@
2022
generate_container_name,
2123
generate_container_start_cmd,
2224
get_clp_home,
25+
get_container_config_filename,
2326
load_config_file,
2427
validate_and_load_db_credentials_file,
2528
validate_dataset_name,
@@ -212,16 +215,20 @@ def main(argv: List[str]) -> int:
212215

213216
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
214217
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
215-
container_clp_config, clp_config, container_name
218+
container_clp_config, clp_config, get_container_config_filename(container_name)
216219
)
217220

218221
necessary_mounts: List[Optional[DockerMount]] = [
219222
mounts.clp_home,
220223
mounts.logs_dir,
221224
mounts.archives_output_dir,
222225
]
226+
extra_env_vars = {
227+
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
228+
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
229+
}
223230
container_start_cmd: List[str] = generate_container_start_cmd(
224-
container_name, necessary_mounts, clp_config.execution_container
231+
container_name, necessary_mounts, clp_config.execution_container, extra_env_vars
225232
)
226233

227234
# fmt: off

components/clp-package-utils/clp_package_utils/scripts/compress.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from typing import List, Optional
88

99
from clp_py_utils.clp_config import (
10+
CLP_DB_PASS_ENV_VAR_NAME,
11+
CLP_DB_USER_ENV_VAR_NAME,
1012
CLP_DEFAULT_DATASET_NAME,
1113
StorageEngine,
1214
)
@@ -20,6 +22,7 @@
2022
generate_container_name,
2123
generate_container_start_cmd,
2224
get_clp_home,
25+
get_container_config_filename,
2326
JobType,
2427
load_config_file,
2528
validate_and_load_db_credentials_file,
@@ -202,7 +205,7 @@ def main(argv):
202205

203206
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
204207
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
205-
container_clp_config, clp_config, container_name
208+
container_clp_config, clp_config, get_container_config_filename(container_name)
206209
)
207210

208211
necessary_mounts = [mounts.clp_home, mounts.data_dir, mounts.logs_dir]
@@ -222,8 +225,12 @@ def main(argv):
222225

223226
_generate_logs_list(clp_config.logs_input.type, container_logs_list_path, parsed_args)
224227

228+
extra_env_vars = {
229+
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
230+
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
231+
}
225232
container_start_cmd = generate_container_start_cmd(
226-
container_name, necessary_mounts, clp_config.execution_container
233+
container_name, necessary_mounts, clp_config.execution_container, extra_env_vars
227234
)
228235
compress_cmd = _generate_compress_cmd(
229236
parsed_args, dataset, generated_config_path_on_container, logs_list_path_on_container

components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
generate_container_name,
2020
generate_container_start_cmd,
2121
get_clp_home,
22+
get_container_config_filename,
2223
load_config_file,
2324
validate_and_load_db_credentials_file,
2425
validate_dataset_name,
@@ -124,7 +125,7 @@ def main(argv: List[str]) -> int:
124125

125126
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
126127
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
127-
container_clp_config, clp_config, container_name
128+
container_clp_config, clp_config, get_container_config_filename(container_name)
128129
)
129130

130131
necessary_mounts = [

components/clp-package-utils/clp_package_utils/scripts/decompress.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from typing import Optional
77

88
from clp_py_utils.clp_config import (
9+
CLP_DB_PASS_ENV_VAR_NAME,
10+
CLP_DB_USER_ENV_VAR_NAME,
911
CLP_DEFAULT_DATASET_NAME,
1012
CLPConfig,
1113
StorageEngine,
@@ -24,6 +26,7 @@
2426
generate_container_name,
2527
generate_container_start_cmd,
2628
get_clp_home,
29+
get_container_config_filename,
2730
JobType,
2831
load_config_file,
2932
validate_and_load_db_credentials_file,
@@ -99,7 +102,7 @@ def handle_extract_file_cmd(
99102
container_name = generate_container_name(str(JobType.FILE_EXTRACTION))
100103
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
101104
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
102-
container_clp_config, clp_config, container_name
105+
container_clp_config, clp_config, get_container_config_filename(container_name)
103106
)
104107

105108
# Set up mounts
@@ -122,8 +125,13 @@ def handle_extract_file_cmd(
122125
container_paths_to_extract_file_path,
123126
)
124127
)
128+
129+
extra_env_vars = {
130+
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
131+
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
132+
}
125133
container_start_cmd = generate_container_start_cmd(
126-
container_name, necessary_mounts, clp_config.execution_container
134+
container_name, necessary_mounts, clp_config.execution_container, extra_env_vars
127135
)
128136

129137
# fmt: off
@@ -191,11 +199,15 @@ def handle_extract_stream_cmd(
191199
container_name = generate_container_name(str(JobType.IR_EXTRACTION))
192200
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
193201
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
194-
container_clp_config, clp_config, container_name
202+
container_clp_config, clp_config, get_container_config_filename(container_name)
195203
)
196204
necessary_mounts = [mounts.clp_home, mounts.logs_dir]
205+
extra_env_vars = {
206+
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
207+
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
208+
}
197209
container_start_cmd = generate_container_start_cmd(
198-
container_name, necessary_mounts, clp_config.execution_container
210+
container_name, necessary_mounts, clp_config.execution_container, extra_env_vars
199211
)
200212

201213
# fmt: off

components/clp-package-utils/clp_package_utils/scripts/native/archive_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ def main(argv: List[str]) -> int:
194194
config_file_path, default_config_file_path, clp_home
195195
)
196196
clp_config.validate_logs_dir()
197+
clp_config.database.load_credentials_from_env()
197198
except:
198199
logger.exception("Failed to load config.")
199200
return -1

components/clp-package-utils/clp_package_utils/scripts/native/compress.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ def main(argv):
226226
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
227227
clp_config.validate_logs_input_config()
228228
clp_config.validate_logs_dir()
229+
clp_config.database.load_credentials_from_env()
229230
except:
230231
logger.exception("Failed to load config.")
231232
return -1

0 commit comments

Comments
 (0)