Skip to content

Commit d4168d7

Browse files
authored
feat(clp-package)!: Containerize the scripts in sbin via Docker Compose; Remove redundant assets from the package (resolves #1358). (#1512)
1 parent f798f1f commit d4168d7

File tree

28 files changed

+522
-234
lines changed

28 files changed

+522
-234
lines changed

.github/workflows/clp-artifact-build.yaml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -538,7 +538,7 @@ jobs:
538538
shell: "bash"
539539
run: "chown $(id -u):$(id -g) -R ."
540540

541-
- name: "Build the package"
541+
- name: "Build the package without the package image"
542542
uses: "./.github/actions/run-on-image"
543543
env:
544544
OS_NAME: "ubuntu-jammy"
@@ -548,7 +548,8 @@ jobs:
548548
${{needs.filter-relevant-changes.outputs.ubuntu_jammy_image_changed == 'false'
549549
|| (github.event_name != 'pull_request' && github.ref == 'refs/heads/main')}}
550550
run_command: >-
551-
CLP_CPP_MAX_PARALLELISM_PER_BUILD_TASK=$(getconf _NPROCESSORS_ONLN) task package
551+
CLP_CPP_MAX_PARALLELISM_PER_BUILD_TASK=$(getconf _NPROCESSORS_ONLN)
552+
task package-build-deps
552553
553554
- uses: "./.github/actions/clp-build-runtime-image"
554555
with:

components/clp-package-utils/clp_package_utils/controller.py

Lines changed: 54 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
get_datasets_table_name,
3838
get_files_table_name,
3939
)
40+
from clp_py_utils.core import resolve_host_path_in_container
4041

4142
from clp_package_utils.general import (
4243
check_docker_dependencies,
@@ -119,9 +120,12 @@ def _set_up_env_for_database(self) -> EnvVarsDict:
119120
logs_dir = self._clp_config.logs_directory / component_name
120121
validate_db_config(self._clp_config, conf_logging_file, data_dir, logs_dir)
121122

122-
data_dir.mkdir(exist_ok=True, parents=True)
123-
logs_dir.mkdir(exist_ok=True, parents=True)
124-
_chown_paths_if_root(data_dir, logs_dir)
123+
resolved_data_dir = resolve_host_path_in_container(data_dir)
124+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
125+
126+
resolved_data_dir.mkdir(exist_ok=True, parents=True)
127+
resolved_logs_dir.mkdir(exist_ok=True, parents=True)
128+
_chown_paths_if_root(resolved_data_dir, resolved_logs_dir)
125129

126130
env_vars = EnvVarsDict()
127131

@@ -166,8 +170,9 @@ def _set_up_env_for_queue(self) -> EnvVarsDict:
166170
logs_dir = self._clp_config.logs_directory / component_name
167171
validate_queue_config(self._clp_config, logs_dir)
168172

169-
logs_dir.mkdir(exist_ok=True, parents=True)
170-
_chown_paths_if_root(logs_dir)
173+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
174+
resolved_logs_dir.mkdir(exist_ok=True, parents=True)
175+
_chown_paths_if_root(resolved_logs_dir)
171176

172177
env_vars = EnvVarsDict()
173178

@@ -204,9 +209,12 @@ def _set_up_env_for_redis(self) -> EnvVarsDict:
204209
logs_dir = self._clp_config.logs_directory / component_name
205210
validate_redis_config(self._clp_config, conf_file, data_dir, logs_dir)
206211

207-
data_dir.mkdir(exist_ok=True, parents=True)
208-
logs_dir.mkdir(exist_ok=True, parents=True)
209-
_chown_paths_if_root(data_dir, logs_dir)
212+
resolved_data_dir = resolve_host_path_in_container(data_dir)
213+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
214+
215+
resolved_data_dir.mkdir(exist_ok=True, parents=True)
216+
resolved_logs_dir.mkdir(exist_ok=True, parents=True)
217+
_chown_paths_if_root(resolved_data_dir, resolved_logs_dir)
210218

211219
env_vars = EnvVarsDict()
212220

@@ -252,9 +260,12 @@ def _set_up_env_for_results_cache(self) -> EnvVarsDict:
252260
logs_dir = self._clp_config.logs_directory / component_name
253261
validate_results_cache_config(self._clp_config, conf_file, data_dir, logs_dir)
254262

255-
data_dir.mkdir(exist_ok=True, parents=True)
256-
logs_dir.mkdir(exist_ok=True, parents=True)
257-
_chown_paths_if_root(data_dir, logs_dir)
263+
resolved_data_dir = resolve_host_path_in_container(data_dir)
264+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
265+
266+
resolved_data_dir.mkdir(exist_ok=True, parents=True)
267+
resolved_logs_dir.mkdir(exist_ok=True, parents=True)
268+
_chown_paths_if_root(resolved_data_dir, resolved_logs_dir)
258269

259270
env_vars = EnvVarsDict()
260271

@@ -291,7 +302,8 @@ def _set_up_env_for_compression_scheduler(self) -> EnvVarsDict:
291302
logger.info(f"Setting up environment for {component_name}...")
292303

293304
logs_dir = self._clp_config.logs_directory / component_name
294-
logs_dir.mkdir(parents=True, exist_ok=True)
305+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
306+
resolved_logs_dir.mkdir(parents=True, exist_ok=True)
295307

296308
env_vars = EnvVarsDict()
297309

@@ -314,7 +326,8 @@ def _set_up_env_for_query_scheduler(self) -> EnvVarsDict:
314326
logger.info(f"Setting up environment for {component_name}...")
315327

316328
logs_dir = self._clp_config.logs_directory / component_name
317-
logs_dir.mkdir(parents=True, exist_ok=True)
329+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
330+
resolved_logs_dir.mkdir(parents=True, exist_ok=True)
318331

319332
env_vars = EnvVarsDict()
320333

@@ -336,7 +349,8 @@ def _set_up_env_for_compression_worker(self, num_workers: int) -> EnvVarsDict:
336349
logger.info(f"Setting up environment for {component_name}...")
337350

338351
logs_dir = self._clp_config.logs_directory / component_name
339-
logs_dir.mkdir(parents=True, exist_ok=True)
352+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
353+
resolved_logs_dir.mkdir(parents=True, exist_ok=True)
340354

341355
env_vars = EnvVarsDict()
342356

@@ -365,7 +379,8 @@ def _set_up_env_for_query_worker(self, num_workers: int) -> EnvVarsDict:
365379
logger.info(f"Setting up environment for {component_name}...")
366380

367381
logs_dir = self._clp_config.logs_directory / component_name
368-
logs_dir.mkdir(parents=True, exist_ok=True)
382+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
383+
resolved_logs_dir.mkdir(parents=True, exist_ok=True)
369384

370385
env_vars = EnvVarsDict()
371386

@@ -392,7 +407,8 @@ def _set_up_env_for_reducer(self, num_workers: int) -> EnvVarsDict:
392407
logger.info(f"Setting up environment for {component_name}...")
393408

394409
logs_dir = self._clp_config.logs_directory / component_name
395-
logs_dir.mkdir(parents=True, exist_ok=True)
410+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
411+
resolved_logs_dir.mkdir(parents=True, exist_ok=True)
396412

397413
env_vars = EnvVarsDict()
398414

@@ -427,7 +443,9 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict:
427443
self._clp_home / "var" / "www" / "webui" / "server" / "dist" / "settings.json"
428444
)
429445
validate_webui_config(
430-
self._clp_config, client_settings_json_path, server_settings_json_path
446+
self._clp_config,
447+
client_settings_json_path,
448+
server_settings_json_path,
431449
)
432450

433451
# Read, update, and write back client's and server's settings.json
@@ -454,10 +472,13 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict:
454472
"SqlDbClpTablePrefix": table_prefix,
455473
"SqlDbCompressionJobsTableName": COMPRESSION_JOBS_TABLE_NAME,
456474
}
475+
resolved_client_settings_json_path = resolve_host_path_in_container(
476+
client_settings_json_path
477+
)
457478
client_settings_json = self._read_and_update_settings_json(
458-
client_settings_json_path, client_settings_json_updates
479+
resolved_client_settings_json_path, client_settings_json_updates
459480
)
460-
with open(client_settings_json_path, "w") as client_settings_json_file:
481+
with open(resolved_client_settings_json_path, "w") as client_settings_json_file:
461482
client_settings_json_file.write(json.dumps(client_settings_json))
462483

463484
server_settings_json_updates = {
@@ -509,10 +530,13 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict:
509530
server_settings_json_updates["PrestoHost"] = None
510531
server_settings_json_updates["PrestoPort"] = None
511532

533+
resolved_server_settings_json_path = resolve_host_path_in_container(
534+
server_settings_json_path
535+
)
512536
server_settings_json = self._read_and_update_settings_json(
513-
server_settings_json_path, server_settings_json_updates
537+
resolved_server_settings_json_path, server_settings_json_updates
514538
)
515-
with open(server_settings_json_path, "w") as settings_json_file:
539+
with open(resolved_server_settings_json_path, "w") as settings_json_file:
516540
settings_json_file.write(json.dumps(server_settings_json))
517541

518542
env_vars = EnvVarsDict()
@@ -544,7 +568,9 @@ def _set_up_env_for_mcp_server(self) -> EnvVarsDict:
544568

545569
logs_dir = self._clp_config.logs_directory / component_name
546570
validate_mcp_server_config(self._clp_config, logs_dir)
547-
logs_dir.mkdir(parents=True, exist_ok=True)
571+
572+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
573+
resolved_logs_dir.mkdir(parents=True, exist_ok=True)
548574

549575
env_vars = EnvVarsDict()
550576

@@ -581,7 +607,8 @@ def _set_up_env_for_garbage_collector(self) -> EnvVarsDict:
581607
logger.info(f"Setting up environment for {component_name}...")
582608

583609
logs_dir = self._clp_config.logs_directory / component_name
584-
logs_dir.mkdir(parents=True, exist_ok=True)
610+
resolved_logs_dir = resolve_host_path_in_container(logs_dir)
611+
resolved_logs_dir.mkdir(parents=True, exist_ok=True)
585612

586613
env_vars = EnvVarsDict()
587614

@@ -812,13 +839,14 @@ def get_or_create_instance_id(clp_config: CLPConfig) -> str:
812839
:return: The instance ID.
813840
"""
814841
instance_id_file_path = clp_config.logs_directory / "instance-id"
842+
resolved_instance_id_file_path = resolve_host_path_in_container(instance_id_file_path)
815843

816-
if instance_id_file_path.exists():
817-
with open(instance_id_file_path, "r") as f:
844+
if resolved_instance_id_file_path.exists():
845+
with open(resolved_instance_id_file_path, "r") as f:
818846
instance_id = f.readline()
819847
else:
820848
instance_id = str(uuid.uuid4())[-4:]
821-
with open(instance_id_file_path, "w") as f:
849+
with open(resolved_instance_id_file_path, "w") as f:
822850
f.write(instance_id)
823851

824852
return instance_id

components/clp-package-utils/clp_package_utils/general.py

Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
get_config_value,
3939
make_config_path_absolute,
4040
read_yaml_config_file,
41+
resolve_host_path_in_container,
4142
validate_path_could_be_dir,
4243
)
4344
from strenum import KebabCaseStrEnum
@@ -355,7 +356,8 @@ def dump_container_config(
355356
"""
356357
config_file_path_on_host = clp_config.logs_directory / config_filename
357358
config_file_path_on_container = container_clp_config.logs_directory / config_filename
358-
with open(config_file_path_on_host, "w") as f:
359+
resolved_config_file_path_on_host = resolve_host_path_in_container(config_file_path_on_host)
360+
with open(resolved_config_file_path_on_host, "w") as f:
359361
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)
360362

361363
return config_file_path_on_container, config_file_path_on_host
@@ -463,16 +465,17 @@ def validate_credentials_file_path(
463465
clp_config: CLPConfig, clp_home: pathlib.Path, generate_default_file: bool
464466
):
465467
credentials_file_path = clp_config.credentials_file_path
466-
if not credentials_file_path.exists():
468+
resolved_credentials_file_path = resolve_host_path_in_container(credentials_file_path)
469+
if not resolved_credentials_file_path.exists():
467470
if (
468471
make_config_path_absolute(clp_home, CLP_DEFAULT_CREDENTIALS_FILE_PATH)
469472
== credentials_file_path
470473
and generate_default_file
471474
):
472-
generate_credentials_file(credentials_file_path)
475+
generate_credentials_file(resolved_credentials_file_path)
473476
else:
474477
raise ValueError(f"Credentials file path '{credentials_file_path}' does not exist.")
475-
elif not credentials_file_path.is_file():
478+
elif not resolved_credentials_file_path.is_file():
476479
raise ValueError(f"Credentials file path '{credentials_file_path}' is not a file.")
477480

478481

@@ -503,7 +506,8 @@ def validate_db_config(
503506
data_dir: pathlib.Path,
504507
logs_dir: pathlib.Path,
505508
):
506-
if not component_config.exists():
509+
resolved_component_config = resolve_host_path_in_container(component_config)
510+
if not resolved_component_config.exists():
507511
raise ValueError(f"{DB_COMPONENT_NAME} configuration file missing: '{component_config}'.")
508512
_validate_data_directory(data_dir, DB_COMPONENT_NAME)
509513
_validate_log_directory(logs_dir, DB_COMPONENT_NAME)
@@ -523,7 +527,8 @@ def validate_redis_config(
523527
data_dir: pathlib.Path,
524528
logs_dir: pathlib.Path,
525529
):
526-
if not component_config.exists():
530+
resolved_component_config = resolve_host_path_in_container(component_config)
531+
if not resolved_component_config.exists():
527532
raise ValueError(
528533
f"{REDIS_COMPONENT_NAME} configuration file missing: '{component_config}'."
529534
)
@@ -550,7 +555,8 @@ def validate_results_cache_config(
550555
data_dir: pathlib.Path,
551556
logs_dir: pathlib.Path,
552557
):
553-
if not component_config.exists():
558+
resolved_component_config = resolve_host_path_in_container(component_config)
559+
if not resolved_component_config.exists():
554560
raise ValueError(
555561
f"{RESULTS_CACHE_COMPONENT_NAME} configuration file missing: '{component_config}'."
556562
)
@@ -564,13 +570,9 @@ def validate_results_cache_config(
564570
)
565571

566572

567-
def validate_logs_input_config(clp_config: CLPConfig) -> None:
568-
clp_config.validate_logs_input_config()
569-
570-
571573
def validate_output_storage_config(clp_config: CLPConfig) -> None:
572-
clp_config.validate_archive_output_config()
573-
clp_config.validate_stream_output_config()
574+
clp_config.validate_archive_output_config(True)
575+
clp_config.validate_stream_output_config(True)
574576

575577
validate_path_for_container_mount(clp_config.archive_output.get_directory())
576578
validate_path_for_container_mount(clp_config.stream_output.get_directory())
@@ -582,7 +584,8 @@ def validate_webui_config(
582584
server_settings_json_path: pathlib.Path,
583585
):
584586
for path in [client_settings_json_path, server_settings_json_path]:
585-
if not path.exists():
587+
resolved_path = resolve_host_path_in_container(path)
588+
if not resolved_path.exists():
586589
raise ValueError(f"{WEBUI_COMPONENT_NAME} {path} is not a valid path to settings.json")
587590

588591
validate_port(f"{WEBUI_COMPONENT_NAME}.port", clp_config.webui.host, clp_config.webui.port)
@@ -764,7 +767,7 @@ def _is_docker_compose_project_running(project_name: str) -> bool:
764767

765768
def _validate_data_directory(data_dir: pathlib.Path, component_name: str) -> None:
766769
try:
767-
validate_path_could_be_dir(data_dir)
770+
validate_path_could_be_dir(resolve_host_path_in_container(data_dir))
768771
except ValueError as ex:
769772
raise ValueError(f"{component_name} data directory is invalid: {ex}")
770773

@@ -778,6 +781,6 @@ def _validate_log_directory(logs_dir: pathlib.Path, component_name: str):
778781
:raise ValueError: If the path is invalid or can't be a directory.
779782
"""
780783
try:
781-
validate_path_could_be_dir(logs_dir)
784+
validate_path_could_be_dir(resolve_host_path_in_container(logs_dir))
782785
except ValueError as ex:
783786
raise ValueError(f"{component_name} logs directory is invalid: {ex}")

components/clp-package-utils/clp_package_utils/scripts/archive_manager.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
StorageEngine,
1515
StorageType,
1616
)
17+
from clp_py_utils.core import resolve_host_path_in_container
1718

1819
from clp_package_utils.general import (
1920
CLPConfig,
@@ -171,9 +172,11 @@ def main(argv: List[str]) -> int:
171172
try:
172173
config_file_path: Path = Path(parsed_args.config)
173174
clp_config: CLPConfig = load_config_file(
174-
config_file_path, default_config_file_path, clp_home
175+
resolve_host_path_in_container(config_file_path),
176+
resolve_host_path_in_container(default_config_file_path),
177+
clp_home,
175178
)
176-
clp_config.validate_logs_dir()
179+
clp_config.validate_logs_dir(True)
177180

178181
# Validate and load necessary credentials
179182
validate_and_load_db_credentials_file(clp_config, clp_home, False)
@@ -220,7 +223,6 @@ def main(argv: List[str]) -> int:
220223
)
221224

222225
necessary_mounts: List[Optional[DockerMount]] = [
223-
mounts.clp_home,
224226
mounts.logs_dir,
225227
mounts.archives_output_dir,
226228
]
@@ -281,7 +283,10 @@ def main(argv: List[str]) -> int:
281283
logger.debug(f"Docker command failed: {shlex.join(cmd)}")
282284

283285
# Remove generated files
284-
generated_config_path_on_host.unlink()
286+
resolved_generated_config_path_on_host = resolve_host_path_in_container(
287+
generated_config_path_on_host
288+
)
289+
resolved_generated_config_path_on_host.unlink()
285290

286291
return ret_code
287292

0 commit comments

Comments
 (0)