diff --git a/.github/workflows/clp-artifact-build.yaml b/.github/workflows/clp-artifact-build.yaml
index 9cbb0b6c4b..b0106ec45e 100644
--- a/.github/workflows/clp-artifact-build.yaml
+++ b/.github/workflows/clp-artifact-build.yaml
@@ -538,7 +538,7 @@ jobs:
         shell: "bash"
         run: "chown $(id -u):$(id -g) -R ."
 
-      - name: "Build the package"
+      - name: "Build the package without the package image"
        uses: "./.github/actions/run-on-image"
        env:
          OS_NAME: "ubuntu-jammy"
@@ -548,7 +548,8 @@ jobs:
            ${{needs.filter-relevant-changes.outputs.ubuntu_jammy_image_changed == 'false'
            || (github.event_name != 'pull_request' && github.ref == 'refs/heads/main')}}
          run_command: >-
-            CLP_CPP_MAX_PARALLELISM_PER_BUILD_TASK=$(getconf _NPROCESSORS_ONLN) task package
+            CLP_CPP_MAX_PARALLELISM_PER_BUILD_TASK=$(getconf _NPROCESSORS_ONLN)
+            task package-build-deps
 
      - uses: "./.github/actions/clp-build-runtime-image"
        with:
diff --git a/components/clp-package-utils/clp_package_utils/controller.py b/components/clp-package-utils/clp_package_utils/controller.py
index 0c0057097f..a1088efefa 100644
--- a/components/clp-package-utils/clp_package_utils/controller.py
+++ b/components/clp-package-utils/clp_package_utils/controller.py
@@ -37,6 +37,7 @@
     get_datasets_table_name,
     get_files_table_name,
 )
+from clp_py_utils.core import resolve_host_path_in_container
 
 from clp_package_utils.general import (
     check_docker_dependencies,
@@ -119,9 +120,12 @@ def _set_up_env_for_database(self) -> EnvVarsDict:
         logs_dir = self._clp_config.logs_directory / component_name
 
         validate_db_config(self._clp_config, conf_logging_file, data_dir, logs_dir)
-        data_dir.mkdir(exist_ok=True, parents=True)
-        logs_dir.mkdir(exist_ok=True, parents=True)
-        _chown_paths_if_root(data_dir, logs_dir)
+        resolved_data_dir = resolve_host_path_in_container(data_dir)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+
+        resolved_data_dir.mkdir(exist_ok=True, parents=True)
+        resolved_logs_dir.mkdir(exist_ok=True, parents=True)
+        _chown_paths_if_root(resolved_data_dir, resolved_logs_dir)
 
         env_vars = EnvVarsDict()
 
@@ -166,8 +170,9 @@ def _set_up_env_for_queue(self) -> EnvVarsDict:
         logs_dir = self._clp_config.logs_directory / component_name
 
         validate_queue_config(self._clp_config, logs_dir)
-        logs_dir.mkdir(exist_ok=True, parents=True)
-        _chown_paths_if_root(logs_dir)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+        resolved_logs_dir.mkdir(exist_ok=True, parents=True)
+        _chown_paths_if_root(resolved_logs_dir)
 
         env_vars = EnvVarsDict()
 
@@ -204,9 +209,12 @@ def _set_up_env_for_redis(self) -> EnvVarsDict:
         logs_dir = self._clp_config.logs_directory / component_name
 
         validate_redis_config(self._clp_config, conf_file, data_dir, logs_dir)
-        data_dir.mkdir(exist_ok=True, parents=True)
-        logs_dir.mkdir(exist_ok=True, parents=True)
-        _chown_paths_if_root(data_dir, logs_dir)
+        resolved_data_dir = resolve_host_path_in_container(data_dir)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+
+        resolved_data_dir.mkdir(exist_ok=True, parents=True)
+        resolved_logs_dir.mkdir(exist_ok=True, parents=True)
+        _chown_paths_if_root(resolved_data_dir, resolved_logs_dir)
 
         env_vars = EnvVarsDict()
 
@@ -252,9 +260,12 @@ def _set_up_env_for_results_cache(self) -> EnvVarsDict:
         logs_dir = self._clp_config.logs_directory / component_name
 
         validate_results_cache_config(self._clp_config, conf_file, data_dir, logs_dir)
-        data_dir.mkdir(exist_ok=True, parents=True)
-        logs_dir.mkdir(exist_ok=True, parents=True)
-        _chown_paths_if_root(data_dir, logs_dir)
+        resolved_data_dir = resolve_host_path_in_container(data_dir)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+
+        resolved_data_dir.mkdir(exist_ok=True, parents=True)
+        resolved_logs_dir.mkdir(exist_ok=True, parents=True)
+        _chown_paths_if_root(resolved_data_dir, resolved_logs_dir)
 
         env_vars = EnvVarsDict()
 
@@ -291,7 +302,8 @@ def _set_up_env_for_compression_scheduler(self) -> EnvVarsDict:
         logger.info(f"Setting up environment for {component_name}...")
 
         logs_dir = self._clp_config.logs_directory / component_name
-        logs_dir.mkdir(parents=True, exist_ok=True)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+        resolved_logs_dir.mkdir(parents=True, exist_ok=True)
 
         env_vars = EnvVarsDict()
 
@@ -314,7 +326,8 @@ def _set_up_env_for_query_scheduler(self) -> EnvVarsDict:
         logger.info(f"Setting up environment for {component_name}...")
 
         logs_dir = self._clp_config.logs_directory / component_name
-        logs_dir.mkdir(parents=True, exist_ok=True)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+        resolved_logs_dir.mkdir(parents=True, exist_ok=True)
 
         env_vars = EnvVarsDict()
 
@@ -336,7 +349,8 @@ def _set_up_env_for_compression_worker(self, num_workers: int) -> EnvVarsDict:
         logger.info(f"Setting up environment for {component_name}...")
 
         logs_dir = self._clp_config.logs_directory / component_name
-        logs_dir.mkdir(parents=True, exist_ok=True)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+        resolved_logs_dir.mkdir(parents=True, exist_ok=True)
 
         env_vars = EnvVarsDict()
 
@@ -365,7 +379,8 @@ def _set_up_env_for_query_worker(self, num_workers: int) -> EnvVarsDict:
         logger.info(f"Setting up environment for {component_name}...")
 
         logs_dir = self._clp_config.logs_directory / component_name
-        logs_dir.mkdir(parents=True, exist_ok=True)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+        resolved_logs_dir.mkdir(parents=True, exist_ok=True)
 
         env_vars = EnvVarsDict()
 
@@ -392,7 +407,8 @@ def _set_up_env_for_reducer(self, num_workers: int) -> EnvVarsDict:
         logger.info(f"Setting up environment for {component_name}...")
 
         logs_dir = self._clp_config.logs_directory / component_name
-        logs_dir.mkdir(parents=True, exist_ok=True)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+        resolved_logs_dir.mkdir(parents=True, exist_ok=True)
 
         env_vars = EnvVarsDict()
 
@@ -427,7 +443,9 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict:
             self._clp_home / "var" / "www" / "webui" / "server" / "dist" / "settings.json"
         )
         validate_webui_config(
-            self._clp_config, client_settings_json_path, server_settings_json_path
+            self._clp_config,
+            client_settings_json_path,
+            server_settings_json_path,
         )
 
         # Read, update, and write back client's and server's settings.json
@@ -454,10 +472,13 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict:
             "SqlDbClpTablePrefix": table_prefix,
             "SqlDbCompressionJobsTableName": COMPRESSION_JOBS_TABLE_NAME,
         }
+        resolved_client_settings_json_path = resolve_host_path_in_container(
+            client_settings_json_path
+        )
         client_settings_json = self._read_and_update_settings_json(
-            client_settings_json_path, client_settings_json_updates
+            resolved_client_settings_json_path, client_settings_json_updates
         )
-        with open(client_settings_json_path, "w") as client_settings_json_file:
+        with open(resolved_client_settings_json_path, "w") as client_settings_json_file:
             client_settings_json_file.write(json.dumps(client_settings_json))
 
         server_settings_json_updates = {
@@ -509,10 +530,13 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict:
             server_settings_json_updates["PrestoHost"] = None
             server_settings_json_updates["PrestoPort"] = None
 
+        resolved_server_settings_json_path = resolve_host_path_in_container(
+            server_settings_json_path
+        )
         server_settings_json = self._read_and_update_settings_json(
-            server_settings_json_path, server_settings_json_updates
+            resolved_server_settings_json_path, server_settings_json_updates
         )
-        with open(server_settings_json_path, "w") as settings_json_file:
+        with open(resolved_server_settings_json_path, "w") as settings_json_file:
             settings_json_file.write(json.dumps(server_settings_json))
 
         env_vars = EnvVarsDict()
 
@@ -544,7 +568,9 @@ def _set_up_env_for_mcp_server(self) -> EnvVarsDict:
         logs_dir = self._clp_config.logs_directory / component_name
 
         validate_mcp_server_config(self._clp_config, logs_dir)
-        logs_dir.mkdir(parents=True, exist_ok=True)
+
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+        resolved_logs_dir.mkdir(parents=True, exist_ok=True)
 
         env_vars = EnvVarsDict()
 
@@ -581,7 +607,8 @@ def _set_up_env_for_garbage_collector(self) -> EnvVarsDict:
         logger.info(f"Setting up environment for {component_name}...")
 
         logs_dir = self._clp_config.logs_directory / component_name
-        logs_dir.mkdir(parents=True, exist_ok=True)
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir)
+        resolved_logs_dir.mkdir(parents=True, exist_ok=True)
 
         env_vars = EnvVarsDict()
 
@@ -812,13 +839,14 @@ def get_or_create_instance_id(clp_config: CLPConfig) -> str:
     :return: The instance ID.
     """
     instance_id_file_path = clp_config.logs_directory / "instance-id"
+    resolved_instance_id_file_path = resolve_host_path_in_container(instance_id_file_path)
 
-    if instance_id_file_path.exists():
-        with open(instance_id_file_path, "r") as f:
+    if resolved_instance_id_file_path.exists():
+        with open(resolved_instance_id_file_path, "r") as f:
            instance_id = f.readline()
     else:
        instance_id = str(uuid.uuid4())[-4:]
-        with open(instance_id_file_path, "w") as f:
+        with open(resolved_instance_id_file_path, "w") as f:
            f.write(instance_id)
 
     return instance_id
diff --git a/components/clp-package-utils/clp_package_utils/general.py b/components/clp-package-utils/clp_package_utils/general.py
index 9d3693fa84..d293b16a64 100644
--- a/components/clp-package-utils/clp_package_utils/general.py
+++ b/components/clp-package-utils/clp_package_utils/general.py
@@ -38,6 +38,7 @@
     get_config_value,
     make_config_path_absolute,
     read_yaml_config_file,
+    resolve_host_path_in_container,
     validate_path_could_be_dir,
 )
 from strenum import KebabCaseStrEnum
@@ -355,7 +356,8 @@ def dump_container_config(
     """
     config_file_path_on_host = clp_config.logs_directory / config_filename
     config_file_path_on_container = container_clp_config.logs_directory / config_filename
-    with open(config_file_path_on_host, "w") as f:
+    resolved_config_file_path_on_host = resolve_host_path_in_container(config_file_path_on_host)
+    with open(resolved_config_file_path_on_host, "w") as f:
         yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)
 
     return config_file_path_on_container, config_file_path_on_host
@@ -463,16 +465,17 @@ def validate_credentials_file_path(
     clp_config: CLPConfig, clp_home: pathlib.Path, generate_default_file: bool
 ):
     credentials_file_path = clp_config.credentials_file_path
-    if not credentials_file_path.exists():
+    resolved_credentials_file_path = resolve_host_path_in_container(credentials_file_path)
+    if not resolved_credentials_file_path.exists():
         if (
             make_config_path_absolute(clp_home, CLP_DEFAULT_CREDENTIALS_FILE_PATH)
            == credentials_file_path
            and generate_default_file
        ):
-            generate_credentials_file(credentials_file_path)
+            generate_credentials_file(resolved_credentials_file_path)
        else:
            raise ValueError(f"Credentials file path '{credentials_file_path}' does not exist.")
-    elif not credentials_file_path.is_file():
+    elif not resolved_credentials_file_path.is_file():
         raise ValueError(f"Credentials file path '{credentials_file_path}' is not a file.")
 
 
@@ -503,7 +506,8 @@ def validate_db_config(
     data_dir: pathlib.Path,
     logs_dir: pathlib.Path,
 ):
-    if not component_config.exists():
+    resolved_component_config = resolve_host_path_in_container(component_config)
+    if not resolved_component_config.exists():
         raise ValueError(f"{DB_COMPONENT_NAME} configuration file missing: '{component_config}'.")
     _validate_data_directory(data_dir, DB_COMPONENT_NAME)
     _validate_log_directory(logs_dir, DB_COMPONENT_NAME)
@@ -523,7 +527,8 @@ def validate_redis_config(
     data_dir: pathlib.Path,
     logs_dir: pathlib.Path,
 ):
-    if not component_config.exists():
+    resolved_component_config = resolve_host_path_in_container(component_config)
+    if not resolved_component_config.exists():
         raise ValueError(
             f"{REDIS_COMPONENT_NAME} configuration file missing: '{component_config}'."
         )
@@ -550,7 +555,8 @@ def validate_results_cache_config(
     data_dir: pathlib.Path,
     logs_dir: pathlib.Path,
 ):
-    if not component_config.exists():
+    resolved_component_config = resolve_host_path_in_container(component_config)
+    if not resolved_component_config.exists():
         raise ValueError(
             f"{RESULTS_CACHE_COMPONENT_NAME} configuration file missing: '{component_config}'."
         )
@@ -564,13 +570,9 @@ def validate_results_cache_config(
     )
 
 
-def validate_logs_input_config(clp_config: CLPConfig) -> None:
-    clp_config.validate_logs_input_config()
-
-
 def validate_output_storage_config(clp_config: CLPConfig) -> None:
-    clp_config.validate_archive_output_config()
-    clp_config.validate_stream_output_config()
+    clp_config.validate_archive_output_config(True)
+    clp_config.validate_stream_output_config(True)
     validate_path_for_container_mount(clp_config.archive_output.get_directory())
     validate_path_for_container_mount(clp_config.stream_output.get_directory())
 
@@ -582,7 +584,8 @@ def validate_webui_config(
     server_settings_json_path: pathlib.Path,
 ):
     for path in [client_settings_json_path, server_settings_json_path]:
-        if not path.exists():
+        resolved_path = resolve_host_path_in_container(path)
+        if not resolved_path.exists():
             raise ValueError(f"{WEBUI_COMPONENT_NAME} {path} is not a valid path to settings.json")
 
     validate_port(f"{WEBUI_COMPONENT_NAME}.port", clp_config.webui.host, clp_config.webui.port)
@@ -764,7 +767,7 @@ def _is_docker_compose_project_running(project_name: str) -> bool:
 
 
 def _validate_data_directory(data_dir: pathlib.Path, component_name: str) -> None:
     try:
-        validate_path_could_be_dir(data_dir)
+        validate_path_could_be_dir(resolve_host_path_in_container(data_dir))
     except ValueError as ex:
         raise ValueError(f"{component_name} data directory is invalid: {ex}")
 
@@ -778,6 +781,6 @@ def _validate_log_directory(logs_dir: pathlib.Path, component_name: str):
     :raise ValueError: If the path is invalid or can't be a directory.
""" try: - validate_path_could_be_dir(logs_dir) + validate_path_could_be_dir(resolve_host_path_in_container(logs_dir)) except ValueError as ex: raise ValueError(f"{component_name} logs directory is invalid: {ex}") diff --git a/components/clp-package-utils/clp_package_utils/scripts/archive_manager.py b/components/clp-package-utils/clp_package_utils/scripts/archive_manager.py index 5ccc351a61..81bcdf7e1c 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/archive_manager.py +++ b/components/clp-package-utils/clp_package_utils/scripts/archive_manager.py @@ -14,6 +14,7 @@ StorageEngine, StorageType, ) +from clp_py_utils.core import resolve_host_path_in_container from clp_package_utils.general import ( CLPConfig, @@ -171,9 +172,11 @@ def main(argv: List[str]) -> int: try: config_file_path: Path = Path(parsed_args.config) clp_config: CLPConfig = load_config_file( - config_file_path, default_config_file_path, clp_home + resolve_host_path_in_container(config_file_path), + resolve_host_path_in_container(default_config_file_path), + clp_home, ) - clp_config.validate_logs_dir() + clp_config.validate_logs_dir(True) # Validate and load necessary credentials validate_and_load_db_credentials_file(clp_config, clp_home, False) @@ -220,7 +223,6 @@ def main(argv: List[str]) -> int: ) necessary_mounts: List[Optional[DockerMount]] = [ - mounts.clp_home, mounts.logs_dir, mounts.archives_output_dir, ] @@ -281,7 +283,10 @@ def main(argv: List[str]) -> int: logger.debug(f"Docker command failed: {shlex.join(cmd)}") # Remove generated files - generated_config_path_on_host.unlink() + resolved_generated_config_path_on_host = resolve_host_path_in_container( + generated_config_path_on_host + ) + resolved_generated_config_path_on_host.unlink() return ret_code diff --git a/components/clp-package-utils/clp_package_utils/scripts/compress.py b/components/clp-package-utils/clp_package_utils/scripts/compress.py index 89d8f7365f..0678753a56 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/compress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/compress.py @@ -15,6 +15,7 @@ StorageEngine, StorageType, ) +from clp_py_utils.core import resolve_host_path_in_container from clp_package_utils.general import ( CONTAINER_INPUT_LOGS_ROOT_DIR, @@ -56,7 +57,10 @@ def _generate_logs_list( return len(parsed_args.paths) != 0 no_path_found = True - with open(host_logs_list_path, "r") as host_logs_list_file: + resolved_host_logs_list_path = resolve_host_path_in_container( + pathlib.Path(host_logs_list_path) + ) + with open(resolved_host_logs_list_path, "r") as host_logs_list_file: for line in host_logs_list_file: stripped_path_str = line.rstrip() if "" == stripped_path_str: @@ -175,8 +179,12 @@ def main(argv): # Validate and load config file try: config_file_path = pathlib.Path(parsed_args.config) - clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) - clp_config.validate_logs_dir() + clp_config = load_config_file( + resolve_host_path_in_container(config_file_path), + resolve_host_path_in_container(default_config_file_path), + clp_home, + ) + clp_config.validate_logs_dir(True) # Validate and load necessary credentials validate_and_load_db_credentials_file(clp_config, clp_home, False) @@ -230,20 +238,19 @@ def main(argv): container_clp_config, clp_config, get_container_config_filename(container_name) ) - necessary_mounts = [mounts.clp_home, mounts.data_dir, mounts.logs_dir, mounts.input_logs_dir] + necessary_mounts = [mounts.data_dir, mounts.logs_dir, 
mounts.input_logs_dir] # Write compression logs to a file while True: # Get unused output path - container_logs_list_filename = f"{uuid.uuid4()}.txt" - container_logs_list_path = clp_config.logs_directory / container_logs_list_filename - logs_list_path_on_container = ( - container_clp_config.logs_directory / container_logs_list_filename - ) - if not container_logs_list_path.exists(): + logs_list_filename = f"{uuid.uuid4()}.txt" + logs_list_path_on_host = clp_config.logs_directory / logs_list_filename + resolved_logs_list_path_on_host = resolve_host_path_in_container(logs_list_path_on_host) + logs_list_path_on_container = container_clp_config.logs_directory / logs_list_filename + if not resolved_logs_list_path_on_host.exists(): break - if not _generate_logs_list(container_logs_list_path, parsed_args): + if not _generate_logs_list(resolved_logs_list_path_on_host, parsed_args): logger.error("No filesystem paths given for compression.") return -1 @@ -266,9 +273,13 @@ def main(argv): logger.error("Compression failed.") logger.debug(f"Docker command failed: {shlex.join(cmd)}") else: - container_logs_list_path.unlink() + resolved_logs_list_path_on_host.unlink() + + resolved_generated_config_path_on_host = resolve_host_path_in_container( + generated_config_path_on_host + ) + resolved_generated_config_path_on_host.unlink() - generated_config_path_on_host.unlink() return ret_code diff --git a/components/clp-package-utils/clp_package_utils/scripts/compress_from_s3.py b/components/clp-package-utils/clp_package_utils/scripts/compress_from_s3.py index 3f7309d400..de70983031 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/compress_from_s3.py +++ b/components/clp-package-utils/clp_package_utils/scripts/compress_from_s3.py @@ -14,6 +14,7 @@ StorageEngine, StorageType, ) +from clp_py_utils.core import resolve_host_path_in_container from clp_package_utils.general import ( dump_container_config, @@ -56,7 +57,10 @@ def _generate_url_list( return len(parsed_args.inputs) != 0 no_url_found = True - with open(parsed_args.inputs_from, "r") as input_file: + resolved_inputs_from_path = resolve_host_path_in_container( + pathlib.Path(parsed_args.inputs_from) + ) + with open(resolved_inputs_from_path, "r") as input_file: for line in input_file: stripped_url = line.strip() if "" == stripped_url: @@ -222,8 +226,12 @@ def main(argv): try: config_file_path = pathlib.Path(parsed_args.config) - clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) - clp_config.validate_logs_dir() + clp_config = load_config_file( + resolve_host_path_in_container(config_file_path), + resolve_host_path_in_container(default_config_file_path), + clp_home, + ) + clp_config.validate_logs_dir(True) validate_and_load_db_credentials_file(clp_config, clp_home, False) except Exception: @@ -286,18 +294,17 @@ def main(argv): container_clp_config, clp_config, get_container_config_filename(container_name) ) - necessary_mounts = [mounts.clp_home, mounts.data_dir, mounts.logs_dir] + necessary_mounts = [mounts.data_dir, mounts.logs_dir] while True: - container_url_list_filename = f"{uuid.uuid4()}.txt" - container_url_list_path = clp_config.logs_directory / container_url_list_filename - url_list_path_on_container = ( - container_clp_config.logs_directory / container_url_list_filename - ) - if not container_url_list_path.exists(): + url_list_filename = f"{uuid.uuid4()}.txt" + url_list_path_on_host = clp_config.logs_directory / url_list_filename + resolved_url_list_path_on_host = 
resolve_host_path_in_container(url_list_path_on_host) + url_list_path_on_container = container_clp_config.logs_directory / url_list_filename + if not resolved_url_list_path_on_host.exists(): break - if not _generate_url_list(parsed_args.subcommand, container_url_list_path, parsed_args): + if not _generate_url_list(parsed_args.subcommand, resolved_url_list_path_on_host, parsed_args): logger.error("No S3 URLs given for compression.") return -1 @@ -320,9 +327,13 @@ def main(argv): logger.error("Compression failed.") logger.debug(f"Docker command failed: {shlex.join(cmd)}") else: - container_url_list_path.unlink() + resolved_url_list_path_on_host.unlink() + + resolved_generated_config_path_on_host = resolve_host_path_in_container( + generated_config_path_on_host + ) + resolved_generated_config_path_on_host.unlink() - generated_config_path_on_host.unlink() return ret_code diff --git a/components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py b/components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py index 7b895340e6..d4b1218d1e 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py +++ b/components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py @@ -14,6 +14,7 @@ StorageEngine, StorageType, ) +from clp_py_utils.core import resolve_host_path_in_container from clp_py_utils.s3_utils import generate_container_auth_options from clp_package_utils.general import ( @@ -92,8 +93,12 @@ def main(argv: List[str]) -> int: # Validate and load config file try: config_file_path = Path(parsed_args.config) - clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) - clp_config.validate_logs_dir() + clp_config = load_config_file( + resolve_host_path_in_container(config_file_path), + resolve_host_path_in_container(default_config_file_path), + clp_home, + ) + clp_config.validate_logs_dir(True) # Validate and load necessary credentials validate_and_load_db_credentials_file(clp_config, clp_home, False) @@ -131,10 +136,7 @@ def main(argv: List[str]) -> int: container_clp_config, clp_config, get_container_config_filename(container_name) ) - necessary_mounts = [ - mounts.clp_home, - mounts.logs_dir, - ] + necessary_mounts = [mounts.logs_dir] if clp_config.archive_output.storage.type == StorageType.FS: necessary_mounts.append(mounts.archives_output_dir) @@ -187,7 +189,10 @@ def main(argv: List[str]) -> int: logger.debug(f"Docker command failed: {shlex.join(cmd)}") # Remove generated files - generated_config_path_on_host.unlink() + resolved_generated_config_path_on_host = resolve_host_path_in_container( + generated_config_path_on_host + ) + resolved_generated_config_path_on_host.unlink() return ret_code diff --git a/components/clp-package-utils/clp_package_utils/scripts/decompress.py b/components/clp-package-utils/clp_package_utils/scripts/decompress.py index d5c4c059dd..747874fa5b 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/decompress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/decompress.py @@ -1,5 +1,6 @@ import argparse import logging +import os import pathlib import shlex import subprocess @@ -15,6 +16,7 @@ StorageEngine, StorageType, ) +from clp_py_utils.core import resolve_host_path_in_container from clp_package_utils.general import ( DockerMount, @@ -51,8 +53,12 @@ def validate_and_load_config( :return: The config object on success, None otherwise. 
""" try: - clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) - clp_config.validate_logs_dir() + clp_config = load_config_file( + resolve_host_path_in_container(config_file_path), + resolve_host_path_in_container(default_config_file_path), + clp_home, + ) + clp_config.validate_logs_dir(True) # Validate and load necessary credentials validate_and_load_db_credentials_file(clp_config, clp_home, False) @@ -78,8 +84,9 @@ def handle_extract_file_cmd( # Validate extraction directory extraction_dir = pathlib.Path(parsed_args.extraction_dir).resolve() + resolved_extraction_dir = resolve_host_path_in_container(extraction_dir) try: - validate_path_could_be_dir(extraction_dir) + validate_path_could_be_dir(resolved_extraction_dir) except ValueError as ex: logger.error(f"extraction-dir is invalid: {ex}") return -1 @@ -107,10 +114,9 @@ def handle_extract_file_cmd( ) # Set up mounts - extraction_dir.mkdir(exist_ok=True) + resolved_extraction_dir.mkdir(exist_ok=True) container_extraction_dir = pathlib.Path("/") / "mnt" / "extraction-dir" necessary_mounts = [ - mounts.clp_home, mounts.data_dir, mounts.logs_dir, mounts.archives_output_dir, @@ -161,7 +167,10 @@ def handle_extract_file_cmd( logger.debug(f"Docker command failed: {shlex.join(cmd)}") # Remove generated files - generated_config_path_on_host.unlink() + resolved_generated_config_path_on_host = resolve_host_path_in_container( + generated_config_path_on_host + ) + resolved_generated_config_path_on_host.unlink() return ret_code @@ -205,7 +214,7 @@ def handle_extract_stream_cmd( generated_config_path_on_container, generated_config_path_on_host = dump_container_config( container_clp_config, clp_config, get_container_config_filename(container_name) ) - necessary_mounts = [mounts.clp_home, mounts.logs_dir] + necessary_mounts = [mounts.logs_dir] extra_env_vars = { CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username, CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password, @@ -266,7 +275,10 @@ def handle_extract_stream_cmd( logger.debug(f"Docker command failed: {shlex.join(cmd)}") # Remove generated files - generated_config_path_on_host.unlink() + resolved_generated_config_path_on_host = resolve_host_path_in_container( + generated_config_path_on_host + ) + resolved_generated_config_path_on_host.unlink() return ret_code @@ -298,8 +310,13 @@ def main(argv): file_extraction_parser.add_argument( "-f", "--files-from", help="A file listing all files to extract." ) + default_extraction_dir = pathlib.Path(os.environ.get("CLP_PWD_HOST", ".")) file_extraction_parser.add_argument( - "-d", "--extraction-dir", metavar="DIR", default=".", help="Extract files into DIR." 
+ "-d", + "--extraction-dir", + metavar="DIR", + default=default_extraction_dir, + help="Extract files into DIR.", ) # IR extraction command parser diff --git a/components/clp-package-utils/clp_package_utils/scripts/search.py b/components/clp-package-utils/clp_package_utils/scripts/search.py index 03aa3817b9..0c4eda4ec0 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/search.py +++ b/components/clp-package-utils/clp_package_utils/scripts/search.py @@ -13,6 +13,7 @@ StorageEngine, StorageType, ) +from clp_py_utils.core import resolve_host_path_in_container from clp_package_utils.general import ( dump_container_config, @@ -91,8 +92,12 @@ def main(argv): # Validate and load config file try: config_file_path = pathlib.Path(parsed_args.config) - clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) - clp_config.validate_logs_dir() + clp_config = load_config_file( + resolve_host_path_in_container(config_file_path), + resolve_host_path_in_container(default_config_file_path), + clp_home, + ) + clp_config.validate_logs_dir(True) # Validate and load necessary credentials validate_and_load_db_credentials_file(clp_config, clp_home, False) @@ -128,7 +133,7 @@ def main(argv): generated_config_path_on_container, generated_config_path_on_host = dump_container_config( container_clp_config, clp_config, get_container_config_filename(container_name) ) - necessary_mounts = [mounts.clp_home, mounts.logs_dir] + necessary_mounts = [mounts.logs_dir] extra_env_vars = { CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username, CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password, @@ -180,7 +185,10 @@ def main(argv): logger.debug(f"Docker command failed: {shlex.join(cmd)}") # Remove generated files - generated_config_path_on_host.unlink() + resolved_generated_config_path_on_host = resolve_host_path_in_container( + generated_config_path_on_host + ) + resolved_generated_config_path_on_host.unlink() return ret_code diff --git a/components/clp-package-utils/clp_package_utils/scripts/start_clp.py b/components/clp-package-utils/clp_package_utils/scripts/start_clp.py index 15a44b7458..01b28bf38f 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/start_clp.py +++ b/components/clp-package-utils/clp_package_utils/scripts/start_clp.py @@ -4,6 +4,7 @@ import sys from clp_py_utils.clp_config import CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH +from clp_py_utils.core import resolve_host_path_in_container from clp_package_utils.controller import DockerComposeController, get_or_create_instance_id from clp_package_utils.general import ( @@ -12,7 +13,6 @@ validate_and_load_db_credentials_file, validate_and_load_queue_credentials_file, validate_and_load_redis_credentials_file, - validate_logs_input_config, validate_output_storage_config, validate_retention_config, ) @@ -53,30 +53,38 @@ def main(argv): try: # Validate and load config file. 
         config_file_path = pathlib.Path(parsed_args.config)
-        clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
+        clp_config = load_config_file(
+            resolve_host_path_in_container(config_file_path),
+            resolve_host_path_in_container(default_config_file_path),
+            clp_home,
+        )
 
         validate_and_load_db_credentials_file(clp_config, clp_home, True)
         validate_and_load_queue_credentials_file(clp_config, clp_home, True)
         validate_and_load_redis_credentials_file(clp_config, clp_home, True)
-        validate_logs_input_config(clp_config)
+        clp_config.validate_logs_input_config(True)
         validate_output_storage_config(clp_config)
         validate_retention_config(clp_config)
-        clp_config.validate_aws_config_dir()
-        clp_config.validate_data_dir()
-        clp_config.validate_logs_dir()
-        clp_config.validate_tmp_dir()
+        clp_config.validate_aws_config_dir(True)
+        clp_config.validate_data_dir(True)
+        clp_config.validate_logs_dir(True)
+        clp_config.validate_tmp_dir(True)
     except:
         logger.exception("Failed to load config.")
         return -1
 
     try:
         # Create necessary directories.
-        clp_config.data_directory.mkdir(parents=True, exist_ok=True)
-        clp_config.logs_directory.mkdir(parents=True, exist_ok=True)
-        clp_config.tmp_directory.mkdir(parents=True, exist_ok=True)
-        clp_config.archive_output.get_directory().mkdir(parents=True, exist_ok=True)
-        clp_config.stream_output.get_directory().mkdir(parents=True, exist_ok=True)
+        resolve_host_path_in_container(clp_config.data_directory).mkdir(parents=True, exist_ok=True)
+        resolve_host_path_in_container(clp_config.logs_directory).mkdir(parents=True, exist_ok=True)
+        resolve_host_path_in_container(clp_config.tmp_directory).mkdir(parents=True, exist_ok=True)
+        resolve_host_path_in_container(clp_config.archive_output.get_directory()).mkdir(
+            parents=True, exist_ok=True
+        )
+        resolve_host_path_in_container(clp_config.stream_output.get_directory()).mkdir(
+            parents=True, exist_ok=True
+        )
     except:
         logger.exception("Failed to create necessary directories.")
         return -1
diff --git a/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py b/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py
index 105e929df7..b450a73027 100755
--- a/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py
+++ b/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py
@@ -4,6 +4,7 @@
 import sys
 
 from clp_py_utils.clp_config import CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH
+from clp_py_utils.core import resolve_host_path_in_container
 
 from clp_package_utils.controller import DockerComposeController, get_or_create_instance_id
 from clp_package_utils.general import (
@@ -30,7 +31,11 @@ def main(argv):
 
     try:
         config_file_path = pathlib.Path(parsed_args.config)
-        clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
+        clp_config = load_config_file(
+            resolve_host_path_in_container(config_file_path),
+            resolve_host_path_in_container(default_config_file_path),
+            clp_home,
+        )
     except:
         logger.exception("Failed to load config.")
         return -1
diff --git a/components/clp-py-utils/clp_py_utils/clp_config.py b/components/clp-py-utils/clp_py_utils/clp_config.py
index 8c8352baab..cc383a7f2f 100644
--- a/components/clp-py-utils/clp_py_utils/clp_config.py
+++ b/components/clp-py-utils/clp_py_utils/clp_config.py
@@ -19,6 +19,7 @@
     get_config_value,
     make_config_path_absolute,
     read_yaml_config_file,
+    resolve_host_path_in_container,
     validate_path_could_be_dir,
 )
 from .serialization_utils import serialize_path, serialize_str_enum
@@ -666,15 +667,18 @@ def make_config_paths_absolute(self, clp_home: pathlib.Path):
         )
         self._version_file_path = make_config_path_absolute(clp_home, self._version_file_path)
 
-    def validate_logs_input_config(self):
+    def validate_logs_input_config(self, use_host_mount: bool = False):
         logs_input_type = self.logs_input.type
         if StorageType.FS == logs_input_type:
             # NOTE: This can't be a pydantic validator since input_logs_dir might be a
             # package-relative path that will only be resolved after pydantic validation
             input_logs_dir = self.logs_input.directory
-            if not input_logs_dir.exists():
+            resolved_input_logs_dir = (
+                resolve_host_path_in_container(input_logs_dir) if use_host_mount else input_logs_dir
+            )
+            if not resolved_input_logs_dir.exists():
                 raise ValueError(f"logs_input.directory '{input_logs_dir}' doesn't exist.")
-            if not input_logs_dir.is_dir():
+            if not resolved_input_logs_dir.is_dir():
                 raise ValueError(f"logs_input.directory '{input_logs_dir}' is not a directory.")
         if StorageType.S3 == logs_input_type and StorageEngine.CLP_S != self.package.storage_engine:
             raise ValueError(
@@ -682,7 +686,7 @@ def validate_logs_input_config(self):
                 f" = '{StorageEngine.CLP_S}'"
             )
 
-    def validate_archive_output_config(self):
+    def validate_archive_output_config(self, use_host_mount: bool = False):
         if (
             StorageType.S3 == self.archive_output.storage.type
             and StorageEngine.CLP_S != self.package.storage_engine
@@ -691,12 +695,18 @@ def validate_archive_output_config(self):
                 f"archive_output.storage.type = 's3' is only supported with package.storage_engine"
                 f" = '{StorageEngine.CLP_S}'"
             )
+        archive_output_dir = self.archive_output.get_directory()
+        resolved_archive_output_dir = (
+            resolve_host_path_in_container(archive_output_dir)
+            if use_host_mount
+            else archive_output_dir
+        )
         try:
-            validate_path_could_be_dir(self.archive_output.get_directory())
+            validate_path_could_be_dir(resolved_archive_output_dir)
         except ValueError as ex:
             raise ValueError(f"archive_output.storage's directory is invalid: {ex}")
 
-    def validate_stream_output_config(self):
+    def validate_stream_output_config(self, use_host_mount: bool = False):
         if (
             StorageType.S3 == self.stream_output.storage.type
             and StorageEngine.CLP_S != self.package.storage_engine
@@ -705,30 +715,42 @@ def validate_stream_output_config(self):
                 f"stream_output.storage.type = 's3' is only supported with package.storage_engine"
                 f" = '{StorageEngine.CLP_S}'"
             )
+        stream_output_dir = self.stream_output.get_directory()
+        resolved_stream_output_dir = (
+            resolve_host_path_in_container(stream_output_dir)
+            if use_host_mount
+            else stream_output_dir
+        )
         try:
-            validate_path_could_be_dir(self.stream_output.get_directory())
+            validate_path_could_be_dir(resolved_stream_output_dir)
         except ValueError as ex:
             raise ValueError(f"stream_output.storage's directory is invalid: {ex}")
 
-    def validate_data_dir(self):
+    def validate_data_dir(self, use_host_mount: bool = False):
+        data_dir = self.data_directory
+        resolved_data_dir = resolve_host_path_in_container(data_dir) if use_host_mount else data_dir
         try:
-            validate_path_could_be_dir(self.data_directory)
+            validate_path_could_be_dir(resolved_data_dir)
         except ValueError as ex:
             raise ValueError(f"data_directory is invalid: {ex}")
 
-    def validate_logs_dir(self):
+    def validate_logs_dir(self, use_host_mount: bool = False):
+        logs_dir = self.logs_directory
+        resolved_logs_dir = resolve_host_path_in_container(logs_dir) if use_host_mount else logs_dir
         try:
-            validate_path_could_be_dir(self.logs_directory)
+            validate_path_could_be_dir(resolved_logs_dir)
         except ValueError as ex:
             raise ValueError(f"logs_directory is invalid: {ex}")
 
-    def validate_tmp_dir(self):
+    def validate_tmp_dir(self, use_host_mount: bool = False):
+        tmp_dir = self.tmp_directory
+        resolved_tmp_dir = resolve_host_path_in_container(tmp_dir) if use_host_mount else tmp_dir
         try:
-            validate_path_could_be_dir(self.tmp_directory)
+            validate_path_could_be_dir(resolved_tmp_dir)
         except ValueError as ex:
             raise ValueError(f"tmp_directory is invalid: {ex}")
 
-    def validate_aws_config_dir(self):
+    def validate_aws_config_dir(self, use_host_mount: bool = False):
         profile_auth_used = False
         auth_configs = []
 
@@ -749,7 +771,12 @@ def validate_aws_config_dir(self):
                 raise ValueError(
                     "aws_config_directory must be set when using profile authentication"
                 )
-            if not self.aws_config_directory.exists():
+            resolved_aws_config_dir = (
+                resolve_host_path_in_container(self.aws_config_directory)
+                if use_host_mount
+                else self.aws_config_directory
+            )
+            if not resolved_aws_config_dir.exists():
                 raise ValueError(
                     f"aws_config_directory does not exist: '{self.aws_config_directory}'"
                 )
diff --git a/components/clp-py-utils/clp_py_utils/core.py b/components/clp-py-utils/clp_py_utils/core.py
index 95266fa216..b04a4f7aa8 100644
--- a/components/clp-py-utils/clp_py_utils/core.py
+++ b/components/clp-py-utils/clp_py_utils/core.py
@@ -3,6 +3,8 @@
 import yaml
 from yaml.parser import ParserError
 
+CONTAINER_DIR_FOR_HOST_ROOT = pathlib.Path("/") / "mnt" / "host"
+
 
 class FileMetadata:
     __slots__ = ("path", "size", "estimated_uncompressed_size")
@@ -62,6 +64,34 @@ def read_yaml_config_file(yaml_config_file_path: pathlib.Path):
     return config
 
 
+def resolve_host_path_in_container(host_path: pathlib.Path) -> pathlib.Path:
+    """
+    Translates a host path to its container-mount equivalent. It also resolves a single level of
+    symbolic link if the host path itself is a symlink.
+
+    :param host_path: The host path.
+    :return: The translated path.
+    """
+    host_path = host_path.absolute()
+    translated_path = CONTAINER_DIR_FOR_HOST_ROOT / host_path.relative_to("/")
+
+    try:
+        if not translated_path.is_symlink():
+            return translated_path
+
+        link_target = translated_path.readlink()
+        if link_target.is_absolute():
+            return CONTAINER_DIR_FOR_HOST_ROOT / link_target.relative_to("/")
+        else:
+            # If the symlink points to a relative path, resolve it relative to the symlink's parent.
+            return (translated_path.parent / link_target).resolve()
+    except OSError:
+        # Ignore if reading the symlink fails (e.g., broken link or permission error).
+        pass
+
+    return translated_path
+
+
 def validate_path_could_be_dir(path: pathlib.Path):
     part = path
     while True:
diff --git a/components/package-template/src/sbin/.common-env.sh b/components/package-template/src/sbin/.common-env.sh
new file mode 100755
index 0000000000..2651568a32
--- /dev/null
+++ b/components/package-template/src/sbin/.common-env.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+package_root=$(readlink -f "$script_dir/..")
+
+if [[ -z "${CLP_HOME:-}" ]]; then
+    export CLP_HOME="$package_root"
+else
+    export CLP_HOME="$CLP_HOME"
+fi
+
+image_id_file="$CLP_HOME/clp-package-image.id"
+version_file="$CLP_HOME/VERSION"
+
+if [[ -f "$image_id_file" ]]; then
+    image_id="$(tr -d '[:space:]' <"$image_id_file")"
+    export CLP_PACKAGE_CONTAINER_IMAGE_REF="$image_id"
+elif [[ -f "$version_file" ]]; then
+    version="$(tr -d '[:space:]' <"$version_file")"
+    export CLP_PACKAGE_CONTAINER_IMAGE_REF="ghcr.io/y-scope/clp/clp-package:v$version"
+else
+    echo >&2 "Error: Neither '${image_id_file}' nor '${version_file}' exist."
+    return 1
+fi
+
+uid="$(id --user 2>/dev/null || echo "1000")"
+gid="$(getent group docker | cut -d: -f3 2>/dev/null || echo "999")"
+export CLP_FIRST_PARTY_SERVICE_UID_GID="$uid:$gid"
+
+CLP_PWD_HOST="$(pwd 2>/dev/null || echo "")"
+export CLP_PWD_HOST
+
+if [[ -z "${CLP_DOCKER_PLUGIN_DIR:-}" ]]; then
+    for compose_plugin_dir in \
+        "$HOME/.docker/cli-plugins" \
+        "/mnt/wsl/docker-desktop/cli-tools/usr/local/lib/docker/cli-plugins" \
+        "/usr/local/lib/docker/cli-plugins" \
+        "/usr/libexec/docker/cli-plugins"; do
+
+        compose_plugin_path="$compose_plugin_dir/docker-compose"
+        if [[ -f "$compose_plugin_path" ]]; then
+            export CLP_DOCKER_PLUGIN_DIR="$compose_plugin_dir"
+            break
+        fi
+    done
+    if [[ -z "${CLP_DOCKER_PLUGIN_DIR:-}" ]]; then
+        echo >&2 "Warning: Docker plugin directory not found;" \
+            "Docker Compose may not work inside container."
+    fi
+fi
+
+if [[ -z "${CLP_DOCKER_SOCK_PATH:-}" ]]; then
+    socket="$(docker context inspect \
+        --format '{{.Endpoints.docker.Host}}' 2>/dev/null |
+        sed -E 's|^unix://||')"
+
+    if [[ -S "$socket" ]]; then
+        export CLP_DOCKER_SOCK_PATH="$socket"
+    fi
+fi
diff --git a/components/package-template/src/sbin/admin-tools/archive-manager.sh b/components/package-template/src/sbin/admin-tools/archive-manager.sh
index 8dc9cdecb5..9c69fce68a 100755
--- a/components/package-template/src/sbin/admin-tools/archive-manager.sh
+++ b/components/package-template/src/sbin/admin-tools/archive-manager.sh
@@ -1,9 +1,12 @@
 #!/usr/bin/env bash
 
-script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
-package_root="$script_dir/../.."
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +common_env_path="$script_dir/../.common-env.sh" -PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ +# shellcheck source=.common-env.sh +source "$common_env_path" + +docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" run --rm clp-runtime \ python3 \ -m clp_package_utils.scripts.archive_manager \ "$@" diff --git a/components/package-template/src/sbin/admin-tools/dataset-manager.sh b/components/package-template/src/sbin/admin-tools/dataset-manager.sh index e0ecb767a1..7b3e002788 100755 --- a/components/package-template/src/sbin/admin-tools/dataset-manager.sh +++ b/components/package-template/src/sbin/admin-tools/dataset-manager.sh @@ -1,9 +1,12 @@ #!/usr/bin/env bash -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -package_root="$script_dir/../.." +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +common_env_path="$script_dir/../.common-env.sh" -PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ +# shellcheck source=.common-env.sh +source "$common_env_path" + +docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" run --rm clp-runtime \ python3 \ -m clp_package_utils.scripts.dataset_manager \ "$@" diff --git a/components/package-template/src/sbin/compress-from-s3.sh b/components/package-template/src/sbin/compress-from-s3.sh index 7d3bc8e455..49b9a5e1b6 100755 --- a/components/package-template/src/sbin/compress-from-s3.sh +++ b/components/package-template/src/sbin/compress-from-s3.sh @@ -1,9 +1,12 @@ #!/usr/bin/env bash -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -package_root="$script_dir/.." +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +common_env_path="$script_dir/.common-env.sh" -PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ +# shellcheck source=.common-env.sh +source "$common_env_path" + +docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" run --rm clp-runtime \ python3 \ -m clp_package_utils.scripts.compress_from_s3 \ "$@" diff --git a/components/package-template/src/sbin/compress.sh b/components/package-template/src/sbin/compress.sh index c9b7c1b9b5..be24b32ea4 100755 --- a/components/package-template/src/sbin/compress.sh +++ b/components/package-template/src/sbin/compress.sh @@ -1,9 +1,12 @@ #!/usr/bin/env bash -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -package_root="$script_dir/.." +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +common_env_path="$script_dir/.common-env.sh" -PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ +# shellcheck source=.common-env.sh +source "$common_env_path" + +docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" run --rm clp-runtime \ python3 \ -m clp_package_utils.scripts.compress \ "$@" diff --git a/components/package-template/src/sbin/decompress.sh b/components/package-template/src/sbin/decompress.sh index a8a343bb44..cb89685964 100755 --- a/components/package-template/src/sbin/decompress.sh +++ b/components/package-template/src/sbin/decompress.sh @@ -1,9 +1,12 @@ #!/usr/bin/env bash -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -package_root="$script_dir/.." 
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +common_env_path="$script_dir/.common-env.sh" -PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ +# shellcheck source=.common-env.sh +source "$common_env_path" + +docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" run --rm clp-runtime \ python3 \ -m clp_package_utils.scripts.decompress \ "$@" diff --git a/components/package-template/src/sbin/search.sh b/components/package-template/src/sbin/search.sh index d3987265a1..183d9a87af 100755 --- a/components/package-template/src/sbin/search.sh +++ b/components/package-template/src/sbin/search.sh @@ -1,9 +1,12 @@ #!/usr/bin/env bash -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -package_root="$script_dir/.." +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +common_env_path="$script_dir/.common-env.sh" -PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ +# shellcheck source=.common-env.sh +source "$common_env_path" + +docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" run --rm clp-runtime \ python3 \ -m clp_package_utils.scripts.search \ "$@" diff --git a/components/package-template/src/sbin/start-clp.sh b/components/package-template/src/sbin/start-clp.sh index b4ecabd802..11b09d3f6c 100755 --- a/components/package-template/src/sbin/start-clp.sh +++ b/components/package-template/src/sbin/start-clp.sh @@ -1,9 +1,12 @@ #!/usr/bin/env bash -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -package_root="$script_dir/.." +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +common_env_path="$script_dir/.common-env.sh" -PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ +# shellcheck source=.common-env.sh +source "$common_env_path" + +docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" run --rm clp-runtime \ python3 \ -m clp_package_utils.scripts.start_clp \ "$@" diff --git a/components/package-template/src/sbin/stop-clp.sh b/components/package-template/src/sbin/stop-clp.sh index b6ee88a945..c94c3ac947 100755 --- a/components/package-template/src/sbin/stop-clp.sh +++ b/components/package-template/src/sbin/stop-clp.sh @@ -1,9 +1,12 @@ #!/usr/bin/env bash -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -package_root="$script_dir/.." +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +common_env_path="$script_dir/.common-env.sh" -PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ +# shellcheck source=.common-env.sh +source "$common_env_path" + +docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" run --rm clp-runtime \ python3 \ -m clp_package_utils.scripts.stop_clp \ "$@" diff --git a/docs/src/dev-docs/building-package.md b/docs/src/dev-docs/building-package.md index 4149366a12..8275af1ef9 100755 --- a/docs/src/dev-docs/building-package.md +++ b/docs/src/dev-docs/building-package.md @@ -57,8 +57,8 @@ task The build will be in `build/clp-package` and defaults to using the storage engine for `clp-text`. :::{note} -The `task` command runs `task docker-images:package` under the hood. In addition to the build, a -Docker image named `clp-package:dev--` will also be created. +The `task` command runs `task package` under the hood. In addition to the build, a Docker image +named `clp-package:dev--` will also be created. 
 :::
 
 :::{note}
diff --git a/docs/src/user-docs/quick-start/index.md b/docs/src/user-docs/quick-start/index.md
index faf814c0ba..1d1d5b6f96 100644
--- a/docs/src/user-docs/quick-start/index.md
+++ b/docs/src/user-docs/quick-start/index.md
@@ -17,7 +17,6 @@ To run a CLP release, you'll need:
   * `docker-ce` >= 27.0.3
   * `docker-ce-cli` >= 27.0.3
   * `docker-compose-plugin` >= 2.28.1
-* [Python](#python)
 
 ### Docker
 
@@ -33,19 +32,7 @@ NOTE:
 
 * If you're not running as root, ensure Docker can be run
   [without superuser privileges][docker-non-root].
-* If you're using Docker Desktop, ensure version 4.34 or higher is installed, and
-  [host networking is enabled][docker-desktop-host-networking].
-
-### Python
-
-To check whether Python is installed on your system, run:
-
-```bash
-python3 --version
-```
-
-CLP requires Python 3.10 or higher. If Python isn't installed, or if the version isn't high enough,
-install or upgrade it by following the instructions for your OS.
+* If you're using Docker Desktop, ensure version 4.34 or higher is installed.
 
 ---
 
@@ -143,5 +130,4 @@ How to compress and search unstructured text logs.
 
 [clp-releases]: https://github.com/y-scope/clp/releases
 [Docker]: https://docs.docker.com/engine/install/
-[docker-desktop-host-networking]: https://docs.docker.com/engine/network/drivers/host/#docker-desktop
 [docker-non-root]: https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user
diff --git a/taskfile.yaml b/taskfile.yaml
index e1a43e77a5..293a6b1298 100644
--- a/taskfile.yaml
+++ b/taskfile.yaml
@@ -27,6 +27,7 @@ vars:
   G_NODEJS_22_BIN_DIR: "{{.G_NODEJS_22_BUILD_DIR}}/bin"
   G_PACKAGE_BUILD_DIR: "{{.G_BUILD_DIR}}/clp-package"
   G_PACKAGE_VENV_DIR: "{{.G_BUILD_DIR}}/package-venv"
+  G_PYTHON_LIBS_DIR: "{{.G_BUILD_DIR}}/python-libs"
   G_WEBUI_BUILD_DIR: "{{.G_BUILD_DIR}}/webui"
   G_SPIDER_BUILD_DIR: "{{.G_BUILD_DIR}}/spider"
 
@@ -43,6 +44,8 @@ vars:
 
   # Checksum files
   G_PACKAGE_CHECKSUM_FILE: "{{.G_BUILD_DIR}}/package.md5"
+  G_PYTHON_LIBS_CHECKSUM_FILE: "{{.G_BUILD_DIR}}/python-libs.md5"
+  G_WEBUI_CHECKSUM_FILE: "{{.G_BUILD_DIR}}/webui.md5"
   G_WEBUI_CLIENT_NODE_MODULES_CHECKSUM_FILE: "{{.G_BUILD_DIR}}/webui-client-node-modules.md5"
   G_WEBUI_COMMON_NODE_MODULES_CHECKSUM_FILE: "{{.G_BUILD_DIR}}/webui-common-node-modules.md5"
   G_WEBUI_LOG_VIEWER_NODE_MODULES_CHECKSUM_FILE:
@@ -52,7 +55,7 @@ vars:
 
 tasks:
   default:
-    deps: ["docker-images:package"]
+    deps: ["package"]
 
   clean:
     cmds:
@@ -101,86 +104,45 @@ tasks:
       - "rm -rf '{{.G_WEBUI_SRC_DIR}}/yscope-log-viewer/node_modules'"
 
   package:
-    env:
-      NODE_ENV: "production"
     vars:
       CHECKSUM_FILE: "{{.G_PACKAGE_CHECKSUM_FILE}}"
       OUTPUT_DIR: "{{.G_PACKAGE_BUILD_DIR}}"
     sources:
-      - "{{.G_BUILD_DIR}}/package-venv.md5"
-      - "{{.G_BUILD_DIR}}/webui.md5"
-      - "{{.G_CORE_COMPONENT_BUILD_DIR}}/clg"
-      - "{{.G_CORE_COMPONENT_BUILD_DIR}}/clo"
-      - "{{.G_CORE_COMPONENT_BUILD_DIR}}/clp"
-      - "{{.G_CORE_COMPONENT_BUILD_DIR}}/clp-s"
-      - "{{.G_CORE_COMPONENT_BUILD_DIR}}/indexer"
-      - "{{.G_CORE_COMPONENT_BUILD_DIR}}/log-converter"
-      - "{{.G_CORE_COMPONENT_BUILD_DIR}}/reducer-server"
-      - "{{.G_SPIDER_BUILD_DIR}}/spider-build/src/spider/spider_scheduler"
-      - "{{.G_SPIDER_BUILD_DIR}}/spider-build/src/spider/spider_worker"
+      - "{{.G_BUILD_DIR}}/clp-package-image.id"
+      - "{{.G_WEBUI_BUILD_DIR}}/client/settings.json"
+      - "{{.G_WEBUI_BUILD_DIR}}/server/dist/settings.json"
       - "{{.TASKFILE}}"
-      - "components/clp-mcp-server/dist/*.whl"
-      - "components/clp-package-utils/dist/*.whl"
-      - "components/clp-py-utils/dist/*.whl"
-      - "components/job-orchestration/dist/*.whl"
      - "components/package-template/src/**/*"
      - "tools/deployment/package/**/*"
    generates: ["{{.CHECKSUM_FILE}}"]
    deps:
-      - "core"
-      - "clp-mcp-server"
-      - "clp-package-utils"
-      - "clp-py-utils"
-      - "deps:spider"
-      - "init"
-      - "job-orchestration"
-      - "package-venv"
+      - "docker-images:package"
      - task: "utils:checksum:validate"
        vars:
          CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
          INCLUDE_PATTERNS: ["{{.OUTPUT_DIR}}"]
-      - "webui"
    cmds:
      - "rm -rf '{{.OUTPUT_DIR}}'"
-      - "rsync -a components/package-template/src/ '{{.OUTPUT_DIR}}'"
-      - "mkdir -p '{{.OUTPUT_DIR}}/lib/python3/site-packages'"
-      - |-
-        . "{{.G_PACKAGE_VENV_DIR}}/bin/activate"
-        pip3 install --upgrade \
-          components/clp-mcp-server/dist/*.whl \
-          components/clp-package-utils/dist/*.whl \
-          components/clp-py-utils/dist/*.whl \
-          components/job-orchestration/dist/*.whl \
-          -t "{{.OUTPUT_DIR}}/lib/python3/site-packages"
-      - "mkdir -p '{{.OUTPUT_DIR}}/bin'"
      - >-
-        rsync -a
-        "{{.G_CORE_COMPONENT_BUILD_DIR}}/clg"
-        "{{.G_CORE_COMPONENT_BUILD_DIR}}/clo"
-        "{{.G_CORE_COMPONENT_BUILD_DIR}}/clp"
-        "{{.G_CORE_COMPONENT_BUILD_DIR}}/clp-s"
-        "{{.G_CORE_COMPONENT_BUILD_DIR}}/indexer"
-        "{{.G_CORE_COMPONENT_BUILD_DIR}}/log-converter"
-        "{{.G_CORE_COMPONENT_BUILD_DIR}}/reducer-server"
-        "{{.G_SPIDER_BUILD_DIR}}/spider-build/src/spider/spider_scheduler"
-        "{{.G_SPIDER_BUILD_DIR}}/spider-build/src/spider/spider_worker"
-        "{{.OUTPUT_DIR}}/bin/"
+        rsync --archive
+        "components/package-template/src/"
+        "{{.OUTPUT_DIR}}"
      - >-
-        rsync -a
-        "{{.G_NODEJS_22_BIN_DIR}}/node"
-        "{{.OUTPUT_DIR}}/bin/node-22"
-      - "mkdir -p '{{.OUTPUT_DIR}}/var/www/'"
+        rsync --archive --mkpath
+        "{{.G_WEBUI_BUILD_DIR}}/client/settings.json"
+        "{{.OUTPUT_DIR}}/var/www/webui/client/"
      - >-
-        rsync -a
-        "{{.G_WEBUI_BUILD_DIR}}/"
-        "{{.OUTPUT_DIR}}/var/www/webui"
-      - |-
-        cd "{{.OUTPUT_DIR}}/var/www/webui"
-        PATH="{{.G_NODEJS_22_BIN_DIR}}":$PATH npm ci --omit=dev
+        rsync --archive --mkpath
+        "{{.G_WEBUI_BUILD_DIR}}/server/dist/settings.json"
+        "{{.OUTPUT_DIR}}/var/www/webui/server/dist/"
      - >-
-        rsync -a
+        rsync --archive
        "tools/deployment/package/"
        "{{.OUTPUT_DIR}}"
+      - >-
+        rsync --archive
+        "{{.G_BUILD_DIR}}/clp-package-image.id"
+        "{{.OUTPUT_DIR}}"
      - "echo '{{.G_PACKAGE_VERSION}}' > '{{.OUTPUT_DIR}}/VERSION'"
      # This command must be last
      - task: "utils:checksum:compute"
@@ -249,9 +211,19 @@ tasks:
       vars:
         COMPONENT: "{{.TASK}}"
 
+  package-build-deps:
+    deps:
+      - "core"
+      - "deps:spider"
+      - "init"
+      - "python-libs"
+      - "webui"
+
   webui:
+    env:
+      NODE_ENV: "production"
     vars:
-      CHECKSUM_FILE: "{{.G_BUILD_DIR}}/{{.TASK}}.md5"
+      CHECKSUM_FILE: "{{.G_WEBUI_CHECKSUM_FILE}}"
       OUTPUT_DIR: "{{.G_WEBUI_BUILD_DIR}}"
     sources:
       - "{{.G_WEBUI_CLIENT_NODE_MODULES_CHECKSUM_FILE}}"
@@ -290,6 +262,9 @@ tasks:
         rsync -a package.json "{{.OUTPUT_DIR}}/server/"
       - |-
         rsync -a package.json package-lock.json "{{.OUTPUT_DIR}}/"
+      - |-
+        cd "{{.OUTPUT_DIR}}"
+        PATH="{{.G_NODEJS_22_BIN_DIR}}":$PATH npm ci --omit=dev
       - task: "utils:checksum:compute"
         vars:
           CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
@@ -503,6 +478,46 @@ tasks:
         --directory '{{.G_BUILD_DIR}}'
         --dereference
         '{{.VERSIONED_PACKAGE_NAME}}'
 
+  python-libs:
+    internal: true
+    vars:
+      CHECKSUM_FILE: "{{.G_PYTHON_LIBS_CHECKSUM_FILE}}"
+      OUTPUT_DIR: "{{.G_PYTHON_LIBS_DIR}}"
+    sources:
+      - "{{.G_BUILD_DIR}}/package-venv.md5"
+      - "{{.TASKFILE}}"
+      - "components/clp-mcp-server/dist/*.whl"
+      - "components/clp-package-utils/dist/*.whl"
+      - "components/clp-py-utils/dist/*.whl"
+      - "components/job-orchestration/dist/*.whl"
+    generates: ["{{.CHECKSUM_FILE}}"]
+    deps:
+      - "clp-mcp-server"
+      - "clp-package-utils"
+      - "clp-py-utils"
+      - "init"
+      - "job-orchestration"
+      - "package-venv"
+      - task: "utils:checksum:validate"
+        vars:
+          CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
+          INCLUDE_PATTERNS: ["{{.OUTPUT_DIR}}"]
+    cmds:
+      - "rm -rf '{{.OUTPUT_DIR}}'"
+      - "mkdir -p '{{.OUTPUT_DIR}}'"
+      - |-
+        . "{{.G_PACKAGE_VENV_DIR}}/bin/activate"
+        pip3 install --upgrade \
+          components/clp-mcp-server/dist/*.whl \
+          components/clp-package-utils/dist/*.whl \
+          components/clp-py-utils/dist/*.whl \
+          components/job-orchestration/dist/*.whl \
+          -t "{{.OUTPUT_DIR}}"
+      - task: "utils:checksum:compute"
+        vars:
+          CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
+          INCLUDE_PATTERNS: ["{{.OUTPUT_DIR}}"]
+
   package-venv:
     internal: true
     vars:
diff --git a/taskfiles/docker-images.yaml b/taskfiles/docker-images.yaml
index 821c1a91ae..1177075d06 100644
--- a/taskfiles/docker-images.yaml
+++ b/taskfiles/docker-images.yaml
@@ -2,14 +2,8 @@ version: "3"
 
 tasks:
   package:
-    vars:
-      SRC_DIR: "{{.ROOT_DIR}}/tools/docker-images/clp-package"
-    dir: "{{.SRC_DIR}}"
-    sources:
-      - "{{.G_PACKAGE_CHECKSUM_FILE}}"
-      - "{{.SRC_DIR}}/**/*"
+    dir: "{{.ROOT_DIR}}/tools/docker-images/clp-package"
     deps:
-      - ":package"
+      - ":package-build-deps"
     cmds:
       - "./build.sh"
-      - "rsync --archive '{{.G_BUILD_DIR}}/clp-package-image.id' '{{.G_PACKAGE_BUILD_DIR}}'"
diff --git a/tools/deployment/package/docker-compose.runtime.yaml b/tools/deployment/package/docker-compose.runtime.yaml
new file mode 100644
index 0000000000..daf72f2623
--- /dev/null
+++ b/tools/deployment/package/docker-compose.runtime.yaml
@@ -0,0 +1,38 @@
+services:
+  clp-runtime:
+    hostname: "clp_runtime"
+    image: "${CLP_PACKAGE_CONTAINER_IMAGE_REF:-clp-package}"
+    logging:
+      driver: "local"
+    stdin_open: true
+    tty: true
+    user: "${CLP_FIRST_PARTY_SERVICE_UID_GID:-1000:999}"
+    environment:
+      # NOTE: We forward "$HOME" into the container so that if the user specified "~" anywhere in
+      # their config, the container can resolve it to the relevant path on the host. As a result,
+      # the container should not rely on "$HOME" or "~" pointing to a path inside the container.
+      HOME: "${HOME:?Please set a value.}"
+
+      CLP_HOME: "${CLP_HOME:?Please set a value.}"
+      CLP_PWD_HOST: "${CLP_PWD_HOST:-${PWD}}"
+    volumes:
+      # Docker daemon bridge
+      - type: "bind"
+        source: "${CLP_DOCKER_PLUGIN_DIR:-/usr/local/lib/docker/cli-plugins}"
+        target: "/usr/local/lib/docker/cli-plugins"
+        read_only: true
+      - type: "bind"
+        source: "${CLP_DOCKER_SOCK_PATH:-/var/run/docker.sock}"
+        target: "/var/run/docker.sock"
+      - type: "bind"
+        source: "/usr/bin/docker"
+        target: "/usr/bin/docker"
+        read_only: true
+
+      # Host filesystem
+      - type: "bind"
+        source: "${CLP_HOME:?Please set a value.}"
+        target: "${CLP_HOME:?Please set a value.}"
+      - type: "bind"
+        source: "/"
+        target: "/mnt/host"
diff --git a/tools/docker-images/clp-package/Dockerfile b/tools/docker-images/clp-package/Dockerfile
index 6728a5f0c9..6dc00016d1 100644
--- a/tools/docker-images/clp-package/Dockerfile
+++ b/tools/docker-images/clp-package/Dockerfile
@@ -27,6 +27,18 @@ RUN useradd --uid ${UID} --shell /bin/bash --home-dir ${CLP_HOME} ${USER}
 USER ${USER}
 WORKDIR ${CLP_HOME}
 
-COPY --link --chown=${UID} ./build/clp-package ${CLP_HOME}
+COPY --link --chown=${UID} ./components/package-template/src/ .
+COPY --link --chown=${UID} ./build/core/clg bin/
+COPY --link --chown=${UID} ./build/core/clo bin/
+COPY --link --chown=${UID} ./build/core/clp bin/
+COPY --link --chown=${UID} ./build/core/clp-s bin/
+COPY --link --chown=${UID} ./build/core/indexer bin/
+COPY --link --chown=${UID} ./build/core/log-converter bin/
+COPY --link --chown=${UID} ./build/core/reducer-server bin/
 COPY --link --chown=${UID} ./build/deps/cpp/mariadb-connector-cpp-install/lib/*/libmariadbcpp.so* \
-    ${CLP_HOME}/lib/
+    lib/
+COPY --link --chown=${UID} ./build/spider/spider-build/src/spider/spider_scheduler bin/
+COPY --link --chown=${UID} ./build/spider/spider-build/src/spider/spider_worker bin/
+COPY --link --chown=${UID} ./build/nodejs-22/bin/node bin/node-22
+COPY --link --chown=${UID} ./build/python-libs/ lib/python3/site-packages/
+COPY --link --chown=${UID} ./build/webui/ var/www/webui/