-
Notifications
You must be signed in to change notification settings - Fork 83
feat(clp-package)!: Containerize the scripts in sbin via Docker Compose; Remove redundant assets from the package (resolves #1358).
#1512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
00efbc1
ed3dde2
d307457
f871424
b03539a
1b65afe
3b4d44c
347b181
74a3921
885e9b5
0db2f30
695ed77
4f21e0f
a7c81a3
e6e8217
be2d200
d910944
163921b
3f5c302
65d421f
83c581c
e03a361
299795f
0ccd155
beacdcf
c7141e7
25c2450
0ac4501
479fc81
9921d48
a2dd5cf
8d6deb6
c6c5164
3b44945
4b6f337
bb90b69
ae825c0
9a66669
6983943
e372661
bee1b79
b0e8abf
5172963
bb5d8d2
95a8e1f
395e42f
3af2e4d
198f398
1d02051
3063e85
306997a
4f6c5b4
ef627c6
75ada35
969fde0
732dc92
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,7 @@ | |
| get_datasets_table_name, | ||
| get_files_table_name, | ||
| ) | ||
| from clp_py_utils.core import resolve_host_path | ||
|
|
||
| from clp_package_utils.general import ( | ||
| check_docker_dependencies, | ||
|
|
@@ -119,9 +120,12 @@ def _set_up_env_for_database(self) -> EnvVarsDict: | |
| logs_dir = self._clp_config.logs_directory / component_name | ||
| validate_db_config(self._clp_config, conf_logging_file, data_dir, logs_dir) | ||
|
|
||
| data_dir.mkdir(exist_ok=True, parents=True) | ||
| logs_dir.mkdir(exist_ok=True, parents=True) | ||
| _chown_paths_if_root(data_dir, logs_dir) | ||
| resolved_data_dir = resolve_host_path(data_dir) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
|
|
||
| resolved_data_dir.mkdir(exist_ok=True, parents=True) | ||
| resolved_logs_dir.mkdir(exist_ok=True, parents=True) | ||
| _chown_paths_if_root(resolved_data_dir, resolved_logs_dir) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -166,8 +170,10 @@ def _set_up_env_for_queue(self) -> EnvVarsDict: | |
| logs_dir = self._clp_config.logs_directory / component_name | ||
| validate_queue_config(self._clp_config, logs_dir) | ||
|
|
||
| logs_dir.mkdir(exist_ok=True, parents=True) | ||
| _chown_paths_if_root(logs_dir) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
|
|
||
| resolved_logs_dir.mkdir(exist_ok=True, parents=True) | ||
| _chown_paths_if_root(resolved_logs_dir) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -204,9 +210,12 @@ def _set_up_env_for_redis(self) -> EnvVarsDict: | |
| logs_dir = self._clp_config.logs_directory / component_name | ||
| validate_redis_config(self._clp_config, conf_file, data_dir, logs_dir) | ||
|
|
||
| data_dir.mkdir(exist_ok=True, parents=True) | ||
| logs_dir.mkdir(exist_ok=True, parents=True) | ||
| _chown_paths_if_root(data_dir, logs_dir) | ||
| resolved_data_dir = resolve_host_path(data_dir) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
|
|
||
| resolved_data_dir.mkdir(exist_ok=True, parents=True) | ||
| resolved_logs_dir.mkdir(exist_ok=True, parents=True) | ||
| _chown_paths_if_root(resolved_data_dir, resolved_logs_dir) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -252,9 +261,12 @@ def _set_up_env_for_results_cache(self) -> EnvVarsDict: | |
| logs_dir = self._clp_config.logs_directory / component_name | ||
| validate_results_cache_config(self._clp_config, conf_file, data_dir, logs_dir) | ||
|
|
||
| data_dir.mkdir(exist_ok=True, parents=True) | ||
| logs_dir.mkdir(exist_ok=True, parents=True) | ||
| _chown_paths_if_root(data_dir, logs_dir) | ||
| resolved_data_dir = resolve_host_path(data_dir) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
|
|
||
| resolved_data_dir.mkdir(exist_ok=True, parents=True) | ||
| resolved_logs_dir.mkdir(exist_ok=True, parents=True) | ||
| _chown_paths_if_root(resolved_data_dir, resolved_logs_dir) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -291,7 +303,8 @@ def _set_up_env_for_compression_scheduler(self) -> EnvVarsDict: | |
| logger.info(f"Setting up environment for {component_name}...") | ||
|
|
||
| logs_dir = self._clp_config.logs_directory / component_name | ||
| logs_dir.mkdir(parents=True, exist_ok=True) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
| resolved_logs_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -314,7 +327,8 @@ def _set_up_env_for_query_scheduler(self) -> EnvVarsDict: | |
| logger.info(f"Setting up environment for {component_name}...") | ||
|
|
||
| logs_dir = self._clp_config.logs_directory / component_name | ||
| logs_dir.mkdir(parents=True, exist_ok=True) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
| resolved_logs_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -336,7 +350,8 @@ def _set_up_env_for_compression_worker(self, num_workers: int) -> EnvVarsDict: | |
| logger.info(f"Setting up environment for {component_name}...") | ||
|
|
||
| logs_dir = self._clp_config.logs_directory / component_name | ||
| logs_dir.mkdir(parents=True, exist_ok=True) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
| resolved_logs_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -365,7 +380,8 @@ def _set_up_env_for_query_worker(self, num_workers: int) -> EnvVarsDict: | |
| logger.info(f"Setting up environment for {component_name}...") | ||
|
|
||
| logs_dir = self._clp_config.logs_directory / component_name | ||
| logs_dir.mkdir(parents=True, exist_ok=True) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
| resolved_logs_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -392,7 +408,8 @@ def _set_up_env_for_reducer(self, num_workers: int) -> EnvVarsDict: | |
| logger.info(f"Setting up environment for {component_name}...") | ||
|
|
||
| logs_dir = self._clp_config.logs_directory / component_name | ||
| logs_dir.mkdir(parents=True, exist_ok=True) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
| resolved_logs_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -426,8 +443,12 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict: | |
| server_settings_json_path = ( | ||
| self._clp_home / "var" / "www" / "webui" / "server" / "dist" / "settings.json" | ||
| ) | ||
| resolved_client_settings_json_path = resolve_host_path(client_settings_json_path) | ||
| resolved_server_settings_json_path = resolve_host_path(server_settings_json_path) | ||
| validate_webui_config( | ||
| self._clp_config, client_settings_json_path, server_settings_json_path | ||
| self._clp_config, | ||
| client_settings_json_path, | ||
| server_settings_json_path, | ||
| ) | ||
|
|
||
| # Read, update, and write back client's and server's settings.json | ||
|
|
@@ -455,9 +476,9 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict: | |
| "SqlDbCompressionJobsTableName": COMPRESSION_JOBS_TABLE_NAME, | ||
| } | ||
| client_settings_json = self._read_and_update_settings_json( | ||
| client_settings_json_path, client_settings_json_updates | ||
| resolved_client_settings_json_path, client_settings_json_updates | ||
| ) | ||
| with open(client_settings_json_path, "w") as client_settings_json_file: | ||
| with open(resolved_client_settings_json_path, "w") as client_settings_json_file: | ||
| client_settings_json_file.write(json.dumps(client_settings_json)) | ||
|
Comment on lines
+479
to
482
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Prefer json.dump with indentation for readability. Small DX improvement for troubleshooting settings. - with open(resolved_client_settings_json_path, "w") as client_settings_json_file:
- client_settings_json_file.write(json.dumps(client_settings_json))
+ with open(resolved_client_settings_json_path, "w") as client_settings_json_file:
+ json.dump(client_settings_json, client_settings_json_file, indent=2, sort_keys=True)
@@
- with open(resolved_server_settings_json_path, "w") as settings_json_file:
- settings_json_file.write(json.dumps(server_settings_json))
+ with open(resolved_server_settings_json_path, "w") as settings_json_file:
+ json.dump(server_settings_json, settings_json_file, indent=2, sort_keys=True)Also applies to: 536-537 🤖 Prompt for AI Agents |
||
|
|
||
| server_settings_json_updates = { | ||
|
|
@@ -510,9 +531,9 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict: | |
| server_settings_json_updates["PrestoPort"] = None | ||
|
|
||
| server_settings_json = self._read_and_update_settings_json( | ||
| server_settings_json_path, server_settings_json_updates | ||
| resolved_server_settings_json_path, server_settings_json_updates | ||
| ) | ||
| with open(server_settings_json_path, "w") as settings_json_file: | ||
| with open(resolved_server_settings_json_path, "w") as settings_json_file: | ||
| settings_json_file.write(json.dumps(server_settings_json)) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
@@ -544,7 +565,8 @@ def _set_up_env_for_mcp_server(self) -> EnvVarsDict: | |
|
|
||
| logs_dir = self._clp_config.logs_directory / component_name | ||
| validate_mcp_server_config(self._clp_config, logs_dir) | ||
| logs_dir.mkdir(parents=True, exist_ok=True) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
| resolved_logs_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -581,7 +603,8 @@ def _set_up_env_for_garbage_collector(self) -> EnvVarsDict: | |
| logger.info(f"Setting up environment for {component_name}...") | ||
|
|
||
| logs_dir = self._clp_config.logs_directory / component_name | ||
| logs_dir.mkdir(parents=True, exist_ok=True) | ||
| resolved_logs_dir = resolve_host_path(logs_dir) | ||
| resolved_logs_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| env_vars = EnvVarsDict() | ||
|
|
||
|
|
@@ -696,8 +719,10 @@ def set_up_env(self) -> None: | |
| env_vars["CLP_LOGS_INPUT_DIR_HOST"] = str(self._clp_config.logs_input.directory) | ||
|
|
||
| # Output config | ||
| archive_output_dir_str = str(self._clp_config.archive_output.get_directory()) | ||
| stream_output_dir_str = str(self._clp_config.stream_output.get_directory()) | ||
| archive_output_dir = self._clp_config.archive_output.get_directory() | ||
| stream_output_dir = self._clp_config.stream_output.get_directory() | ||
| archive_output_dir_str = str(archive_output_dir) | ||
| stream_output_dir_str = str(stream_output_dir) | ||
| if self._clp_config.archive_output.storage.type == StorageType.FS: | ||
| env_vars["CLP_ARCHIVE_OUTPUT_DIR_HOST"] = archive_output_dir_str | ||
| if self._clp_config.archive_output.storage.type == StorageType.S3: | ||
|
|
@@ -804,13 +829,14 @@ def get_or_create_instance_id(clp_config: CLPConfig) -> str: | |
| :return: The instance ID. | ||
| """ | ||
| instance_id_file_path = clp_config.logs_directory / "instance-id" | ||
| resolved_instance_id_file_path = resolve_host_path(instance_id_file_path) | ||
|
|
||
| if instance_id_file_path.exists(): | ||
| with open(instance_id_file_path, "r") as f: | ||
| if resolved_instance_id_file_path.exists(): | ||
| with open(resolved_instance_id_file_path, "r") as f: | ||
| instance_id = f.readline() | ||
| else: | ||
| instance_id = str(uuid.uuid4())[-4:] | ||
| with open(instance_id_file_path, "w") as f: | ||
| with open(resolved_instance_id_file_path, "w") as f: | ||
| f.write(instance_id) | ||
|
|
||
| return instance_id | ||
|
Comment on lines
841
to
852
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Trim minor style issue and reduce collision risk for instance IDs.
- if resolved_instance_id_file_path.exists():
- with open(resolved_instance_id_file_path, "r") as f:
+ if resolved_instance_id_file_path.exists():
+ with open(resolved_instance_id_file_path) as f:
instance_id = f.readline()
else:
- instance_id = str(uuid.uuid4())[-4:]
+ instance_id = uuid.uuid4().hex[-8:]
with open(resolved_instance_id_file_path, "w") as f:
f.write(instance_id)🧰 Tools🪛 Ruff (0.14.2)835-835: Unnecessary mode argument Remove mode argument (UP015) 🤖 Prompt for AI Agents |
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -38,6 +38,7 @@ | |||||||||||||||||||||||||||||||||||||
| get_config_value, | ||||||||||||||||||||||||||||||||||||||
| make_config_path_absolute, | ||||||||||||||||||||||||||||||||||||||
| read_yaml_config_file, | ||||||||||||||||||||||||||||||||||||||
| resolve_host_path, | ||||||||||||||||||||||||||||||||||||||
| validate_path_could_be_dir, | ||||||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||||||
| from strenum import KebabCaseStrEnum | ||||||||||||||||||||||||||||||||||||||
|
|
@@ -355,7 +356,8 @@ def dump_container_config( | |||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||
| config_file_path_on_host = clp_config.logs_directory / config_filename | ||||||||||||||||||||||||||||||||||||||
| config_file_path_on_container = container_clp_config.logs_directory / config_filename | ||||||||||||||||||||||||||||||||||||||
| with open(config_file_path_on_host, "w") as f: | ||||||||||||||||||||||||||||||||||||||
| resolved_config_file_path_on_host = resolve_host_path(config_file_path_on_host) | ||||||||||||||||||||||||||||||||||||||
| with open(resolved_config_file_path_on_host, "w") as f: | ||||||||||||||||||||||||||||||||||||||
| yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f) | ||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
| resolved_config_file_path_on_host = resolve_host_path(config_file_path_on_host) | |
| with open(resolved_config_file_path_on_host, "w") as f: | |
| yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f) | |
| resolved_config_file_path_on_host = resolve_host_path(config_file_path_on_host) | |
| resolved_config_file_path_on_host.parent.mkdir(parents=True, exist_ok=True) | |
| with open(resolved_config_file_path_on_host, "w") as f: | |
| yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f) |
🤖 Prompt for AI Agents
In components/clp-package-utils/clp_package_utils/general.py around lines 359 to
361, the code opens the resolved config file path for writing without ensuring
its parent directory exists; create the parent directory before opening the file
by resolving the path (use Path or os.path), computing parent_dir =
resolved_path.parent (or os.path.dirname), and calling os.makedirs(parent_dir,
exist_ok=True) (skip if parent_dir is empty or root), then open and write the
YAML as before so first-run writes won't fail when the logs dir hierarchy is
missing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also mkdir for generated default credentials file.
Generating the default file will fail if its parent dir does not exist.
Apply this diff:
- resolved_credentials_file_path = resolve_host_path(credentials_file_path)
+ resolved_credentials_file_path = resolve_host_path(credentials_file_path)
@@
- generate_credentials_file(resolved_credentials_file_path)
+ resolved_credentials_file_path.parent.mkdir(parents=True, exist_ok=True)
+ generate_credentials_file(resolved_credentials_file_path)📝 Committable suggestion
‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.
| if not resolved_credentials_file_path.exists(): | |
| if ( | |
| make_config_path_absolute(clp_home, CLP_DEFAULT_CREDENTIALS_FILE_PATH) | |
| == credentials_file_path | |
| and generate_default_file | |
| ): | |
| generate_credentials_file(credentials_file_path) | |
| generate_credentials_file(resolved_credentials_file_path) | |
| else: | |
| if not resolved_credentials_file_path.exists(): | |
| if ( | |
| make_config_path_absolute(clp_home, CLP_DEFAULT_CREDENTIALS_FILE_PATH) | |
| == credentials_file_path | |
| and generate_default_file | |
| ): | |
| resolved_credentials_file_path.parent.mkdir(parents=True, exist_ok=True) | |
| generate_credentials_file(resolved_credentials_file_path) | |
| else: |
🤖 Prompt for AI Agents
In components/clp-package-utils/clp_package_utils/general.py around lines 471 to
478, the code generates a default credentials file but doesn't ensure its parent
directory exists; before calling
generate_credentials_file(resolved_credentials_file_path) create the parent
directory for resolved_credentials_file_path (e.g.,
resolved_credentials_file_path.parent.mkdir(parents=True, exist_ok=True)) so
file creation won't fail when the directory is missing.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| StorageEngine, | ||
| StorageType, | ||
| ) | ||
| from clp_py_utils.core import resolve_host_path | ||
|
|
||
| from clp_package_utils.general import ( | ||
| CLPConfig, | ||
|
|
@@ -220,7 +221,6 @@ def main(argv: List[str]) -> int: | |
| ) | ||
|
|
||
| necessary_mounts: List[Optional[DockerMount]] = [ | ||
| mounts.clp_home, | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mounts like this have to be removed, or they conflict with the contents inside the docker image.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, can you explain more?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry, that comment was so unclear that it took me time to recall why it was done - on the host, clp_home is essentially the clp-package directory root. previously we were mounting the package root into the container so the python modules in |
||
| mounts.logs_dir, | ||
| mounts.archives_output_dir, | ||
| ] | ||
|
|
@@ -281,7 +281,8 @@ def main(argv: List[str]) -> int: | |
| logger.debug(f"Docker command failed: {shlex.join(cmd)}") | ||
|
|
||
| # Remove generated files | ||
| generated_config_path_on_host.unlink() | ||
| resolved_generated_config_path_on_host = resolve_host_path(generated_config_path_on_host) | ||
| resolved_generated_config_path_on_host.unlink() | ||
|
|
||
| return ret_code | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.