Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 13 additions & 15 deletions components/clp-py-utils/clp_py_utils/clp_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,13 @@
COMPRESSION_JOBS_TABLE_NAME = "compression_jobs"
COMPRESSION_TASKS_TABLE_NAME = "compression_tasks"

OS_RELEASE_FILE_PATH = pathlib.Path("etc") / "os-release"

CLP_DEFAULT_CREDENTIALS_FILE_PATH = pathlib.Path("etc") / "credentials.yml"
CLP_DEFAULT_DATA_DIRECTORY_PATH = pathlib.Path("var") / "data"
CLP_DEFAULT_DATASET_NAME = "default"
CLP_METADATA_TABLE_PREFIX = "clp_"
CLP_PACKAGE_IMAGE_ID_PATH = pathlib.Path("clp-package-image.id")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about CLP_PACKAGE_CONTAINER_IMAGE_ID_PATH? Similarly, how about _container_image_id_path?

CLP_SHARED_CONFIG_FILENAME = ".clp-config.yml"

CLP_VERSION_FILE_PATH = pathlib.Path("VERSION")

# Environment variable names
CLP_DB_USER_ENV_VAR_NAME = "CLP_DB_USER"
Expand Down Expand Up @@ -902,7 +901,8 @@ class CLPConfig(BaseModel):
logs_directory: pathlib.Path = pathlib.Path("var") / "log"
aws_config_directory: Optional[pathlib.Path] = None

_os_release_file_path: pathlib.Path = PrivateAttr(default=OS_RELEASE_FILE_PATH)
_image_id_path: pathlib.Path = PrivateAttr(default=CLP_PACKAGE_IMAGE_ID_PATH)
_version_file_path: pathlib.Path = PrivateAttr(default=CLP_VERSION_FILE_PATH)

@field_validator("aws_config_directory")
@classmethod
Expand All @@ -919,7 +919,8 @@ def make_config_paths_absolute(self, clp_home: pathlib.Path):
self.stream_output.storage.make_config_paths_absolute(clp_home)
self.data_directory = make_config_path_absolute(clp_home, self.data_directory)
self.logs_directory = make_config_path_absolute(clp_home, self.logs_directory)
self._os_release_file_path = make_config_path_absolute(clp_home, self._os_release_file_path)
self._image_id_path = make_config_path_absolute(clp_home, self._image_id_path)
self._version_file_path = make_config_path_absolute(clp_home, self._version_file_path)

def validate_logs_input_config(self):
logs_input_type = self.logs_input.type
Expand Down Expand Up @@ -1012,17 +1013,14 @@ def load_execution_container_name(self):
# Accept configured value for debug purposes
return

os_release = dotenv_values(self._os_release_file_path)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should have removed the dotenv_values import and related dependency installation in this PR. @coderabbitai file an issue

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if dotenv_values is used anywhere else in the codebase
rg -n 'dotenv_values' --type=py

# Check for python-dotenv dependency in requirements files
fd -e txt -e in -e toml -e cfg --exec grep -Hn 'python-dotenv' {}

# Also check for pyproject.toml dependencies
fd pyproject.toml --exec grep -A 20 'dependencies' {}

Length of output: 1741


🏁 Script executed:

#!/bin/bash
# Check for dotenv usage specifically in the clp-py-utils component
rg -n 'dotenv' components/clp-py-utils/ --type=py

Length of output: 85


🏁 Script executed:

#!/bin/bash
# File an issue to track the removal of unused dotenv_values import and dependency
gh issue create \
  --title "Remove unused python-dotenv dependency from clp-py-utils" \
  --body "## Context
After PR #1357, the \`dotenv_values\` import is no longer used in \`components/clp-py-utils/clp_py_utils/clp_config.py\`.

## Tasks
- [ ] Remove the unused import: \`from dotenv import dotenv_values\` from \`components/clp-py-utils/clp_py_utils/clp_config.py\` (line 6)
- [ ] Remove the \`python-dotenv\` dependency from \`components/clp-py-utils/pyproject.toml\`

## Notes
- The \`python-dotenv\` dependency should remain in \`tools/deployment/presto-clp/scripts/requirements.txt\` as it's still used by \`tools/deployment/presto-clp/scripts/init.py\`

## References
- PR: https://github.com/y-scope/clp/pull/1357
- Comment: https://github.com/y-scope/clp/pull/1357#discussion_r2442446179
- Requested by: @junhaoliao" \
  --assignee junhaoliao

Length of output: 42


@junhaoliao, I've filed issue #1443 to track the removal of the unused python-dotenv dependency and import.

Summary:

  • The dotenv_values import in components/clp-py-utils/clp_py_utils/clp_config.py (line 6) is no longer used after this PR
  • The python-dotenv dependency in components/clp-py-utils/pyproject.toml can be removed
  • The dependency should remain in tools/deployment/presto-clp/scripts/requirements.txt since it's still actively used by tools/deployment/presto-clp/scripts/init.py

The issue has been assigned to you with the tasks and context needed for the cleanup.


🧬 Code Graph Analysis Results

components/clp-py-utils/clp_py_utils/clp_config.py

[Snippet 1]

  • Filename: components/clp-py-utils/clp_py_utils/clp_config.py

  • Line(s): 6-6

  • Content:
    from dotenv import dotenv_values

  • Rationale: This import is directly related to the user comment about removing dotenv_values usage and the related dependency installation.

if "ubuntu" == os_release["ID"]:
self.execution_container = (
f"clp-execution-x86-{os_release['ID']}-{os_release['VERSION_CODENAME']}:main"
)
else:
raise NotImplementedError(
f"Unsupported OS {os_release['ID']} in {OS_RELEASE_FILE_PATH}"
)
if self._image_id_path.exists():
with open(self._image_id_path) as image_id_file:
self.execution_container = image_id_file.read().strip()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we may change this attribute name to package_container though i feel the name execution_container is more intuitive to those who are unfamiliar with the history of the execution container

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about container_image_ref?


self.execution_container = "ghcr.io/y-scope/clp/" + self.execution_container
if self.execution_container == "":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.execution_container is None by default which means this branch won't get triggered unless the image ID file is empty, right? It also feels a little inconsistent to validate that the image ID file isn't empty, but we don't do the same for the version file.

I think we should expect both of them to have content and let Python throw an exception if they don't. So I would write this as:

        if self._image_id_path.exists():
            with open(self._image_id_path) as image_id_file:
                self.execution_container = image_id_file.read().strip()
        else:
            with open(self._version_file_path) as version_file:
                clp_package_version = version_file.read().strip()
            self.execution_container = f"ghcr.io/y-scope/clp/clp-package:{clp_package_version}"

with open(self._version_file_path) as version_file:
clp_package_version = version_file.read().strip()
self.execution_container = f"ghcr.io/y-scope/clp/clp-package:{clp_package_version}"

def get_shared_config_file_path(self) -> pathlib.Path:
return self.logs_directory / CLP_SHARED_CONFIG_FILENAME
Expand Down
2 changes: 1 addition & 1 deletion taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ vars:

tasks:
default:
deps: ["package"]
deps: ["docker-images:package"]

clean:
cmds:
Expand Down
1 change: 1 addition & 0 deletions taskfiles/docker-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ tasks:
- ":package"
cmds:
- "./build.sh"
- "rsync -a '{{.G_BUILD_DIR}}/clp-package-image.id' '{{.G_PACKAGE_BUILD_DIR}}'"
Loading