From 25371dbf455abf34972f401b0e4e8cebfa344fac Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 16 Apr 2025 08:21:27 -0500 Subject: [PATCH 01/22] add and move generate_sandbox_dockerfile script to scripts dir --- scripts/generate_sandbox_dockerfile.py | 225 +++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100755 scripts/generate_sandbox_dockerfile.py diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_sandbox_dockerfile.py new file mode 100755 index 000000000..6d8382ee6 --- /dev/null +++ b/scripts/generate_sandbox_dockerfile.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 + +"""Script to generate Dockerfile.sandbox files for ZenML projects. + +This ensures consistency across all project Docker images. +""" + +import argparse +import os +import re +import sys +from pathlib import Path + +DOCKERFILE_TEMPLATE = """# Sandbox base image +FROM safoinext/zenml-sandbox:latest + +# Project metadata +LABEL project_name="{project_name}" +LABEL project_version="0.1.0" + +# Install project-specific dependencies +RUN pip install --no-cache-dir \\ +{dependencies} + +# Set workspace directory +WORKDIR /workspace + +# Clone only the project directory and reorganize +RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \\ + cp -r /tmp/zenml-projects/{project_name}/* /workspace/ && \\ + rm -rf /tmp/zenml-projects + +# Create a template .env file for API keys +RUN echo "{api_vars}" > .env + +# Create a .vscode directory and settings.json file +RUN mkdir -p /workspace/.vscode && \\ + echo '{{\\n'\\ + ' "workbench.colorTheme": "Default Dark Modern"\\n'\\ + '}}' > /workspace/.vscode/settings.json + +{env_vars_block} +""" + + +def format_env_key(key): + """Format environment variable placeholder text.""" + # Extract the service name from the key + service = key.split("_")[0] if "_" in key else key + # Special case handling + if key == "GOOGLE_APPLICATION_CREDENTIALS": + return 
f"{key}=PATH_TO_YOUR_GOOGLE_CREDENTIALS_FILE" + if key == "HF_TOKEN": + return f"{key}=YOUR_HUGGINGFACE_TOKEN_HERE" + return f"{key}=YOUR_{service}_KEY_HERE" + + +def parse_requirements(project_dir): + """Parse requirements.txt file if it exists.""" + req_file = Path(project_dir) / "requirements.txt" + if not req_file.exists(): + print(f"Warning: No requirements.txt found in {project_dir}") + return [] + + dependencies = [] + with open(req_file, "r") as f: + for line in f: + line = line.strip() + if line and not line.startswith("#"): + if line.startswith("polars"): + line = line.replace("polars", "polars-lts-cpu") + dependencies.append(line) + + return dependencies + + +def detect_api_keys(project_dir): + """Attempt to detect required API keys by scanning Python files.""" + api_patterns = { + # LLM Provider API Keys + "HF_TOKEN": r"huggingface|hf_token", + "OPENAI_API_KEY": r"openai|gpt", + "ANTHROPIC_API_KEY": r"anthropic|claude", + "MISTRAL_API_KEY": r"mistral|mistralai", + "GEMINI_API_KEY": r"gemini|google", + # ZenML-specific API Keys and Environment Variables + "ZENML_STORE_API_KEY": r"zenml.*api_key|zenml_store_api_key", + "ZENML_STORE_URL": r"zenml_store_url|zenml.*url", + "ZENML_PROJECT_SECRET_NAME": r"zenml.*secret|secret_name", + "ZENML_HF_USERNAME": r"zenml_hf_username|hf_username", + "ZENML_HF_SPACE_NAME": r"zenml_hf_space_name|hf_space_name", + # Monitoring and Logging + "LANGFUSE_PUBLIC_KEY": r"langfuse.*public", + "LANGFUSE_SECRET_KEY": r"langfuse.*secret", + "LANGFUSE_HOST": r"langfuse.*host", + # Vector Databases + "PINECONE_API_KEY": r"pinecone", + "SUPABASE_USER": r"supabase.*user", + "SUPABASE_PASSWORD": r"supabase.*password", + "SUPABASE_HOST": r"supabase.*host", + "SUPABASE_PORT": r"supabase.*port", + # Cloud Provider Keys + "AWS_ACCESS_KEY_ID": r"aws.*access|aws_access_key_id", + "AWS_SECRET_ACCESS_KEY": r"aws.*secret|aws_secret_access_key", + "AWS_SESSION_TOKEN": r"aws.*session|aws_session_token", + "AWS_REGION": r"aws.*region|aws_region", + 
"GOOGLE_APPLICATION_CREDENTIALS": r"google.*credentials", + # Other Service-Specific Keys + "FIFTYONE_LABELSTUDIO_API_KEY": r"fiftyone|labelstudio", + "NEPTUNE_API_TOKEN": r"neptune", + "GH_ACCESS_TOKEN": r"gh_access_token|github", + } + + detected_keys = [] + + for py_file in Path(project_dir).glob("**/*.py"): + with open(py_file, "r", encoding="utf-8", errors="ignore") as f: + content = f.read().lower() + for key, pattern in api_patterns.items(): + if re.search(pattern, content): + detected_keys.append(key) + + # Remove duplicates + detected_keys = list(set(detected_keys)) + + if not detected_keys: + detected_keys = ["API_KEY=YOUR_API_KEY_HERE"] + + return [format_env_key(key) for key in detected_keys] + + +def detect_env_variables(project_dir, dependencies): + """Detect which environment variables are needed based on dependencies and content.""" + env_vars = [] + + # Only add POLARS_SKIP_CPU_CHECK if any polars package is in dependencies + if any("polars" in dep.lower() for dep in dependencies): + env_vars.append("POLARS_SKIP_CPU_CHECK=1") + + # Only add TOKENIZERS_PARALLELISM if transformers or tokenizers is used + if any( + dep.lower().startswith(("transform", "token")) for dep in dependencies + ): + env_vars.append("TOKENIZERS_PARALLELISM=false") + + # These are development convenience variables - could be made optional + # env_vars.append("PYTHONUNBUFFERED=1") + # env_vars.append("PYTHONDONTWRITEBYTECODE=1") + + return env_vars + + +def generate_dockerfile(project_name, output_dir=None): + """Generate a Dockerfile.sandbox for the specified project.""" + if output_dir is None: + output_dir = project_name + + project_dir = Path(output_dir) + if not project_dir.exists(): + print(f"Error: Project directory {project_dir} not found") + return False + + # Get dependencies + dependencies = parse_requirements(project_dir) + if dependencies: + formatted_deps = "\n".join( + f' "{dep}" \\' for dep in dependencies[:-1] + ) + if formatted_deps: + formatted_deps += f'\n 
"{dependencies[-1]}"' + else: + formatted_deps = f' "{dependencies[-1]}"' + else: + formatted_deps = "" + + # Detect API keys + api_vars = detect_api_keys(project_dir) + formatted_api_vars = '" && \\\n echo "'.join(api_vars) + + env_vars = detect_env_variables(project_dir, dependencies) + env_vars_block = "" + if env_vars: + env_vars_block = ( + "\n# Set environment variables for compatibility and performance" + ) + for var in env_vars: + env_vars_block += f"\nENV {var}" + + # Generate Dockerfile content + dockerfile_content = DOCKERFILE_TEMPLATE.format( + project_name=project_name, + dependencies=formatted_deps, + api_vars=formatted_api_vars, + env_vars_block=env_vars_block, + ) + + # Write Dockerfile + dockerfile_path = project_dir / "Dockerfile.sandbox" + with open(dockerfile_path, "w") as f: + f.write(dockerfile_content) + + print( + f"Generated Dockerfile.sandbox for {project_name} at {dockerfile_path}" + ) + return True + + +def main(): + """Main function to parse arguments and generate Dockerfile.sandbox.""" + parser = argparse.ArgumentParser( + description="Generate Dockerfile.sandbox for ZenML projects" + ) + parser.add_argument("project", help="Project name") + parser.add_argument( + "--output-dir", help="Output directory (defaults to project name)" + ) + + args = parser.parse_args() + + success = generate_dockerfile(args.project, args.output_dir) + return 0 if success else 1 + + +if __name__ == "__main__": + sys.exit(main()) From ebae3c61fe9380b63b1046691f088ff4de9a3149 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 16 Apr 2025 08:21:54 -0500 Subject: [PATCH 02/22] move generate_enml_project.py to scripts dir --- generate_zenml_project.py => scripts/generate_zenml_project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename generate_zenml_project.py => scripts/generate_zenml_project.py (99%) diff --git a/generate_zenml_project.py b/scripts/generate_zenml_project.py similarity index 99% rename from generate_zenml_project.py rename to 
scripts/generate_zenml_project.py index df22b2819..ee5a38ce6 100644 --- a/generate_zenml_project.py +++ b/scripts/generate_zenml_project.py @@ -9,7 +9,7 @@ def get_hello_world_str(): return dedent( - f"""\ + """\ import logging def main(): From 8104d8e99b08da44602bd5a16adae4ca6a960bea Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 16 Apr 2025 08:29:00 -0500 Subject: [PATCH 03/22] update function to set project name in Dockerfile to base name, and not a path --- scripts/generate_sandbox_dockerfile.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_sandbox_dockerfile.py index 6d8382ee6..e472c4bb4 100755 --- a/scripts/generate_sandbox_dockerfile.py +++ b/scripts/generate_sandbox_dockerfile.py @@ -150,10 +150,12 @@ def detect_env_variables(project_dir, dependencies): return env_vars -def generate_dockerfile(project_name, output_dir=None): +def generate_dockerfile(project_path, output_dir=None): """Generate a Dockerfile.sandbox for the specified project.""" if output_dir is None: - output_dir = project_name + output_dir = project_path + + base_project_name = os.path.basename(project_path) project_dir = Path(output_dir) if not project_dir.exists(): @@ -188,7 +190,7 @@ def generate_dockerfile(project_name, output_dir=None): # Generate Dockerfile content dockerfile_content = DOCKERFILE_TEMPLATE.format( - project_name=project_name, + project_name=base_project_name, dependencies=formatted_deps, api_vars=formatted_api_vars, env_vars_block=env_vars_block, @@ -200,7 +202,7 @@ def generate_dockerfile(project_name, output_dir=None): f.write(dockerfile_content) print( - f"Generated Dockerfile.sandbox for {project_name} at {dockerfile_path}" + f"Generated Dockerfile.sandbox for {base_project_name} at {dockerfile_path}" ) return True From 99ba7842ad81042a8d8622d44da23e5dd7bf816e Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 16 Apr 2025 08:29:46 -0500 Subject: [PATCH 04/22] generate 
Dockerfile for oncoclear as an example --- oncoclear/Dockerfile.sandbox | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 oncoclear/Dockerfile.sandbox diff --git a/oncoclear/Dockerfile.sandbox b/oncoclear/Dockerfile.sandbox new file mode 100644 index 000000000..2024a9287 --- /dev/null +++ b/oncoclear/Dockerfile.sandbox @@ -0,0 +1,35 @@ +# Sandbox base image +FROM safoinext/zenml-sandbox:latest + +# Project metadata +LABEL project_name="oncoclear" +LABEL project_version="0.1.0" + +# Install project-specific dependencies +RUN pip install --no-cache-dir \ + "zenml[server]>=0.50.0" \ + "notebook" \ + "scikit-learn" \ + "pyarrow" \ + "pandas" + +# Set workspace directory +WORKDIR /workspace + +# Clone only the project directory and reorganize +RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \ + cp -r /tmp/zenml-projects/oncoclear/* /workspace/ && \ + rm -rf /tmp/zenml-projects + +# Create a template .env file for API keys +RUN echo "ZENML_PROJECT_SECRET_NAME=YOUR_ZENML_KEY_HERE" && \ + echo "ZENML_STORE_URL=YOUR_ZENML_KEY_HERE" && \ + echo "ZENML_STORE_API_KEY=YOUR_ZENML_KEY_HERE" > .env + +# Create a .vscode directory and settings.json file +RUN mkdir -p /workspace/.vscode && \ + echo '{\n'\ + ' "workbench.colorTheme": "Default Dark Modern"\n'\ + '}' > /workspace/.vscode/settings.json + + From 398c4e40657275fb0b562708e8ada5c0b0946870 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 16 Apr 2025 08:36:23 -0500 Subject: [PATCH 05/22] update docker parent image to zenmldocker/zenml-sandbox --- scripts/generate_sandbox_dockerfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_sandbox_dockerfile.py index e472c4bb4..640988a0a 100755 --- a/scripts/generate_sandbox_dockerfile.py +++ b/scripts/generate_sandbox_dockerfile.py @@ -12,7 +12,7 @@ from pathlib import Path DOCKERFILE_TEMPLATE = """# Sandbox base image 
-FROM safoinext/zenml-sandbox:latest +FROM zenmldocker/zenml-sandbox:latest # Project metadata LABEL project_name="{project_name}" From bfd8c233a36af8d19b5b6f9ae419e53fab850cf0 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 16 Apr 2025 08:54:28 -0500 Subject: [PATCH 06/22] use complete env variable key in template --- omni-reader/Dockerfile.sandbox | 39 ++++++++++++++------------ oncoclear/Dockerfile.sandbox | 8 +++--- scripts/generate_sandbox_dockerfile.py | 7 ++--- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/omni-reader/Dockerfile.sandbox b/omni-reader/Dockerfile.sandbox index 24301cee7..78b5985b6 100644 --- a/omni-reader/Dockerfile.sandbox +++ b/omni-reader/Dockerfile.sandbox @@ -1,44 +1,47 @@ # Sandbox base image -FROM safoinext/zenml-sandbox:latest +FROM zenmldocker/zenml-sandbox:latest + +# Project metadata +LABEL project_name="omni-reader" +LABEL project_version="0.1.0" # Install project-specific dependencies -# Install polars-lts-cpu instead of polars (version compiled for CPU compatibility) RUN pip install --no-cache-dir \ - "instructor==1.7.7" \ - "jiwer==3.0.5" \ - "jiter==0.8.2" \ + "instructor" \ + "jiwer" \ + "jiter" \ "importlib-metadata<7.0,>=1.4.0" \ - "litellm==1.64.1" \ + "litellm" \ "mistralai==1.0.3" \ "numpy<2.0,>=1.9.0" \ "openai==1.69.0" \ "Pillow==11.1.0" \ "polars-lts-cpu==1.26.0" \ "pyarrow>=7.0.0" \ - "python-dotenv==1.0.1" \ + "python-dotenv" \ "streamlit==1.44.0" \ "pydantic>=2.8.2,<2.9.0" \ "tqdm==4.66.4" \ - "zenml>=0.80.0" \ - uv + "zenml>=0.80.0" # Set workspace directory WORKDIR /workspace -# Clone only the omni-reader directory and reorganize +# Clone only the project directory and reorganize RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \ cp -r /tmp/zenml-projects/omni-reader/* /workspace/ && \ rm -rf /tmp/zenml-projects # Create a template .env file for API keys -RUN echo "OPENAI_API_KEY=YOUR_OPENAI_API_KEY_HERE" > .env && \ - echo 
"MISTRAL_API_KEY=YOUR_MISTRAL_API_KEY_HERE" >> .env +RUN echo "OPENAI_API_KEY=YOUR_OPENAI_API_KEY" && \ + echo "MISTRAL_API_KEY=YOUR_MISTRAL_API_KEY" > .env -# Create a .vscode directory (mainly to auto-apply the dark theme) -RUN mkdir -p /workspace/.vscode -# Copy settings file -COPY settings.json /workspace/.vscode/settings.json +# Create a .vscode directory and settings.json file +RUN mkdir -p /workspace/.vscode && \ + echo '{\n'\ + ' "workbench.colorTheme": "Default Dark Modern"\n'\ + '}' > /workspace/.vscode/settings.json -# Set environment variable to skip CPU checks for Polars as a fallback -ENV POLARS_SKIP_CPU_CHECK=1 +# Set environment variables for compatibility and performance +ENV POLARS_SKIP_CPU_CHECK=1 diff --git a/oncoclear/Dockerfile.sandbox b/oncoclear/Dockerfile.sandbox index 2024a9287..865884ac2 100644 --- a/oncoclear/Dockerfile.sandbox +++ b/oncoclear/Dockerfile.sandbox @@ -1,5 +1,5 @@ # Sandbox base image -FROM safoinext/zenml-sandbox:latest +FROM zenmldocker/zenml-sandbox:latest # Project metadata LABEL project_name="oncoclear" @@ -22,9 +22,9 @@ RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenm rm -rf /tmp/zenml-projects # Create a template .env file for API keys -RUN echo "ZENML_PROJECT_SECRET_NAME=YOUR_ZENML_KEY_HERE" && \ - echo "ZENML_STORE_URL=YOUR_ZENML_KEY_HERE" && \ - echo "ZENML_STORE_API_KEY=YOUR_ZENML_KEY_HERE" > .env +RUN echo "ZENML_PROJECT_SECRET_NAME=YOUR_ZENML_PROJECT_SECRET_NAME" && \ + echo "ZENML_STORE_URL=YOUR_ZENML_STORE_URL" && \ + echo "ZENML_STORE_API_KEY=YOUR_ZENML_STORE_API_KEY" > .env # Create a .vscode directory and settings.json file RUN mkdir -p /workspace/.vscode && \ diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_sandbox_dockerfile.py index 640988a0a..028d08923 100755 --- a/scripts/generate_sandbox_dockerfile.py +++ b/scripts/generate_sandbox_dockerfile.py @@ -45,14 +45,11 @@ def format_env_key(key): """Format environment variable placeholder text.""" - # 
Extract the service name from the key - service = key.split("_")[0] if "_" in key else key # Special case handling if key == "GOOGLE_APPLICATION_CREDENTIALS": return f"{key}=PATH_TO_YOUR_GOOGLE_CREDENTIALS_FILE" - if key == "HF_TOKEN": - return f"{key}=YOUR_HUGGINGFACE_TOKEN_HERE" - return f"{key}=YOUR_{service}_KEY_HERE" + + return f"{key}=YOUR_{key}" def parse_requirements(project_dir): From 49384936820f423cecb1a3c208ea8fd484aa7e87 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Fri, 18 Apr 2025 10:47:22 -0500 Subject: [PATCH 07/22] update dockerfile generator script to use uv, handle pyproject.toml, add typings + docs --- scripts/generate_sandbox_dockerfile.py | 359 +++++++++++++------------ 1 file changed, 183 insertions(+), 176 deletions(-) diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_sandbox_dockerfile.py index 028d08923..2cdd239aa 100755 --- a/scripts/generate_sandbox_dockerfile.py +++ b/scripts/generate_sandbox_dockerfile.py @@ -1,224 +1,231 @@ #!/usr/bin/env python3 -"""Script to generate Dockerfile.sandbox files for ZenML projects. - -This ensures consistency across all project Docker images. 
-""" +"""Generate Dockerfile.sandbox for ZenML projects.""" import argparse -import os import re import sys from pathlib import Path -DOCKERFILE_TEMPLATE = """# Sandbox base image +import tomli + +# Dockerfile template +DOCKER_TEMPLATE = """# Sandbox base image FROM zenmldocker/zenml-sandbox:latest # Project metadata -LABEL project_name="{project_name}" +LABEL project_name="{name}" LABEL project_version="0.1.0" -# Install project-specific dependencies -RUN pip install --no-cache-dir \\ -{dependencies} +{deps} # Set workspace directory WORKDIR /workspace # Clone only the project directory and reorganize RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \\ - cp -r /tmp/zenml-projects/{project_name}/* /workspace/ && \\ + cp -r /tmp/zenml-projects/{name}/* /workspace/ && \\ rm -rf /tmp/zenml-projects -# Create a template .env file for API keys -RUN echo "{api_vars}" > .env - -# Create a .vscode directory and settings.json file +# VSCode settings RUN mkdir -p /workspace/.vscode && \\ - echo '{{\\n'\\ - ' "workbench.colorTheme": "Default Dark Modern"\\n'\\ - '}}' > /workspace/.vscode/settings.json + printf '{{\\n "workbench.colorTheme": "Default Dark Modern"\\n}}' > /workspace/.vscode/settings.json -{env_vars_block} +{env_block} """ +# Patterns to detect environment variables in code +ENV_PATTERN = re.compile( + r"os\.(?:getenv|environ(?:\[|\\.get))\(['\"]([A-Za-z0-9_]+)['\"]\)" +) +DOTENV_PATTERN = re.compile( + r"(?:load_dotenv|dotenv).*?['\"]([A-Za-z0-9_]+)['\"]" +) + + +def replace_polars(dep: str) -> str: + """Replaces 'polars' with 'polars-lts-cpu', a CPU-optimized LTS version for container environments.""" + return ( + dep.replace("polars", "polars-lts-cpu") + if dep.startswith("polars") + else dep + ) -def format_env_key(key): - """Format environment variable placeholder text.""" - # Special case handling - if key == "GOOGLE_APPLICATION_CREDENTIALS": - return f"{key}=PATH_TO_YOUR_GOOGLE_CREDENTIALS_FILE" - - return 
f"{key}=YOUR_{key}" +def parse_requirements(project_dir: Path) -> list[str]: + """Parse requirements.txt and apply LTS replacement for Polars. -def parse_requirements(project_dir): - """Parse requirements.txt file if it exists.""" - req_file = Path(project_dir) / "requirements.txt" + Replaces 'polars' with 'polars-lts-cpu', a CPU-optimized LTS version for container environments. + """ + req_file = project_dir / "requirements.txt" if not req_file.exists(): - print(f"Warning: No requirements.txt found in {project_dir}") return [] + deps = [] + for line in req_file.read_text().splitlines(): + line = line.strip() + if line and not line.startswith("#"): + deps.append(replace_polars(line)) + return deps - dependencies = [] - with open(req_file, "r") as f: - for line in f: - line = line.strip() - if line and not line.startswith("#"): - if line.startswith("polars"): - line = line.replace("polars", "polars-lts-cpu") - dependencies.append(line) - - return dependencies - - -def detect_api_keys(project_dir): - """Attempt to detect required API keys by scanning Python files.""" - api_patterns = { - # LLM Provider API Keys - "HF_TOKEN": r"huggingface|hf_token", - "OPENAI_API_KEY": r"openai|gpt", - "ANTHROPIC_API_KEY": r"anthropic|claude", - "MISTRAL_API_KEY": r"mistral|mistralai", - "GEMINI_API_KEY": r"gemini|google", - # ZenML-specific API Keys and Environment Variables - "ZENML_STORE_API_KEY": r"zenml.*api_key|zenml_store_api_key", - "ZENML_STORE_URL": r"zenml_store_url|zenml.*url", - "ZENML_PROJECT_SECRET_NAME": r"zenml.*secret|secret_name", - "ZENML_HF_USERNAME": r"zenml_hf_username|hf_username", - "ZENML_HF_SPACE_NAME": r"zenml_hf_space_name|hf_space_name", - # Monitoring and Logging - "LANGFUSE_PUBLIC_KEY": r"langfuse.*public", - "LANGFUSE_SECRET_KEY": r"langfuse.*secret", - "LANGFUSE_HOST": r"langfuse.*host", - # Vector Databases - "PINECONE_API_KEY": r"pinecone", - "SUPABASE_USER": r"supabase.*user", - "SUPABASE_PASSWORD": r"supabase.*password", - "SUPABASE_HOST": 
r"supabase.*host", - "SUPABASE_PORT": r"supabase.*port", - # Cloud Provider Keys - "AWS_ACCESS_KEY_ID": r"aws.*access|aws_access_key_id", - "AWS_SECRET_ACCESS_KEY": r"aws.*secret|aws_secret_access_key", - "AWS_SESSION_TOKEN": r"aws.*session|aws_session_token", - "AWS_REGION": r"aws.*region|aws_region", - "GOOGLE_APPLICATION_CREDENTIALS": r"google.*credentials", - # Other Service-Specific Keys - "FIFTYONE_LABELSTUDIO_API_KEY": r"fiftyone|labelstudio", - "NEPTUNE_API_TOKEN": r"neptune", - "GH_ACCESS_TOKEN": r"gh_access_token|github", - } - - detected_keys = [] - - for py_file in Path(project_dir).glob("**/*.py"): - with open(py_file, "r", encoding="utf-8", errors="ignore") as f: - content = f.read().lower() - for key, pattern in api_patterns.items(): - if re.search(pattern, content): - detected_keys.append(key) - - # Remove duplicates - detected_keys = list(set(detected_keys)) - - if not detected_keys: - detected_keys = ["API_KEY=YOUR_API_KEY_HERE"] - - return [format_env_key(key) for key in detected_keys] - - -def detect_env_variables(project_dir, dependencies): - """Detect which environment variables are needed based on dependencies and content.""" - env_vars = [] - - # Only add POLARS_SKIP_CPU_CHECK if any polars package is in dependencies - if any("polars" in dep.lower() for dep in dependencies): - env_vars.append("POLARS_SKIP_CPU_CHECK=1") - - # Only add TOKENIZERS_PARALLELISM if transformers or tokenizers is used - if any( - dep.lower().startswith(("transform", "token")) for dep in dependencies - ): - env_vars.append("TOKENIZERS_PARALLELISM=false") - - # These are development convenience variables - could be made optional - # env_vars.append("PYTHONUNBUFFERED=1") - # env_vars.append("PYTHONDONTWRITEBYTECODE=1") - - return env_vars - - -def generate_dockerfile(project_path, output_dir=None): - """Generate a Dockerfile.sandbox for the specified project.""" - if output_dir is None: - output_dir = project_path - - base_project_name = os.path.basename(project_path) 
- - project_dir = Path(output_dir) - if not project_dir.exists(): - print(f"Error: Project directory {project_dir} not found") - return False - # Get dependencies - dependencies = parse_requirements(project_dir) - if dependencies: - formatted_deps = "\n".join( - f' "{dep}" \\' for dep in dependencies[:-1] - ) - if formatted_deps: - formatted_deps += f'\n "{dependencies[-1]}"' +def parse_pyproject(project_dir: Path) -> list[str]: + """Parse pyproject.toml supporting PEP 621, Poetry, and PDM; replace Polars with its LTS CPU version. + + Supports dependencies under [project.dependencies], [tool.poetry.dependencies], and [tool.pdm.dependencies]. + """ + file = project_dir / "pyproject.toml" + if not file.exists(): + return [] + try: + data = tomli.loads(file.read_bytes()) + # PEP 621 + if deps := data.get("project", {}).get("dependencies"): # type: ignore + raw = deps + # Poetry + elif ( + poetry := data.get("tool", {}) + .get("poetry", {}) + .get("dependencies") + ): # type: ignore + raw = [ + f"{n}=={v}" if isinstance(v, str) else n + for n, v in poetry.items() + if n != "python" + ] + # PDM + elif pdm := data.get("tool", {}).get("pdm", {}).get("dependencies"): # type: ignore + raw = pdm else: - formatted_deps = f' "{dependencies[-1]}"' - else: - formatted_deps = "" + return [] + return [replace_polars(d) for d in raw] + except Exception as e: + print(f"Warning: pyproject.toml parse error: {e}") + return [] - # Detect API keys - api_vars = detect_api_keys(project_dir) - formatted_api_vars = '" && \\\n echo "'.join(api_vars) - env_vars = detect_env_variables(project_dir, dependencies) - env_vars_block = "" - if env_vars: - env_vars_block = ( - "\n# Set environment variables for compatibility and performance" +def get_dependencies(project_dir: Path, use_uv: bool) -> str: + """Aggregate dependencies from requirements or pyproject and format the install block. + + Includes a warning if no dependencies are found. 
+ """ + deps = parse_requirements(project_dir) or parse_pyproject(project_dir) + if not deps: + print(f"Warning: no dependencies found in {project_dir}") + return "# No dependencies found" + # build install commands + lines = [] + lines.append("# Install dependencies") + if use_uv: + lines.append("RUN pip install uv") + lines.append("RUN uv pip install --system \\") + else: + lines.append("RUN pip install --no-cache-dir \\") + lines += [f' "{d}" \\' for d in deps[:-1]] + [f' "{deps[-1]}"'] + return "\n".join(lines) + + +def find_env_keys(project_dir: Path) -> set[str]: + """Detect environment variable keys from .env and Python source files. + + Scans .env for explicit keys and searches code for os.getenv, os.environ, and dotenv references. + Defaults to {'API_KEY'} if none found. + """ + keys = set() + env_file = project_dir / ".env" + if env_file.exists(): + for line in env_file.read_text(encoding="utf-8").splitlines(): + if line and not line.startswith("#") and "=" in line: + keys.add(line.split("=", 1)[0].strip()) + for py in project_dir.rglob("*.py"): + txt = py.read_text(errors="ignore") + keys |= set(ENV_PATTERN.findall(txt)) + keys |= set(DOTENV_PATTERN.findall(txt)) + return keys or {"API_KEY"} + + +def gen_env_block(project_dir: Path, keys: set[str]) -> str: + """Generate Dockerfile commands to set up .env with detected keys and runtime tweaks. + + Copies existing .env or creates a new one, appends missing keys, + and adds ENV lines for Polars and tokenizers settings. 
+ """ + has_env = (project_dir / ".env").exists() + if has_env: + existing = { + line.split("=", 1)[0] + for line in (project_dir / ".env").read_text().splitlines() + if "=" in line + } + block = "# Copy existing .env\nCOPY .env /workspace/.env" + missing = keys - existing + else: + block = "# Create a template .env file for API keys" + missing = keys + for k in sorted(missing): + val = ( + "PATH_TO_YOUR_GOOGLE_CREDENTIALS_FILE" + if k == "GOOGLE_APPLICATION_CREDENTIALS" + else f"YOUR_{k}" ) - for var in env_vars: - env_vars_block += f"\nENV {var}" - - # Generate Dockerfile content - dockerfile_content = DOCKERFILE_TEMPLATE.format( - project_name=base_project_name, - dependencies=formatted_deps, - api_vars=formatted_api_vars, - env_vars_block=env_vars_block, - ) - - # Write Dockerfile - dockerfile_path = project_dir / "Dockerfile.sandbox" - with open(dockerfile_path, "w") as f: - f.write(dockerfile_content) - - print( - f"Generated Dockerfile.sandbox for {base_project_name} at {dockerfile_path}" + block += f'\nRUN echo "{k}={val}" >> /workspace/.env' + # runtime adjustments + if any("polars" in d.lower() for d in keys): + # Add POLARS_SKIP_CPU_CHECK if polars is used - this prevents Polars from + # generating warnings or errors when running in container environments where + # CPU feature detection may not work correctly + block += "\nENV POLARS_SKIP_CPU_CHECK=1" + if any(d.lower().startswith(("transform", "token")) for d in keys): + block += "\nENV TOKENIZERS_PARALLELISM=false" + return block + + +def generate_dockerfile( + project_path: str, + output_dir: str | None = None, + use_uv: bool = True, +) -> bool: + """Create Dockerfile.sandbox using the template, dependencies, and environment setup. + + Returns True on success, False otherwise. 
+ """ + out = Path(output_dir or project_path) + if not out.exists(): + print(f"Error: {out} not found") + return False + name = Path(project_path).name + deps_block = get_dependencies(out, use_uv) + keys = find_env_keys(out) + env_block = gen_env_block(out, keys) + content = DOCKER_TEMPLATE.format( + name=name, deps=deps_block, env_block=env_block ) + (out / "Dockerfile.sandbox").write_text(content) + print(f"Generated Dockerfile.sandbox at {out / 'Dockerfile.sandbox'}") return True -def main(): - """Main function to parse arguments and generate Dockerfile.sandbox.""" +def main() -> None: + """CLI entry point.""" parser = argparse.ArgumentParser( - description="Generate Dockerfile.sandbox for ZenML projects" + "Generate Dockerfile.sandbox for ZenML projects" ) - parser.add_argument("project", help="Project name") + parser.add_argument("project", help="Path to the project directory") parser.add_argument( - "--output-dir", help="Output directory (defaults to project name)" + "--output-dir", help="Output directory (defaults to project path)" + ) + parser.add_argument( + "--use-uv", + action="store_true", + default=True, + help="Use uv for dependency installation (default: True)", ) - args = parser.parse_args() - - success = generate_dockerfile(args.project, args.output_dir) - return 0 if success else 1 + sys.exit( + 0 + if generate_dockerfile(args.project, args.output_dir, args.use_uv) + else 1 + ) if __name__ == "__main__": - sys.exit(main()) + main() From b1ebc93dc9a27a7f94eeebc6620b76367ba99b32 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Fri, 18 Apr 2025 10:47:37 -0500 Subject: [PATCH 08/22] add tomli to pyproject.toml to parse toml files --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 083e3f396..0858e040c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ authors = ["ZenML CodeMonkey "] [tool.poetry.dependencies] python = ">=3.7.0,<3.9.0" +tomli = "^2.0.1" # Added for parsing 
pyproject.toml files when generating Dockerfile.sandbox [tool.poetry.dev-dependencies] pytest = "^6.2.5" From 434f7cb545121c9feb307a85e6ebe52c6571e22f Mon Sep 17 00:00:00 2001 From: marwan37 Date: Fri, 18 Apr 2025 10:48:11 -0500 Subject: [PATCH 09/22] generate updated Dockerfile.sandbox for omnireader --- omni-reader/Dockerfile.sandbox | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/omni-reader/Dockerfile.sandbox b/omni-reader/Dockerfile.sandbox index 78b5985b6..823211ad6 100644 --- a/omni-reader/Dockerfile.sandbox +++ b/omni-reader/Dockerfile.sandbox @@ -5,8 +5,9 @@ FROM zenmldocker/zenml-sandbox:latest LABEL project_name="omni-reader" LABEL project_version="0.1.0" -# Install project-specific dependencies -RUN pip install --no-cache-dir \ +# Install dependencies +RUN pip install uv +RUN uv pip install --system \ "instructor" \ "jiwer" \ "jiter" \ @@ -32,16 +33,11 @@ RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenm cp -r /tmp/zenml-projects/omni-reader/* /workspace/ && \ rm -rf /tmp/zenml-projects -# Create a template .env file for API keys -RUN echo "OPENAI_API_KEY=YOUR_OPENAI_API_KEY" && \ - echo "MISTRAL_API_KEY=YOUR_MISTRAL_API_KEY" > .env - -# Create a .vscode directory and settings.json file +# VSCode settings RUN mkdir -p /workspace/.vscode && \ - echo '{\n'\ - ' "workbench.colorTheme": "Default Dark Modern"\n'\ - '}' > /workspace/.vscode/settings.json + printf '{\n "workbench.colorTheme": "Default Dark Modern"\n}' > /workspace/.vscode/settings.json - -# Set environment variables for compatibility and performance -ENV POLARS_SKIP_CPU_CHECK=1 +# Create a template .env file for API keys +RUN echo "MISTRAL_API_KEY=YOUR_MISTRAL_API_KEY" >> /workspace/.env +RUN echo "OLLAMA_HOST=YOUR_OLLAMA_HOST" >> /workspace/.env +RUN echo "OPENAI_API_KEY=YOUR_OPENAI_API_KEY" >> /workspace/.env From 6d568df612c320ab6c95edbbea368ce890acb410 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Fri, 18 Apr 2025 
11:16:46 -0500 Subject: [PATCH 10/22] update script to not generate a .env file if it didn't exist, and don't set environment variables directly --- oncoclear/Dockerfile.sandbox | 5 +- scripts/generate_sandbox_dockerfile.py | 74 ++++++++++++++------------ 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/oncoclear/Dockerfile.sandbox b/oncoclear/Dockerfile.sandbox index 865884ac2..b6178e9b6 100644 --- a/oncoclear/Dockerfile.sandbox +++ b/oncoclear/Dockerfile.sandbox @@ -5,8 +5,9 @@ FROM zenmldocker/zenml-sandbox:latest LABEL project_name="oncoclear" LABEL project_version="0.1.0" -# Install project-specific dependencies -RUN pip install --no-cache-dir \ +# Install dependencies +RUN pip install uv +RUN uv pip install --system \ "zenml[server]>=0.50.0" \ "notebook" \ "scikit-learn" \ diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_sandbox_dockerfile.py index 2cdd239aa..5dbde4add 100755 --- a/scripts/generate_sandbox_dockerfile.py +++ b/scripts/generate_sandbox_dockerfile.py @@ -121,7 +121,7 @@ def get_dependencies(project_dir: Path, use_uv: bool) -> str: else: lines.append("RUN pip install --no-cache-dir \\") lines += [f' "{d}" \\' for d in deps[:-1]] + [f' "{deps[-1]}"'] - return "\n".join(lines) + return "\n".join(lines), deps @@ -143,40 +143,48 @@ def find_env_keys(project_dir: Path) -> set[str]: return keys or {"API_KEY"} -def gen_env_block(project_dir: Path, keys: set[str]) -> str: +def gen_env_block( + project_dir: Path, keys: set[str], installed_deps: list[str] +) -> str: """Generate Dockerfile commands to set up .env with detected keys and runtime tweaks. - Copies existing .env or creates a new one, appends missing keys, - and adds ENV lines for Polars and tokenizers settings. + Looks for any .env* files (like .env.example) and uses that for reference. + Does not create a .env file if one doesn't exist. + Adds Polars ENV only if polars-lts-cpu was installed. 
""" - has_env = (project_dir / ".env").exists() - if has_env: - existing = { - line.split("=", 1)[0] - for line in (project_dir / ".env").read_text().splitlines() - if "=" in line - } - block = "# Copy existing .env\nCOPY .env /workspace/.env" + lines = [] + + # Look for any .env* files (.env, .env.example, etc.) + env_files = list(project_dir.glob(".env*")) + + if env_files: + # Use the first .env* file found + env_file = env_files[0] + env_file_name = env_file.name + + # Parse the existing keys from the file + existing = set() + try: + for line in env_file.read_text(encoding="utf-8").splitlines(): + if line and not line.startswith("#") and "=" in line: + existing.add(line.split("=", 1)[0].strip()) + except Exception: + existing = set() + + # Copy the existing .env* file + lines.append(f"# Copy {env_file_name}") + lines.append(f"COPY {env_file_name} /workspace/.env") + + # Add missing keys only if we're copying a template missing = keys - existing - else: - block = "# Create a template .env file for API keys" - missing = keys - for k in sorted(missing): - val = ( - "PATH_TO_YOUR_GOOGLE_CREDENTIALS_FILE" - if k == "GOOGLE_APPLICATION_CREDENTIALS" - else f"YOUR_{k}" - ) - block += f'\nRUN echo "{k}={val}" >> /workspace/.env' - # runtime adjustments - if any("polars" in d.lower() for d in keys): - # Add POLARS_SKIP_CPU_CHECK if polars is used - this prevents Polars from - # generating warnings or errors when running in container environments where - # CPU feature detection may not work correctly - block += "\nENV POLARS_SKIP_CPU_CHECK=1" - if any(d.lower().startswith(("transform", "token")) for d in keys): - block += "\nENV TOKENIZERS_PARALLELISM=false" - return block + for k in sorted(missing): + lines.append(f'RUN echo "{k}=YOUR_{k}" >> /workspace/.env') + + # Add Polars ENV only if we actually installed polars-lts-cpu + if any("polars-lts-cpu" in dep for dep in installed_deps): + lines.append("ENV POLARS_SKIP_CPU_CHECK=1") + + return "\n".join(lines) if lines else 
"" def generate_dockerfile( @@ -193,9 +201,9 @@ def generate_dockerfile( print(f"Error: {out} not found") return False name = Path(project_path).name - deps_block = get_dependencies(out, use_uv) + deps_block, installed_deps = get_dependencies(out, use_uv) keys = find_env_keys(out) - env_block = gen_env_block(out, keys) + env_block = gen_env_block(out, keys, installed_deps) content = DOCKER_TEMPLATE.format( name=name, deps=deps_block, env_block=env_block ) From ec8f24ee1a523f5e5169c44a4f0f3a0264a37406 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Sun, 20 Apr 2025 08:52:43 -0500 Subject: [PATCH 11/22] use uv binary from distroless Docker image instead of installing uv via pip --- scripts/generate_sandbox_dockerfile.py | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_sandbox_dockerfile.py index 5dbde4add..81eb91bad 100755 --- a/scripts/generate_sandbox_dockerfile.py +++ b/scripts/generate_sandbox_dockerfile.py @@ -13,6 +13,13 @@ DOCKER_TEMPLATE = """# Sandbox base image FROM zenmldocker/zenml-sandbox:latest +# Install uv from official distroless image +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +# Set uv environment variables for optimization +ENV UV_SYSTEM_PYTHON=1 +ENV UV_COMPILE_BYTECODE=1 + # Project metadata LABEL project_name="{name}" LABEL project_version="0.1.0" @@ -103,7 +110,7 @@ def parse_pyproject(project_dir: Path) -> list[str]: return [] -def get_dependencies(project_dir: Path, use_uv: bool) -> str: +def get_dependencies(project_dir: Path) -> tuple[str, list[str]]: """Aggregate dependencies from requirements or pyproject and format the install block. Includes a warning if no dependencies are found. 
@@ -111,15 +118,13 @@ def get_dependencies(project_dir: Path, use_uv: bool) -> str: deps = parse_requirements(project_dir) or parse_pyproject(project_dir) if not deps: print(f"Warning: no dependencies found in {project_dir}") - return "# No dependencies found" + return "# No dependencies found", [] # build install commands lines = [] - lines.append("# Install dependencies") - if use_uv: - lines.append("RUN pip install uv") - lines.append("RUN uv pip install --system \\") - else: - lines.append("RUN pip install --no-cache-dir \\") + lines.append("# Install dependencies with uv and cache optimization") + lines.append("RUN --mount=type=cache,target=/root/.cache/uv \\") + lines.append(" uv pip install --system \\") + lines += [f' "{d}" \\' for d in deps[:-1]] + [f' "{deps[-1]}"'] return "\n".join(lines), deps @@ -190,7 +195,6 @@ def gen_env_block( def generate_dockerfile( project_path: str, output_dir: str | None = None, - use_uv: bool = True, ) -> bool: """Create Dockerfile.sandbox using the template, dependencies, and environment setup. 
@@ -201,7 +205,7 @@ def generate_dockerfile( print(f"Error: {out} not found") return False name = Path(project_path).name - deps_block, installed_deps = get_dependencies(out, use_uv) + deps_block, installed_deps = get_dependencies(out) keys = find_env_keys(out) env_block = gen_env_block(out, keys, installed_deps) content = DOCKER_TEMPLATE.format( @@ -228,11 +232,7 @@ def main() -> None: help="Use uv for dependency installation (default: True)", ) args = parser.parse_args() - sys.exit( - 0 - if generate_dockerfile(args.project, args.output_dir, args.use_uv) - else 1 - ) + sys.exit(0 if generate_dockerfile(args.project, args.output_dir) else 1) if __name__ == "__main__": From 8ff3b480dc6e4bb278d8e4b1fe70e315daade321 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Sun, 20 Apr 2025 08:54:46 -0500 Subject: [PATCH 12/22] generate updated Dockerfile.sandbox files --- omni-reader/Dockerfile.sandbox | 20 +++++++++++++------- oncoclear/Dockerfile.sandbox | 24 ++++++++++++------------ 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/omni-reader/Dockerfile.sandbox b/omni-reader/Dockerfile.sandbox index 823211ad6..aa20f0c0a 100644 --- a/omni-reader/Dockerfile.sandbox +++ b/omni-reader/Dockerfile.sandbox @@ -1,13 +1,20 @@ # Sandbox base image FROM zenmldocker/zenml-sandbox:latest +# Install uv from official distroless image +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +# Set uv environment variables for optimization +ENV UV_SYSTEM_PYTHON=1 +ENV UV_COMPILE_BYTECODE=1 + # Project metadata LABEL project_name="omni-reader" LABEL project_version="0.1.0" -# Install dependencies -RUN pip install uv -RUN uv pip install --system \ +# Install dependencies with uv and cache optimization +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system \ "instructor" \ "jiwer" \ "jiter" \ @@ -37,7 +44,6 @@ RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenm RUN mkdir -p /workspace/.vscode && \ printf '{\n 
"workbench.colorTheme": "Default Dark Modern"\n}' > /workspace/.vscode/settings.json -# Create a template .env file for API keys -RUN echo "MISTRAL_API_KEY=YOUR_MISTRAL_API_KEY" >> /workspace/.env -RUN echo "OLLAMA_HOST=YOUR_OLLAMA_HOST" >> /workspace/.env -RUN echo "OPENAI_API_KEY=YOUR_OPENAI_API_KEY" >> /workspace/.env +# Copy .env.example +COPY .env.example /workspace/.env +ENV POLARS_SKIP_CPU_CHECK=1 diff --git a/oncoclear/Dockerfile.sandbox b/oncoclear/Dockerfile.sandbox index b6178e9b6..ddd573fc4 100644 --- a/oncoclear/Dockerfile.sandbox +++ b/oncoclear/Dockerfile.sandbox @@ -1,13 +1,20 @@ # Sandbox base image FROM zenmldocker/zenml-sandbox:latest +# Install uv from official distroless image +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +# Set uv environment variables for optimization +ENV UV_SYSTEM_PYTHON=1 +ENV UV_COMPILE_BYTECODE=1 + # Project metadata LABEL project_name="oncoclear" LABEL project_version="0.1.0" -# Install dependencies -RUN pip install uv -RUN uv pip install --system \ +# Install dependencies with uv and cache optimization +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system \ "zenml[server]>=0.50.0" \ "notebook" \ "scikit-learn" \ @@ -22,15 +29,8 @@ RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenm cp -r /tmp/zenml-projects/oncoclear/* /workspace/ && \ rm -rf /tmp/zenml-projects -# Create a template .env file for API keys -RUN echo "ZENML_PROJECT_SECRET_NAME=YOUR_ZENML_PROJECT_SECRET_NAME" && \ - echo "ZENML_STORE_URL=YOUR_ZENML_STORE_URL" && \ - echo "ZENML_STORE_API_KEY=YOUR_ZENML_STORE_API_KEY" > .env - -# Create a .vscode directory and settings.json file +# VSCode settings RUN mkdir -p /workspace/.vscode && \ - echo '{\n'\ - ' "workbench.colorTheme": "Default Dark Modern"\n'\ - '}' > /workspace/.vscode/settings.json + printf '{\n "workbench.colorTheme": "Default Dark Modern"\n}' > /workspace/.vscode/settings.json From ce252c3a1036da80554fd9cc783ff7f530909ec2 Mon Sep 17 
00:00:00 2001 From: marwan37 Date: Sun, 20 Apr 2025 08:57:15 -0500 Subject: [PATCH 13/22] change base image name to zenmldocker/zenml-projects:base --- omni-reader/Dockerfile.sandbox | 2 +- oncoclear/Dockerfile.sandbox | 2 +- scripts/generate_sandbox_dockerfile.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/omni-reader/Dockerfile.sandbox b/omni-reader/Dockerfile.sandbox index aa20f0c0a..d068455ed 100644 --- a/omni-reader/Dockerfile.sandbox +++ b/omni-reader/Dockerfile.sandbox @@ -1,5 +1,5 @@ # Sandbox base image -FROM zenmldocker/zenml-sandbox:latest +FROM zenmldocker/zenml-projects:base # Install uv from official distroless image COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ diff --git a/oncoclear/Dockerfile.sandbox b/oncoclear/Dockerfile.sandbox index ddd573fc4..4c5b76da2 100644 --- a/oncoclear/Dockerfile.sandbox +++ b/oncoclear/Dockerfile.sandbox @@ -1,5 +1,5 @@ # Sandbox base image -FROM zenmldocker/zenml-sandbox:latest +FROM zenmldocker/zenml-projects:base # Install uv from official distroless image COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_sandbox_dockerfile.py index 81eb91bad..f1044fd3c 100755 --- a/scripts/generate_sandbox_dockerfile.py +++ b/scripts/generate_sandbox_dockerfile.py @@ -11,7 +11,7 @@ # Dockerfile template DOCKER_TEMPLATE = """# Sandbox base image -FROM zenmldocker/zenml-sandbox:latest +FROM zenmldocker/zenml-projects:base # Install uv from official distroless image COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ From b932f3e389188d128e55f6ee1b1f3d05c24290a9 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Sun, 20 Apr 2025 10:27:09 -0500 Subject: [PATCH 14/22] add workflow file --- .github/workflows/build-push-sandbox.yml | 153 +++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 .github/workflows/build-push-sandbox.yml diff --git a/.github/workflows/build-push-sandbox.yml 
b/.github/workflows/build-push-sandbox.yml new file mode 100644 index 000000000..38d4efb33 --- /dev/null +++ b/.github/workflows/build-push-sandbox.yml @@ -0,0 +1,153 @@ +name: Build and Push Project Sandbox Images + +on: + push: + branches: + - main + paths-ignore: + - "_assets/**" + - ".github/**" + - ".gitignore" + - ".gitmodules" + - ".typos.toml" + - "CODE-OF-CONDUCT.md" + - "CONTRIBUTING.md" + - "scripts/**" + - "LICENSE" + - "pyproject.toml" + - "README.md" + + workflow_dispatch: + inputs: + project: + description: "Project to build (leave empty to detect from changed files)" + required: false + default: "" + +jobs: + detect-changes: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Detect changed projects + id: set-matrix + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + if [ -n "${{ github.event.inputs.project }}" ]; then + PROJECTS="[\"${{ github.event.inputs.project }}\"]" + else + # Auto-detect changed files (same logic as push events) + CHANGED_FILES=$(git diff --name-only HEAD^ HEAD) + CHANGED_DIRS=$(echo "$CHANGED_FILES" | grep -o "^[^/]*" | sort -u | grep -v "^$") + ALL_PROJECT_DIRS=$(find . -maxdepth 1 -type d -not -path "*/\.*" -not -path "." | sed 's|^\./||' | grep -v "^_") + PROJECTS="[" + COMMA="" + for DIR in $CHANGED_DIRS; do + if echo "$ALL_PROJECT_DIRS" | grep -q "^$DIR$"; then + PROJECTS="${PROJECTS}${COMMA}\"${DIR}\"" + COMMA="," + fi + done + PROJECTS="${PROJECTS}]" + fi + else + CHANGED_FILES=$(git diff --name-only HEAD^ HEAD) + CHANGED_DIRS=$(echo "$CHANGED_FILES" | grep -o "^[^/]*" | sort -u | grep -v "^$") + ALL_PROJECT_DIRS=$(find . -maxdepth 1 -type d -not -path "*/\.*" -not -path "." 
| sed 's|^\./||' | grep -v "^_") + PROJECTS="[" + COMMA="" + for DIR in $CHANGED_DIRS; do + if echo "$ALL_PROJECT_DIRS" | grep -q "^$DIR$"; then + PROJECTS="${PROJECTS}${COMMA}\"${DIR}\"" + COMMA="," + fi + done + PROJECTS="${PROJECTS}]" + fi + echo "matrix=$PROJECTS" >> $GITHUB_OUTPUT + echo "Projects to build: $PROJECTS" + + build-and-push: + needs: detect-changes + runs-on: ubuntu-latest + strategy: + matrix: + project: ${{ fromJson(needs.detect-changes.outputs.matrix) }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Check for Dockerfile.sandbox + id: check-dockerfile + run: | + if [ -f "${{ matrix.project }}/Dockerfile.sandbox" ]; then + echo "dockerfile_exists=true" >> $GITHUB_OUTPUT + else + echo "dockerfile_exists=false" >> $GITHUB_OUTPUT + echo "No Dockerfile.sandbox found in ${{ matrix.project }}, will generate one." + fi + + - name: Set up Python + if: steps.check-dockerfile.outputs.dockerfile_exists == 'false' + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Generate Dockerfile.sandbox if needed + if: steps.check-dockerfile.outputs.dockerfile_exists == 'false' + id: generate-dockerfile + run: | + python generate_sandbox_dockerfile.py "${{ matrix.project }}" + echo "Generated Dockerfile.sandbox for ${{ matrix.project }}" + + - name: Create Pull Request for new Dockerfile + if: steps.check-dockerfile.outputs.dockerfile_exists == 'false' + uses: peter-evans/create-pull-request@v5 + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: "Auto-generate Dockerfile.sandbox for ${{ matrix.project }}" + title: "Auto-generate Dockerfile.sandbox for ${{ matrix.project }}" + body: | + This PR adds a generated Dockerfile.sandbox for the ${{ matrix.project }} project. + + Please review the changes and merge if they look good. + + Once merged, the Docker image will be built and pushed automatically. 
+ branch: "auto-dockerfile-${{ matrix.project }}" + base: main + labels: | + automated-pr + dockerfile + sandbox + + # Only build and push if Dockerfile already exists + - name: Set up Docker Buildx + if: steps.check-dockerfile.outputs.dockerfile_exists == 'true' + uses: docker/setup-buildx-action@v2 + + - name: Login to DockerHub + if: steps.check-dockerfile.outputs.dockerfile_exists == 'true' + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + + - name: Build and push + if: steps.check-dockerfile.outputs.dockerfile_exists == 'true' + uses: docker/build-push-action@v4 + with: + context: . + file: ${{ matrix.project }}/Dockerfile.sandbox + push: true + tags: | + zenmldocker/projects-${{ matrix.project }}:latest + cache-from: type=gha + cache-to: type=gha,mode=max From f22584cba3c5d2257fe13df06456d36c9cf38c8a Mon Sep 17 00:00:00 2001 From: marwan37 Date: Mon, 21 Apr 2025 06:50:29 -0500 Subject: [PATCH 15/22] rename sandbox to codespace --- .github/workflows/build-push-sandbox.yml | 18 +++++++++--------- ...Dockerfile.sandbox => Dockerfile.codespace} | 0 ...Dockerfile.sandbox => Dockerfile.codespace} | 0 pyproject.toml | 2 +- ...ile.py => generate_codespace_dockerfile.py} | 10 +++++----- 5 files changed, 15 insertions(+), 15 deletions(-) rename omni-reader/{Dockerfile.sandbox => Dockerfile.codespace} (100%) rename oncoclear/{Dockerfile.sandbox => Dockerfile.codespace} (100%) rename scripts/{generate_sandbox_dockerfile.py => generate_codespace_dockerfile.py} (95%) diff --git a/.github/workflows/build-push-sandbox.yml b/.github/workflows/build-push-sandbox.yml index 38d4efb33..830e27cf1 100644 --- a/.github/workflows/build-push-sandbox.yml +++ b/.github/workflows/build-push-sandbox.yml @@ -85,14 +85,14 @@ jobs: with: fetch-depth: 0 - - name: Check for Dockerfile.sandbox + - name: Check for Dockerfile.codespace id: check-dockerfile run: | - if [ -f "${{ matrix.project }}/Dockerfile.sandbox" 
]; then + if [ -f "${{ matrix.project }}/Dockerfile.codespace" ]; then echo "dockerfile_exists=true" >> $GITHUB_OUTPUT else echo "dockerfile_exists=false" >> $GITHUB_OUTPUT - echo "No Dockerfile.sandbox found in ${{ matrix.project }}, will generate one." + echo "No Dockerfile.codespace found in ${{ matrix.project }}, will generate one." fi - name: Set up Python @@ -101,22 +101,22 @@ jobs: with: python-version: "3.10" - - name: Generate Dockerfile.sandbox if needed + - name: Generate Dockerfile.codespace if needed if: steps.check-dockerfile.outputs.dockerfile_exists == 'false' id: generate-dockerfile run: | python generate_sandbox_dockerfile.py "${{ matrix.project }}" - echo "Generated Dockerfile.sandbox for ${{ matrix.project }}" + echo "Generated Dockerfile.codespace for ${{ matrix.project }}" - name: Create Pull Request for new Dockerfile if: steps.check-dockerfile.outputs.dockerfile_exists == 'false' uses: peter-evans/create-pull-request@v5 with: token: ${{ secrets.GITHUB_TOKEN }} - commit-message: "Auto-generate Dockerfile.sandbox for ${{ matrix.project }}" - title: "Auto-generate Dockerfile.sandbox for ${{ matrix.project }}" + commit-message: "Auto-generate Dockerfile.codespace for ${{ matrix.project }}" + title: "Auto-generate Dockerfile.codespace for ${{ matrix.project }}" body: | - This PR adds a generated Dockerfile.sandbox for the ${{ matrix.project }} project. + This PR adds a generated Dockerfile.codespace for the ${{ matrix.project }} project. Please review the changes and merge if they look good. @@ -145,7 +145,7 @@ jobs: uses: docker/build-push-action@v4 with: context: . 
- file: ${{ matrix.project }}/Dockerfile.sandbox + file: ${{ matrix.project }}/Dockerfile.codespace push: true tags: | zenmldocker/projects-${{ matrix.project }}:latest diff --git a/omni-reader/Dockerfile.sandbox b/omni-reader/Dockerfile.codespace similarity index 100% rename from omni-reader/Dockerfile.sandbox rename to omni-reader/Dockerfile.codespace diff --git a/oncoclear/Dockerfile.sandbox b/oncoclear/Dockerfile.codespace similarity index 100% rename from oncoclear/Dockerfile.sandbox rename to oncoclear/Dockerfile.codespace diff --git a/pyproject.toml b/pyproject.toml index 0858e040c..6e3f378c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = ["ZenML CodeMonkey "] [tool.poetry.dependencies] python = ">=3.7.0,<3.9.0" -tomli = "^2.0.1" # Added for parsing pyproject.toml files when generating Dockerfile.sandbox +tomli = "^2.0.1" # Added for parsing pyproject.toml files when generating Dockerfile.codespace [tool.poetry.dev-dependencies] pytest = "^6.2.5" diff --git a/scripts/generate_sandbox_dockerfile.py b/scripts/generate_codespace_dockerfile.py similarity index 95% rename from scripts/generate_sandbox_dockerfile.py rename to scripts/generate_codespace_dockerfile.py index f1044fd3c..8791e9ee8 100755 --- a/scripts/generate_sandbox_dockerfile.py +++ b/scripts/generate_codespace_dockerfile.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -"""Generate Dockerfile.sandbox for ZenML projects.""" +"""Generate Dockerfile.codespace for ZenML projects.""" import argparse import re @@ -196,7 +196,7 @@ def generate_dockerfile( project_path: str, output_dir: str | None = None, ) -> bool: - """Create Dockerfile.sandbox using the template, dependencies, and environment setup. + """Create Dockerfile.codespace using the template, dependencies, and environment setup. Returns True on success, False otherwise. 
""" @@ -211,15 +211,15 @@ def generate_dockerfile( content = DOCKER_TEMPLATE.format( name=name, deps=deps_block, env_block=env_block ) - (out / "Dockerfile.sandbox").write_text(content) - print(f"Generated Dockerfile.sandbox at {out / 'Dockerfile.sandbox'}") + (out / "Dockerfile.codespace").write_text(content) + print(f"Generated Dockerfile.codespace at {out / 'Dockerfile.codespace'}") return True def main() -> None: """CLI entry point.""" parser = argparse.ArgumentParser( - "Generate Dockerfile.sandbox for ZenML projects" + "Generate Dockerfile.codespace for ZenML projects" ) parser.add_argument("project", help="Path to the project directory") parser.add_argument( From 22ebb939d4976cb9e09dc772fd073624de7e1822 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Mon, 21 Apr 2025 06:50:44 -0500 Subject: [PATCH 16/22] delete generate_zenml_project.py --- scripts/generate_zenml_project.py | 138 ------------------------------ 1 file changed, 138 deletions(-) delete mode 100644 scripts/generate_zenml_project.py diff --git a/scripts/generate_zenml_project.py b/scripts/generate_zenml_project.py deleted file mode 100644 index ee5a38ce6..000000000 --- a/scripts/generate_zenml_project.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Generate a ZenML project for a tool""" - -import argparse -import logging -import os -import shutil -from textwrap import dedent - - -def get_hello_world_str(): - return dedent( - """\ -import logging - -def main(): - pass - -if __name__ == "__main__": - logging.basicConfig(level="INFO") - main() - -""" - ) - - -def get_readme_str(name: str): - return dedent( - f"""\ -# Playground for {name} - -## Installation -``` -cd {name} -poetry install -``` - """ - ) - - -def get_flake8_str(): - return dedent( - """\ - [flake8] - max-line-length = 79 - max-complexity = 18 - select = B,C,E,F,W,T4,B9 - ignore = E203, E266, E501, W503, F403, F401 - """ - ) - - -def get_project_toml_str(name: str, author: str = "Author "): - return dedent( - f"""\ - [tool.poetry] - name = 
"{name}" - version = "1.0.0" - description = "{name}" - authors = ["{author}"] - license = "Apache 2.0" - - [tool.poetry.dependencies] - python = ">=3.7.0,<3.9.0" - - [tool.poetry.dev-dependencies] - black = "^21.9b0" - isort = "^5.9.3" - pytest = "^6.2.5" - - [build-system] - requires = ["poetry-core>=1.0.0"] - build-backend = "poetry.core.masonry.api" - - [tool.isort] - profile = "black" - known_third_party = [] - skip_glob = [] - line_length = 79 - - [tool.black] - line-length = 79 - include = '\.pyi?$' - exclude = ''' - /( - \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - )/ - ''' - """ - ) - - -def write_file(path: str, content: str): - with open(path, "w") as f: - f.write(content) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("tool_name", type=str, help="Name of the tool") - args = parser.parse_args() - - path = os.path.join(os.getcwd(), args.tool_name) - src_path = os.path.join(path, "src") - if os.path.exists(path): - raise AssertionError(f"{path} already exists!") - - toml_str = get_project_toml_str(args.tool_name) - flake8_str = get_flake8_str() - py_str = get_hello_world_str() - readme_str = get_readme_str(args.tool_name) - - # make dirs - os.mkdir(path) - os.mkdir(src_path) - - # copy .gitignore - shutil.copy( - os.path.join(os.getcwd(), ".gitignore"), - os.path.join(path, ".gitignore"), - ) - - # write files - write_file(os.path.join(path, ".flake8"), flake8_str) - write_file(os.path.join(src_path, "main.py"), py_str) - write_file(os.path.join(path, "pyproject.toml"), toml_str) - write_file(os.path.join(path, "README.md"), readme_str) - - -if __name__ == "__main__": - logging.basicConfig(level="INFO") - main() From ed92bdbb6e8c4308ddddc306f77a01124bb56050 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Tue, 22 Apr 2025 07:43:43 -0500 Subject: [PATCH 17/22] revert base image name to zenmldocker/zenml-sandbox --- omni-reader/Dockerfile.codespace | 2 +- oncoclear/Dockerfile.codespace | 2 
+- scripts/generate_codespace_dockerfile.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/omni-reader/Dockerfile.codespace b/omni-reader/Dockerfile.codespace index d068455ed..aa20f0c0a 100644 --- a/omni-reader/Dockerfile.codespace +++ b/omni-reader/Dockerfile.codespace @@ -1,5 +1,5 @@ # Sandbox base image -FROM zenmldocker/zenml-projects:base +FROM zenmldocker/zenml-sandbox:latest # Install uv from official distroless image COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ diff --git a/oncoclear/Dockerfile.codespace b/oncoclear/Dockerfile.codespace index 4c5b76da2..ddd573fc4 100644 --- a/oncoclear/Dockerfile.codespace +++ b/oncoclear/Dockerfile.codespace @@ -1,5 +1,5 @@ # Sandbox base image -FROM zenmldocker/zenml-projects:base +FROM zenmldocker/zenml-sandbox:latest # Install uv from official distroless image COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ diff --git a/scripts/generate_codespace_dockerfile.py b/scripts/generate_codespace_dockerfile.py index 8791e9ee8..44315afc3 100755 --- a/scripts/generate_codespace_dockerfile.py +++ b/scripts/generate_codespace_dockerfile.py @@ -11,7 +11,7 @@ # Dockerfile template DOCKER_TEMPLATE = """# Sandbox base image -FROM zenmldocker/zenml-projects:base +FROM zenmldocker/zenml-sandbox:latest # Install uv from official distroless image COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ From 5020fa99d0ac4c49a216b2af919a6edd1bbae7c0 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Tue, 22 Apr 2025 07:58:15 -0500 Subject: [PATCH 18/22] bump python version in pyproject.toml and replace tomli with tomllib --- pyproject.toml | 3 +-- scripts/generate_codespace_dockerfile.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6e3f378c3..2f4073e52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,7 @@ description = "Explore MLOps production use-cases with ZenML." 
authors = ["ZenML CodeMonkey "] [tool.poetry.dependencies] -python = ">=3.7.0,<3.9.0" -tomli = "^2.0.1" # Added for parsing pyproject.toml files when generating Dockerfile.codespace +python = ">=3.11,<3.13" [tool.poetry.dev-dependencies] pytest = "^6.2.5" diff --git a/scripts/generate_codespace_dockerfile.py b/scripts/generate_codespace_dockerfile.py index 44315afc3..7fe7ea3e9 100755 --- a/scripts/generate_codespace_dockerfile.py +++ b/scripts/generate_codespace_dockerfile.py @@ -7,7 +7,7 @@ import sys from pathlib import Path -import tomli +import tomllib # Dockerfile template DOCKER_TEMPLATE = """# Sandbox base image @@ -84,7 +84,7 @@ def parse_pyproject(project_dir: Path) -> list[str]: if not file.exists(): return [] try: - data = tomli.loads(file.read_bytes()) + data = tomllib.loads(file.read_bytes()) # PEP 621 if deps := data.get("project", {}).get("dependencies"): # type: ignore raw = deps From fd25eb0da405d5465de6d1716f4cfc558c5a2e83 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 23 Apr 2025 21:23:40 -0500 Subject: [PATCH 19/22] Use UTC timestamp as Docker image tag in GH action workflow --- .github/workflows/build-push-sandbox.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-sandbox.yml b/.github/workflows/build-push-sandbox.yml index 830e27cf1..b8bfc28f3 100644 --- a/.github/workflows/build-push-sandbox.yml +++ b/.github/workflows/build-push-sandbox.yml @@ -85,6 +85,11 @@ jobs: with: fetch-depth: 0 + - name: Set image tag timestamp + id: timestamp + run: | + echo "timestamp=$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT + - name: Check for Dockerfile.codespace id: check-dockerfile run: | @@ -92,7 +97,6 @@ jobs: echo "dockerfile_exists=true" >> $GITHUB_OUTPUT else echo "dockerfile_exists=false" >> $GITHUB_OUTPUT - echo "No Dockerfile.codespace found in ${{ matrix.project }}, will generate one." 
fi - name: Set up Python @@ -148,6 +152,6 @@ jobs: file: ${{ matrix.project }}/Dockerfile.codespace push: true tags: | - zenmldocker/projects-${{ matrix.project }}:latest + zenmldocker/projects-${{ matrix.project }}:${{ steps.timestamp.outputs.timestamp }} cache-from: type=gha cache-to: type=gha,mode=max From 7f9911ebffbd0f978c8224e5e942eea0df67dccc Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 23 Apr 2025 21:43:25 -0500 Subject: [PATCH 20/22] split workflow into encapsulated jobs to avoid redundant dockerfile_exists checks --- .github/workflows/build-push-sandbox.yml | 99 ++++++++++++++---------- 1 file changed, 57 insertions(+), 42 deletions(-) diff --git a/.github/workflows/build-push-sandbox.yml b/.github/workflows/build-push-sandbox.yml index b8bfc28f3..8c40c6740 100644 --- a/.github/workflows/build-push-sandbox.yml +++ b/.github/workflows/build-push-sandbox.yml @@ -38,58 +38,49 @@ jobs: - name: Detect changed projects id: set-matrix run: | - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - if [ -n "${{ github.event.inputs.project }}" ]; then - PROJECTS="[\"${{ github.event.inputs.project }}\"]" - else - # Auto-detect changed files (same logic as push events) - CHANGED_FILES=$(git diff --name-only HEAD^ HEAD) - CHANGED_DIRS=$(echo "$CHANGED_FILES" | grep -o "^[^/]*" | sort -u | grep -v "^$") - ALL_PROJECT_DIRS=$(find . -maxdepth 1 -type d -not -path "*/\.*" -not -path "." 
| sed 's|^\./||' | grep -v "^_") - PROJECTS="[" - COMMA="" - for DIR in $CHANGED_DIRS; do - if echo "$ALL_PROJECT_DIRS" | grep -q "^$DIR$"; then - PROJECTS="${PROJECTS}${COMMA}\"${DIR}\"" - COMMA="," - fi - done - PROJECTS="${PROJECTS}]" - fi + # If this was a manual dispatch _and_ they provided a project, just use that + if [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.project }}" ]]; then + PROJECTS="[\"${{ github.event.inputs.project }}\"]" else + # Otherwise auto-diff HEAD^ → HEAD for any changed top-level dirs CHANGED_FILES=$(git diff --name-only HEAD^ HEAD) - CHANGED_DIRS=$(echo "$CHANGED_FILES" | grep -o "^[^/]*" | sort -u | grep -v "^$") - ALL_PROJECT_DIRS=$(find . -maxdepth 1 -type d -not -path "*/\.*" -not -path "." | sed 's|^\./||' | grep -v "^_") + CHANGED_DIRS=$(echo "$CHANGED_FILES" \ + | awk -F/ '{print $1}' \ + | sort -u \ + | grep -v '^$') + ALL_PROJECT_DIRS=$(find . -maxdepth 1 -type d \ + -not -path '*/\.*' \ + -not -path '.' \ + | sed 's|^\./||' \ + | grep -v '^_') PROJECTS="[" - COMMA="" - for DIR in $CHANGED_DIRS; do - if echo "$ALL_PROJECT_DIRS" | grep -q "^$DIR$"; then - PROJECTS="${PROJECTS}${COMMA}\"${DIR}\"" - COMMA="," + sep="" + for d in $CHANGED_DIRS; do + if echo "$ALL_PROJECT_DIRS" | grep -qx "$d"; then + PROJECTS+="${sep}\"$d\"" + sep="," fi done - PROJECTS="${PROJECTS}]" + PROJECTS+="]" fi + echo "matrix=$PROJECTS" >> $GITHUB_OUTPUT echo "Projects to build: $PROJECTS" - build-and-push: + check-dockerfile: needs: detect-changes runs-on: ubuntu-latest strategy: matrix: project: ${{ fromJson(needs.detect-changes.outputs.matrix) }} + outputs: + dockerfile_exists: ${{ steps.check-dockerfile.outputs.dockerfile_exists }} steps: - name: Checkout code uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Set image tag timestamp - id: timestamp - run: | - echo "timestamp=$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT - - name: Check for Dockerfile.codespace id: check-dockerfile run: | @@ -97,23 +88,34 
@@ jobs: echo "dockerfile_exists=true" >> $GITHUB_OUTPUT else echo "dockerfile_exists=false" >> $GITHUB_OUTPUT + echo "No Dockerfile.codespace found in ${{ matrix.project }}, will generate one." fi + generate-dockerfile: + needs: [detect-changes, check-dockerfile] + if: needs.check-dockerfile.outputs.dockerfile_exists == 'false' + runs-on: ubuntu-latest + strategy: + matrix: + project: ${{ fromJson(needs.detect-changes.outputs.matrix) }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python - if: steps.check-dockerfile.outputs.dockerfile_exists == 'false' uses: actions/setup-python@v4 with: python-version: "3.10" - - name: Generate Dockerfile.codespace if needed - if: steps.check-dockerfile.outputs.dockerfile_exists == 'false' + - name: Generate Dockerfile.codespace id: generate-dockerfile run: | python generate_sandbox_dockerfile.py "${{ matrix.project }}" echo "Generated Dockerfile.codespace for ${{ matrix.project }}" - name: Create Pull Request for new Dockerfile - if: steps.check-dockerfile.outputs.dockerfile_exists == 'false' uses: peter-evans/create-pull-request@v5 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -132,26 +134,39 @@ jobs: dockerfile sandbox - # Only build and push if Dockerfile already exists + build-and-push: + needs: [detect-changes, check-dockerfile] + if: needs.check-dockerfile.outputs.dockerfile_exists == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + project: ${{ fromJson(needs.detect-changes.outputs.matrix) }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + # Generate timestamp for image tag + - name: Generate timestamp + id: timestamp + run: echo "timestamp=$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT + - name: Set up Docker Buildx - if: steps.check-dockerfile.outputs.dockerfile_exists == 'true' uses: docker/setup-buildx-action@v2 - name: Login to DockerHub - if: steps.check-dockerfile.outputs.dockerfile_exists == 'true' uses: 
docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_PASSWORD }} - name: Build and push - if: steps.check-dockerfile.outputs.dockerfile_exists == 'true' uses: docker/build-push-action@v4 with: context: . file: ${{ matrix.project }}/Dockerfile.codespace push: true - tags: | - zenmldocker/projects-${{ matrix.project }}:${{ steps.timestamp.outputs.timestamp }} + tags: zenmldocker/projects-${{ matrix.project }}:${{ steps.timestamp.outputs.timestamp }} cache-from: type=gha cache-to: type=gha,mode=max From 480bffd699e1b4b76a6d4cdb1ef23cf93245e015 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 23 Apr 2025 21:44:46 -0500 Subject: [PATCH 21/22] update run command to use updated script name and path --- .github/workflows/build-push-sandbox.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-push-sandbox.yml b/.github/workflows/build-push-sandbox.yml index 8c40c6740..0748f2917 100644 --- a/.github/workflows/build-push-sandbox.yml +++ b/.github/workflows/build-push-sandbox.yml @@ -1,4 +1,4 @@ -name: Build and Push Project Sandbox Images +name: Build and Push Project Codespace Images on: push: @@ -88,7 +88,6 @@ jobs: echo "dockerfile_exists=true" >> $GITHUB_OUTPUT else echo "dockerfile_exists=false" >> $GITHUB_OUTPUT - echo "No Dockerfile.codespace found in ${{ matrix.project }}, will generate one." 
fi generate-dockerfile: @@ -112,7 +111,7 @@ jobs: - name: Generate Dockerfile.codespace id: generate-dockerfile run: | - python generate_sandbox_dockerfile.py "${{ matrix.project }}" + python scripts/generate_codespace_dockerfile.py "${{ matrix.project }}" echo "Generated Dockerfile.codespace for ${{ matrix.project }}" - name: Create Pull Request for new Dockerfile @@ -132,7 +131,7 @@ jobs: labels: | automated-pr dockerfile - sandbox + codespace build-and-push: needs: [detect-changes, check-dockerfile] From 761af1d049c81dec831217787dccd64ee71ca584 Mon Sep 17 00:00:00 2001 From: marwan37 Date: Wed, 23 Apr 2025 21:47:33 -0500 Subject: [PATCH 22/22] rename filename: sandbox -> codespace --- .../{build-push-sandbox.yml => build-push-codespace.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{build-push-sandbox.yml => build-push-codespace.yml} (100%) diff --git a/.github/workflows/build-push-sandbox.yml b/.github/workflows/build-push-codespace.yml similarity index 100% rename from .github/workflows/build-push-sandbox.yml rename to .github/workflows/build-push-codespace.yml