Skip to content

Commit 604767d

Browse files
authored
Merge pull request #193 from zenml-io/sandbox/add-dockerfiles-for-remaining-projects
Add Dockerfile.sandbox for all projects with generator script
2 parents b820cc9 + 2be6375 commit 604767d

File tree

15 files changed

+794
-31
lines changed

15 files changed

+794
-31
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Sandbox base image
FROM safoinext/zenml-sandbox:latest

# Project metadata
LABEL project_name="databricks-production-qa-demo"
LABEL project_version="0.1.0"

# Install project-specific dependencies
RUN pip install --no-cache-dir \
    "zenml[server]>=0.70.0"

# Set workspace directory
WORKDIR /workspace

# Clone only the project directory and reorganize
RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \
    cp -r /tmp/zenml-projects/databricks-production-qa-demo/* /workspace/ && \
    rm -rf /tmp/zenml-projects

# Create a template .env file for API keys.
# The echo commands are grouped so the redirect captures every line; without
# the group only the last echo would be written to .env.
RUN { echo "ZENML_STORE_URL=YOUR_ZENML_KEY_HERE" && \
    echo "ZENML_PROJECT_SECRET_NAME=YOUR_ZENML_KEY_HERE"; } > .env

# Create a .vscode directory and settings.json file.
# printf is used because whether echo expands "\n" depends on the shell.
RUN mkdir -p /workspace/.vscode && \
    printf '{\n  "workbench.colorTheme": "Default Dark Modern"\n}\n' > /workspace/.vscode/settings.json
29+
30+
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Sandbox base image
FROM safoinext/zenml-sandbox:latest

# Project metadata
LABEL project_name="end-to-end-computer-vision"
LABEL project_version="0.1.0"

# Install project-specific dependencies
RUN pip install --no-cache-dir \
    "zenml[server]>=0.70.0" \
    "notebook" \
    "scikit-learn<1.3" \
    "pyarrow" \
    "seaborn" \
    "xgboost" \
    "ultralytics" \
    "torch" \
    "huggingface_hub>=0.20.0" \
    "fiftyone" \
    "datasets" \
    "albumentations" \
    "pillow>=10.0.0" \
    "dill"

# Set workspace directory
WORKDIR /workspace

# Clone only the project directory and reorganize
RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \
    cp -r /tmp/zenml-projects/end-to-end-computer-vision/* /workspace/ && \
    rm -rf /tmp/zenml-projects

# Create a template .env file for API keys.
# The echo commands are grouped so the redirect captures every line; without
# the group only the last echo would be written to .env.
RUN { echo "GEMINI_API_KEY=YOUR_GEMINI_KEY_HERE" && \
    echo "HF_TOKEN=YOUR_HUGGINGFACE_TOKEN_HERE" && \
    echo "FIFTYONE_LABELSTUDIO_API_KEY=YOUR_FIFTYONE_KEY_HERE"; } > .env

# Create a .vscode directory and settings.json file.
# printf is used because whether echo expands "\n" depends on the shell.
RUN mkdir -p /workspace/.vscode && \
    printf '{\n  "workbench.colorTheme": "Default Dark Modern"\n}\n' > /workspace/.vscode/settings.json
43+
44+
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Sandbox base image
FROM safoinext/zenml-sandbox:latest

# Project metadata
LABEL project_name="eurorate-predictor"
LABEL project_version="0.1.0"

# Install project-specific dependencies
RUN pip install --no-cache-dir \
    "zenml[server]" \
    "pandas" \
    "xgboost" \
    "google-cloud-bigquery" \
    "pyarrow" \
    "gradio" \
    "db-dtypes"

# Set workspace directory
WORKDIR /workspace

# Clone only the project directory and reorganize
RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \
    cp -r /tmp/zenml-projects/eurorate-predictor/* /workspace/ && \
    rm -rf /tmp/zenml-projects

# Create a template .env file for API keys
RUN echo "GEMINI_API_KEY=YOUR_GEMINI_KEY_HERE" > .env

# Create a .vscode directory and settings.json file.
# printf is used because whether echo expands "\n" depends on the shell.
RUN mkdir -p /workspace/.vscode && \
    printf '{\n  "workbench.colorTheme": "Default Dark Modern"\n}\n' > /workspace/.vscode/settings.json
34+
35+

gamesense/Dockerfile.sandbox

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Sandbox base image
FROM safoinext/zenml-sandbox:latest

# Project metadata
LABEL project_name="gamesense"
LABEL project_version="0.1.0"

# Install project-specific dependencies
RUN pip install --no-cache-dir \
    "datasets>=2.19.1" \
    "transformers>=4.43.1" \
    "peft" \
    "bitsandbytes>=0.41.3" \
    "scipy" \
    "evaluate" \
    "rouge_score" \
    "nltk" \
    "accelerate>=0.30.0" \
    "urllib3<2" \
    "zenml>=0.62.0" \
    "torch>=2.2.0" \
    "sentencepiece" \
    "huggingface_hub"

# Set workspace directory
WORKDIR /workspace

# Clone only the project directory and reorganize
RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \
    cp -r /tmp/zenml-projects/gamesense/* /workspace/ && \
    rm -rf /tmp/zenml-projects

# Create a template .env file for API keys.
# The echo commands are grouped so the redirect captures every line; without
# the group only the last echo would be written to .env.
RUN { echo "HF_TOKEN=YOUR_HUGGINGFACE_TOKEN_HERE" && \
    echo "GH_ACCESS_TOKEN=YOUR_GH_KEY_HERE"; } > .env

# Create a .vscode directory and settings.json file.
# printf is used because whether echo expands "\n" depends on the shell.
RUN mkdir -p /workspace/.vscode && \
    printf '{\n  "workbench.colorTheme": "Default Dark Modern"\n}\n' > /workspace/.vscode/settings.json


# Set environment variables for compatibility and performance
ENV TOKENIZERS_PARALLELISM=false

generate_sandbox_dockerfile.py

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
#!/usr/bin/env python3
2+
3+
"""Script to generate Dockerfile.sandbox files for ZenML projects.
4+
5+
This ensures consistency across all project Docker images.
6+
"""
7+
8+
import argparse
9+
import os
10+
import re
11+
import sys
12+
from pathlib import Path
13+
14+
# Template rendered with str.format() to produce each Dockerfile.sandbox.
#
# Placeholders:
#   {project_name}    project directory name inside the zenml-projects repo
#   {dependencies}    pre-formatted, line-continued pip requirement arguments
#   {api_vars}        one or more "KEY=placeholder" entries; multiple entries
#                     are joined with '" && \<newline>    echo "' so they expand
#                     into a chain of echo commands
#   {env_vars_block}  optional trailing block of ENV instructions
#
# Literal braces intended for the Dockerfile are doubled ({{ }}) and
# backslashes are doubled (\\) so they survive str.format() and Python string
# escaping respectively.
#
# The echo chain is wrapped in a shell brace group so that the single
# "> .env" redirect captures the output of *every* echo; without the group
# only the last echo would be written to .env.
#
# settings.json is written with printf rather than echo because whether echo
# expands "\n" depends on which shell /bin/sh is.
DOCKERFILE_TEMPLATE = """# Sandbox base image
FROM safoinext/zenml-sandbox:latest

# Project metadata
LABEL project_name="{project_name}"
LABEL project_version="0.1.0"

# Install project-specific dependencies
RUN pip install --no-cache-dir \\
{dependencies}

# Set workspace directory
WORKDIR /workspace

# Clone only the project directory and reorganize
RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \\
    cp -r /tmp/zenml-projects/{project_name}/* /workspace/ && \\
    rm -rf /tmp/zenml-projects

# Create a template .env file for API keys
RUN {{ echo "{api_vars}"; }} > .env

# Create a .vscode directory and settings.json file
RUN mkdir -p /workspace/.vscode && \\
    printf '{{\\n  "workbench.colorTheme": "Default Dark Modern"\\n}}\\n' > /workspace/.vscode/settings.json

{env_vars_block}
"""
44+
45+
46+
def format_env_key(key):
    """Build the ``KEY=placeholder`` line for the template ``.env`` file.

    Two keys have dedicated placeholder text. Every other key gets
    ``YOUR_<prefix>_KEY_HERE``, where ``<prefix>`` is the part of the key
    before its first underscore (the whole key when it has no underscore).

    Args:
        key: Environment variable name.

    Returns:
        The string ``"<key>=<placeholder>"``.
    """
    special_placeholders = {
        "GOOGLE_APPLICATION_CREDENTIALS": "PATH_TO_YOUR_GOOGLE_CREDENTIALS_FILE",
        "HF_TOKEN": "YOUR_HUGGINGFACE_TOKEN_HERE",
    }

    placeholder = special_placeholders.get(key)
    if placeholder is None:
        prefix, _, _ = key.partition("_")
        placeholder = f"YOUR_{prefix}_KEY_HERE"

    return f"{key}={placeholder}"
56+
57+
58+
def parse_requirements(project_dir):
    """Return the installable requirement lines from ``requirements.txt``.

    Blank lines, full-line comments and inline comments are dropped. Lines
    that are pip options (starting with ``-``, e.g. ``-r other.txt`` or
    ``--extra-index-url ...``) are skipped with a warning, because each
    returned entry is later emitted as a single quoted ``pip install``
    argument and an option line would not survive that.

    A requirement for the ``polars`` distribution is rewritten to
    ``polars-lts-cpu``. Only the exact distribution name is rewritten, so
    an existing ``polars-lts-cpu`` (or any other ``polars-*`` package) is
    left untouched.

    Args:
        project_dir: Directory expected to contain ``requirements.txt``.

    Returns:
        Requirement specifiers in file order, or an empty list (after
        printing a warning) when the file does not exist.
    """
    req_file = Path(project_dir) / "requirements.txt"
    if not req_file.exists():
        print(f"Warning: No requirements.txt found in {project_dir}")
        return []

    dependencies = []
    with open(req_file, "r", encoding="utf-8") as f:
        for raw_line in f:
            # In requirements files a comment starts at "#" only at the
            # beginning of a line or after whitespace, so URL fragments
            # such as "...#egg=name" are preserved.
            line = re.split(r"(?:^|\s+)#", raw_line, maxsplit=1)[0].strip()
            if not line:
                continue
            if line.startswith("-"):
                print(
                    f"Warning: Skipping pip option line in {req_file}: {line}"
                )
                continue
            # The lookahead rejects names that merely begin with "polars"
            # (polars-lts-cpu, polars-u64-idx, ...).
            line = re.sub(r"^polars(?![\w.-])", "polars-lts-cpu", line)
            dependencies.append(line)

    return dependencies
75+
76+
77+
def detect_api_keys(project_dir):
    """Guess which API keys a project needs by scanning its Python files.

    Every ``*.py`` file under ``project_dir`` (recursively) is read,
    lower-cased, and searched with one regular expression per known
    environment variable. This is a heuristic substring match, so false
    positives are possible (for example any mention of "google" selects
    ``GEMINI_API_KEY``).

    Args:
        project_dir: Root directory of the project to scan.

    Returns:
        A list of ``KEY=placeholder`` strings, ordered as the keys are
        declared in the pattern table so repeated runs give identical
        output. When nothing is detected, a single generic
        ``API_KEY=YOUR_API_KEY_HERE`` entry is returned.
    """
    api_patterns = {
        # LLM Provider API Keys
        "HF_TOKEN": r"huggingface|hf_token",
        "OPENAI_API_KEY": r"openai|gpt",
        "ANTHROPIC_API_KEY": r"anthropic|claude",
        "MISTRAL_API_KEY": r"mistral|mistralai",
        "GEMINI_API_KEY": r"gemini|google",
        # ZenML-specific API Keys and Environment Variables
        "ZENML_STORE_API_KEY": r"zenml.*api_key|zenml_store_api_key",
        "ZENML_STORE_URL": r"zenml_store_url|zenml.*url",
        "ZENML_PROJECT_SECRET_NAME": r"zenml.*secret|secret_name",
        "ZENML_HF_USERNAME": r"zenml_hf_username|hf_username",
        "ZENML_HF_SPACE_NAME": r"zenml_hf_space_name|hf_space_name",
        # Monitoring and Logging
        "LANGFUSE_PUBLIC_KEY": r"langfuse.*public",
        "LANGFUSE_SECRET_KEY": r"langfuse.*secret",
        "LANGFUSE_HOST": r"langfuse.*host",
        # Vector Databases
        "PINECONE_API_KEY": r"pinecone",
        "SUPABASE_USER": r"supabase.*user",
        "SUPABASE_PASSWORD": r"supabase.*password",
        "SUPABASE_HOST": r"supabase.*host",
        "SUPABASE_PORT": r"supabase.*port",
        # Cloud Provider Keys
        "AWS_ACCESS_KEY_ID": r"aws.*access|aws_access_key_id",
        "AWS_SECRET_ACCESS_KEY": r"aws.*secret|aws_secret_access_key",
        "AWS_SESSION_TOKEN": r"aws.*session|aws_session_token",
        "AWS_REGION": r"aws.*region|aws_region",
        "GOOGLE_APPLICATION_CREDENTIALS": r"google.*credentials",
        # Other Service-Specific Keys
        "FIFTYONE_LABELSTUDIO_API_KEY": r"fiftyone|labelstudio",
        "NEPTUNE_API_TOKEN": r"neptune",
        "GH_ACCESS_TOKEN": r"gh_access_token|github",
    }
    compiled_patterns = {
        key: re.compile(pattern) for key, pattern in api_patterns.items()
    }

    found = set()
    for py_file in Path(project_dir).glob("**/*.py"):
        if len(found) == len(compiled_patterns):
            break  # every known key already detected; nothing left to learn
        if not py_file.is_file():
            continue  # a directory that happens to be named "*.py"
        with open(py_file, "r", encoding="utf-8", errors="ignore") as f:
            content = f.read().lower()
        found.update(
            key
            for key, regex in compiled_patterns.items()
            if key not in found and regex.search(content)
        )

    if not found:
        # Return the finished line directly: passing it through
        # format_env_key() would append a second "=YOUR_..._KEY_HERE".
        return ["API_KEY=YOUR_API_KEY_HERE"]

    # Walk the pattern table (not the set) so the order is deterministic.
    return [format_env_key(key) for key in api_patterns if key in found]
130+
131+
132+
def detect_env_variables(project_dir, dependencies):
    """Choose the ``ENV`` settings implied by a project's dependencies.

    Args:
        project_dir: Project directory. Accepted for interface symmetry
            with the other detectors; it is not consulted here.
        dependencies: Requirement strings for the project.

    Returns:
        A list of ``NAME=value`` strings: ``POLARS_SKIP_CPU_CHECK=1`` when
        any requirement mentions "polars", then
        ``TOKENIZERS_PARALLELISM=false`` when any requirement starts with
        "transform" or "token" (case-insensitive in both cases).
    """
    lowered = [dep.lower() for dep in dependencies]

    mentions_polars = any("polars" in name for name in lowered)
    uses_tokenizers = any(
        name.startswith("transform") or name.startswith("token")
        for name in lowered
    )

    # NOTE: PYTHONUNBUFFERED=1 and PYTHONDONTWRITEBYTECODE=1 are development
    # conveniences that are intentionally not emitted; they could be made
    # optional later.
    candidates = (
        (mentions_polars, "POLARS_SKIP_CPU_CHECK=1"),
        (uses_tokenizers, "TOKENIZERS_PARALLELISM=false"),
    )
    return [setting for wanted, setting in candidates if wanted]
151+
152+
153+
def _shell_double_quote(value):
    """Wrap *value* in double quotes, escaping characters special inside them."""
    escaped = re.sub(r'([\\"$`])', r"\\\1", value)
    return f'"{escaped}"'


def generate_dockerfile(project_name, output_dir=None):
    """Generate a Dockerfile.sandbox for the specified project.

    Requirements, API-key placeholders and ENV settings are detected from
    the project directory, substituted into ``DOCKERFILE_TEMPLATE`` and
    written to ``<directory>/Dockerfile.sandbox``.

    Args:
        project_name: Project name; used for the image label and for the
            project path inside the cloned repository.
        output_dir: Directory that is scanned and that receives the
            generated file. Defaults to ``project_name``.

    Returns:
        True when the file was written, False when the directory does not
        exist (an error is printed in that case).
    """
    if output_dir is None:
        output_dir = project_name

    project_dir = Path(output_dir)
    if not project_dir.exists():
        print(f"Error: Project directory {project_dir} not found")
        return False

    # One quoted requirement per line; every line but the last ends with a
    # shell line continuation.
    dependencies = parse_requirements(project_dir)
    formatted_deps = " \\\n".join(
        f"    {_shell_double_quote(dep)}" for dep in dependencies
    )

    # Each entry becomes its own `echo "..."` once substituted into the
    # template's `echo "{api_vars}"`.
    api_vars = detect_api_keys(project_dir)
    formatted_api_vars = '" && \\\n    echo "'.join(api_vars)

    env_vars = detect_env_variables(project_dir, dependencies)
    env_vars_block = ""
    if env_vars:
        env_lines = [
            "\n# Set environment variables for compatibility and performance"
        ]
        env_lines.extend(f"ENV {var}" for var in env_vars)
        env_vars_block = "\n".join(env_lines)

    dockerfile_content = DOCKERFILE_TEMPLATE.format(
        project_name=project_name,
        dependencies=formatted_deps,
        api_vars=formatted_api_vars,
        env_vars_block=env_vars_block,
    )

    if not dependencies:
        # With nothing to install, the template leaves a dangling
        # "RUN pip install ... \" whose line continuation would swallow the
        # next Dockerfile instruction as a pip argument. Drop it.
        dockerfile_content = dockerfile_content.replace(
            "RUN pip install --no-cache-dir \\\n\n",
            "# (none: no requirements were found for this project)\n\n",
            1,
        )

    dockerfile_path = project_dir / "Dockerfile.sandbox"
    with open(dockerfile_path, "w", encoding="utf-8") as f:
        f.write(dockerfile_content)

    print(
        f"Generated Dockerfile.sandbox for {project_name} at {dockerfile_path}"
    )
    return True
206+
207+
208+
def main():
    """Parse the command line and generate one project's Dockerfile.sandbox.

    Returns:
        Process exit status: 0 when generation succeeded, 1 otherwise.
    """
    cli = argparse.ArgumentParser(
        description="Generate Dockerfile.sandbox for ZenML projects"
    )
    cli.add_argument("project", help="Project name")
    cli.add_argument(
        "--output-dir", help="Output directory (defaults to project name)"
    )
    options = cli.parse_args()

    if generate_dockerfile(options.project, options.output_dir):
        return 0
    return 1


# Run only when executed as a script; main()'s return value is the exit code.
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)