Skip to content

Commit fcc7996

Browse files
authored
Merge pull request #196 from zenml-io/add-sandbox-dockerfile-generator-script
Add sandbox dockerfile generator script
2 parents 0526719 + 761af1d commit fcc7996

File tree

7 files changed

+496
-183
lines changed

7 files changed

+496
-183
lines changed
Lines changed: 171 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,171 @@
# Workflow identity and triggers.
name: Build and Push Project Codespace Images

on:
  # Manual run: optionally name a single project; when the input is left
  # empty the jobs fall back to detecting projects from changed files.
  workflow_dispatch:
    inputs:
      project:
        description: 'Project to build (leave empty to detect from changed files)'
        default: ''
        required: false

  # Automatic run for pushes to main, unless every changed path is matched
  # by one of the ignore patterns below.
  push:
    branches: [main]
    paths-ignore:
      - '_assets/**'
      - '.github/**'
      - '.gitignore'
      - '.gitmodules'
      - '.typos.toml'
      - 'CODE-OF-CONDUCT.md'
      - 'CONTRIBUTING.md'
      - 'scripts/**'
      - 'LICENSE'
      - 'pyproject.toml'
      - 'README.md'
jobs:
  # Works out which top-level project directories this run should process and
  # publishes them as a compact JSON array in the `matrix` output.
  detect-changes:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          # Two commits are enough for the HEAD^ -> HEAD diff below.
          fetch-depth: 2

      - name: Detect changed projects
        id: set-matrix
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so user-supplied text is never parsed as shell code.
          EVENT_NAME: ${{ github.event_name }}
          PROJECT_INPUT: ${{ github.event.inputs.project }}
        run: |
          # If this was a manual dispatch _and_ a project was provided, use it.
          if [[ "$EVENT_NAME" == "workflow_dispatch" && -n "$PROJECT_INPUT" ]]; then
            # jq handles JSON escaping of the user-supplied name.
            PROJECTS=$(jq -cn --arg project "$PROJECT_INPUT" '[$project]')
          else
            # Otherwise diff HEAD^ -> HEAD and keep the changed top-level dirs.
            # Running git in its own assignment keeps a git failure fatal.
            CHANGED_FILES=$(git diff --name-only HEAD^ HEAD)
            PROJECTS=$(
              awk -F/ 'NF > 1 {print $1}' <<<"$CHANGED_FILES" \
                | sort -u \
                | while IFS= read -r d; do
                    case "$d" in
                      # Hidden dirs, "_"-prefixed dirs and the repo's own
                      # tooling dir (scripts/) are not projects.
                      ""|.*|_*|scripts) continue ;;
                    esac
                    # Skip directories that no longer exist (deleted projects).
                    if [[ -d "$d" ]]; then
                      printf '%s\n' "$d"
                    fi
                  done \
                | jq -R . \
                | jq -cs .
            )
          fi

          echo "matrix=$PROJECTS" >> "$GITHUB_OUTPUT"
          echo "Projects to build: $PROJECTS"

  # Splits the detected projects into those that already ship a
  # Dockerfile.codespace (`existing`) and those that do not (`missing`).
  # This is deliberately a single, non-matrix job: outputs of a matrix job
  # are overwritten by whichever leg finishes last, so a per-project boolean
  # cannot be used to gate the downstream jobs reliably.
  check-dockerfile:
    needs: detect-changes
    runs-on: ubuntu-latest
    outputs:
      existing: ${{ steps.check-dockerfile.outputs.existing }}
      missing: ${{ steps.check-dockerfile.outputs.missing }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Check for Dockerfile.codespace
        id: check-dockerfile
        env:
          PROJECTS_JSON: ${{ needs.detect-changes.outputs.matrix }}
        run: |
          existing=()
          missing=()
          while IFS= read -r project; do
            if [ -f "$project/Dockerfile.codespace" ]; then
              existing+=("$project")
            else
              missing+=("$project")
            fi
          done < <(jq -r '.[]' <<<"$PROJECTS_JSON")

          # Prints its arguments as a compact JSON array ("[]" when none).
          to_json() {
            if [ "$#" -eq 0 ]; then
              echo '[]'
            else
              printf '%s\n' "$@" | jq -R . | jq -cs .
            fi
          }

          echo "existing=$(to_json "${existing[@]}")" >> "$GITHUB_OUTPUT"
          echo "missing=$(to_json "${missing[@]}")" >> "$GITHUB_OUTPUT"

  # Generates a Dockerfile.codespace for every project that lacks one and
  # opens a pull request with the result.
  generate-dockerfile:
    needs: [detect-changes, check-dockerfile]
    # An empty matrix makes the job fail, so skip it when nothing is missing.
    if: needs.check-dockerfile.outputs.missing != '[]'
    runs-on: ubuntu-latest
    permissions:
      # Required by create-pull-request to push the branch and open the PR.
      contents: write
      pull-requests: write
    strategy:
      matrix:
        project: ${{ fromJson(needs.check-dockerfile.outputs.missing) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Generate Dockerfile.codespace
        id: generate-dockerfile
        env:
          PROJECT: ${{ matrix.project }}
        run: |
          python scripts/generate_codespace_dockerfile.py "$PROJECT"
          echo "Generated Dockerfile.codespace for $PROJECT"

      - name: Create Pull Request for new Dockerfile
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: "Auto-generate Dockerfile.codespace for ${{ matrix.project }}"
          title: "Auto-generate Dockerfile.codespace for ${{ matrix.project }}"
          body: |
            This PR adds a generated Dockerfile.codespace for the ${{ matrix.project }} project.

            Please review the changes and merge if they look good.

            Once merged, the Docker image will be built and pushed automatically.
          branch: "auto-dockerfile-${{ matrix.project }}"
          base: main
          labels: |
            automated-pr
            dockerfile
            codespace

  # Builds and pushes the codespace image for every project that already has
  # a Dockerfile.codespace.
  build-and-push:
    needs: [detect-changes, check-dockerfile]
    # An empty matrix makes the job fail, so skip it when nothing is buildable.
    if: needs.check-dockerfile.outputs.existing != '[]'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        project: ${{ fromJson(needs.check-dockerfile.outputs.existing) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      # Generate timestamp for image tag
      - name: Generate timestamp
        id: timestamp
        run: echo "timestamp=$(date -u +'%Y%m%d%H%M%S')" >> "$GITHUB_OUTPUT"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Build and push
        uses: docker/build-push-action@v4
        with:
          # The build context is the repository root, not the project dir.
          context: .
          file: ${{ matrix.project }}/Dockerfile.codespace
          push: true
          tags: zenmldocker/projects-${{ matrix.project }}:${{ steps.timestamp.outputs.timestamp }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

generate_zenml_project.py

Lines changed: 0 additions & 138 deletions
This file was deleted.

omni-reader/Dockerfile.codespace

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
# Codespace image for the omni-reader project, built on the sandbox base image.
FROM zenmldocker/zenml-sandbox:latest

# Bring in the uv and uvx binaries from the published uv image.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# uv settings: install into the system interpreter and compile bytecode.
ENV UV_SYSTEM_PYTHON=1 \
    UV_COMPILE_BYTECODE=1

# Project metadata
LABEL project_name="omni-reader" \
      project_version="0.1.0"

# Install the project's Python dependencies; the cache mount keeps uv's
# download cache across builds (requires BuildKit).
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system \
        "instructor" \
        "jiwer" \
        "jiter" \
        "importlib-metadata<7.0,>=1.4.0" \
        "litellm" \
        "mistralai==1.0.3" \
        "numpy<2.0,>=1.9.0" \
        "openai==1.69.0" \
        "Pillow==11.1.0" \
        "polars-lts-cpu==1.26.0" \
        "pyarrow>=7.0.0" \
        "python-dotenv" \
        "streamlit==1.44.0" \
        "pydantic>=2.8.2,<2.9.0" \
        "tqdm==4.66.4" \
        "zenml>=0.80.0"

# All following instructions run relative to the workspace directory.
WORKDIR /workspace

# Shallow-clone the projects repository, copy the omni-reader sources into
# the workspace, then delete the clone. The `*` glob does not match dotfiles,
# so hidden files in the project directory are not copied by this step.
RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects \
    && cp -r /tmp/zenml-projects/omni-reader/* /workspace/ \
    && rm -rf /tmp/zenml-projects

# Write a minimal VS Code settings file that selects the colour theme.
RUN mkdir -p /workspace/.vscode \
    && printf '{\n "workbench.colorTheme": "Default Dark Modern"\n}' > /workspace/.vscode/settings.json

# Provide a default .env from the example file.
# NOTE(review): this path is resolved against the build context, not this
# Dockerfile's directory - confirm the context used for the build contains
# `.env.example` at its root.
COPY .env.example /workspace/.env

# Set POLARS_SKIP_CPU_CHECK=1 in the image environment.
ENV POLARS_SKIP_CPU_CHECK=1

0 commit comments

Comments
 (0)