Skip to content

Commit 9537135

Browse files
committed
infra: Add Docker and Kubernetes deployment configurations
Add containerization and orchestration support:
- General-purpose Dockerfile for amp-python
- Snowflake-specific Dockerfile with parallel loader
- GitHub Actions workflow for automated Docker publishing to ghcr.io
- Kubernetes deployment manifest for GKE with resource limits
- Comprehensive .dockerignore and .gitignore

Docker images:
- amp-python: Base image with all loaders
- amp-snowflake: Optimized for Snowflake parallel loading
- Includes snowflake_parallel_loader.py as entrypoint
- Pre-configured with Snowflake connector and dependencies
1 parent 0dbce8a commit 9537135

File tree

6 files changed

+498
-0
lines changed

6 files changed

+498
-0
lines changed

.dockerignore

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Build-context exclusions for the Docker images.
#
# .dockerignore patterns are matched relative to the context root, so a bare
# "__pycache__/" or "*.pyc" only matches at the top level. Entries that must
# apply inside src/, apps/ and data/ as well carry a "**/" prefix.

# Python byproducts
**/__pycache__/
**/*.py[cod]
**/*$py.class
*.so
.Python
*.egg
**/*.egg-info/
dist/
build/
.eggs/

# Virtual environments
.venv/
venv/
ENV/
env/

# IDE and OS clutter
.vscode/
.idea/
**/*.swp
**/*.swo
**/*~
**/.DS_Store

# Testing
**/.pytest_cache/
.coverage
.coverage.*
htmlcov/
.tox/
*.cover

# Notebooks
notebooks/
*.ipynb
**/.ipynb_checkpoints

# Documentation (README.md and DOCKER_DEPLOY.md are re-included below)
docs/
*.md
!README.md
!DOCKER_DEPLOY.md

# Git
.git/
.gitignore
.gitattributes

# CI/CD
.github/
.gitlab-ci.yml

# Local test data and logs
tests/
**/*.log
/tmp/

# Environment files: keep credentials out of the build context at any depth.
# "*.env" also matches ".env" itself because "*" may match an empty prefix.
.test.env
**/*.env

# UV/pip cache
.uv/
uv.lock

# Docker
Dockerfile*
docker-compose*.yml
.dockerignore
.github/workflows/docker-publish.yml

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Builds one image per matrix entry and pushes it to the GitHub Container
# Registry. Pull requests build the images but never push them.
name: Build and Push Docker Images

on:
  push:
    branches:
      - main
    paths:
      - 'src/**'
      - 'apps/**'
      - 'data/**'
      - 'Dockerfile*'
      - 'pyproject.toml'
      - '.github/workflows/docker-publish.yml'
  pull_request:
    branches:
      - main
  workflow_dispatch:  # Allow manual trigger
    inputs:
      # NOTE(review): no step in this workflow reads this input, so it has no
      # effect on the published tags. Wire it into the metadata step or drop it.
      tag:
        description: 'Docker image tag suffix (default: latest)'
        required: false
        default: 'latest'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    strategy:
      matrix:
        include:
          - dockerfile: Dockerfile
            suffix: ""
            description: "Full image with all loader dependencies"
          - dockerfile: Dockerfile.snowflake
            suffix: "-snowflake"
            description: "Snowflake-only image (minimal dependencies)"

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # The images are built for linux/arm64 as well; RUN steps for a foreign
      # architecture need QEMU emulation registered on the runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Nothing is pushed for pull requests, so registry credentials are only
      # needed on the other events.
      - name: Log in to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          flavor: |
            suffix=${{ matrix.suffix }},onlatest=true
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push Docker image (${{ matrix.description }})
        # The id lets the summary step below read this step's digest output.
        id: build
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./${{ matrix.dockerfile }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha,scope=${{ matrix.dockerfile }}
          cache-to: type=gha,mode=max,scope=${{ matrix.dockerfile }}
          platforms: linux/amd64,linux/arm64

      - name: Image digest
        # Values are passed through the environment instead of being expanded
        # into the script text, so their content cannot alter the shell command.
        env:
          DESCRIPTION: ${{ matrix.description }}
          DIGEST: ${{ steps.build.outputs.digest }}
          TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          echo "### ${DESCRIPTION}" >> "$GITHUB_STEP_SUMMARY"
          echo "Digest: ${DIGEST}" >> "$GITHUB_STEP_SUMMARY"
          echo "Tags: ${TAGS}" >> "$GITHUB_STEP_SUMMARY"

.gitignore

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# --- Secrets and local configuration -----------------------------------
# Environment files
.env
.test.env
*.env

# Kubernetes secret manifests must never be committed
k8s/secret.yaml
k8s/secrets.yaml

# --- Python byproducts -------------------------------------------------
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg
*.egg-info/
dist/
build/
.eggs/

# --- Virtual environments ----------------------------------------------
.venv/
venv/
ENV/
env/

# --- Editors and OS files ----------------------------------------------
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store

# --- Test and coverage output ------------------------------------------
.pytest_cache/
.coverage
.coverage.*
htmlcov/
.tox/
*.cover
.hypothesis/

# --- Notebook checkpoints ----------------------------------------------
.ipynb_checkpoints/

# --- Logs and scratch space --------------------------------------------
*.log
/tmp/

# --- uv / pip --------------------------------------------------------
.uv/
uv.lock

# --- Local data files (development only) -------------------------------
data/*.csv
data/*.parquet
data/*.db
data/*.lmdb

# --- Archives produced by builds ---------------------------------------
*.tar.gz
*.zip

Dockerfile

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# Multi-stage build for optimized image size
# Stage 1 (builder): install all Python dependencies into the system
# site-packages of this stage.
FROM python:3.12-slim AS builder

# Build-time system packages (compiler toolchain and curl); the apt lists are
# removed in the same layer so they do not persist in it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# uv binary taken from the upstream image. Pinned to an explicit release so
# rebuilds are reproducible; bump the tag deliberately when upgrading.
COPY --from=ghcr.io/astral-sh/uv:0.8.0 /uv /usr/local/bin/uv

# Set working directory
WORKDIR /app

# Copy dependency files
COPY pyproject.toml README.md ./

# Install ALL dependencies, including every loader's optional dependencies,
# so optional imports do not fail at run time.
#
# Every requirement is single-quoted: RUN executes through /bin/sh, where an
# unquoted `pkg>=1.0` is parsed as the argument `pkg` plus a redirect of stdout
# to a file named `=1.0`, which silently drops the version constraint.
RUN uv pip install --system --no-cache \
        'pandas>=2.3.1' \
        'pyarrow>=20.0.0' \
        'typer>=0.15.2' \
        'adbc-driver-manager>=1.5.0' \
        'adbc-driver-postgresql>=1.5.0' \
        'protobuf>=4.21.0' \
        'base58>=2.1.1' \
        'eth-hash[pysha3]>=0.7.1' \
        'eth-utils>=5.2.0' \
        'google-cloud-bigquery>=3.30.0' \
        'google-cloud-storage>=3.1.0' \
        'arro3-core>=0.5.1' \
        'arro3-compute>=0.5.1' \
        'psycopg2-binary>=2.9.0' \
        'redis>=4.5.0' \
        'deltalake>=1.0.2' \
        'pyiceberg[sql-sqlite]>=0.10.0' \
        'pydantic>=2.0,<2.12' \
        'snowflake-connector-python>=4.0.0' \
        'snowpipe-streaming>=1.0.0' \
        'lmdb>=1.4.0'
45+
46+
# Stage 2 (runtime): final image, assembled from the slim base plus the
# packages installed in the builder stage.
FROM python:3.12-slim

# Runtime-only system package; apt lists are dropped in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libpq5 \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account (UID 1000) and the directories it owns.
RUN useradd --create-home --uid 1000 amp \
    && mkdir -p /app /data \
    && chown -R amp:amp /app /data

WORKDIR /app

# Python packages installed by the builder stage.
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages

# uv binary, used by the install step below.
COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv

# Application sources and project metadata, owned by the runtime user.
COPY --chown=amp:amp src/ ./src/
COPY --chown=amp:amp apps/ ./apps/
COPY --chown=amp:amp data/ ./data/
COPY --chown=amp:amp pyproject.toml README.md ./

# Regular (non-editable) install of the project into the system Python.
RUN uv pip install --system --no-cache .

# Everything from here on runs as the unprivileged user.
USER amp

ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1

# NOTE(review): this probe only verifies that the interpreter starts, so it
# reports healthy regardless of what the loader is doing. Consider a real
# liveness signal, or drop it if this image only runs one-shot jobs.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import sys; sys.exit(0)"

# Default process: the ERC20 loader script. Arguments given to `docker run`
# replace the CMD defaults below.
ENTRYPOINT ["python", "apps/test_erc20_labeled_parallel.py"]
CMD ["--blocks", "100000", "--workers", "8", "--flush-interval", "0.5"]

0 commit comments

Comments
 (0)