diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..ed56dda --- /dev/null +++ b/.dockerignore @@ -0,0 +1,71 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +.venv/ +venv/ +ENV/ +env/ + +# Node +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +frontend/build/ +frontend/node_modules/ +website/node_modules/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# Documentation +docs/_build/ +*.md +!README.md + +# Git +.git/ +.gitignore + +# Docker +Dockerfile* +docker-compose*.yml +.dockerignore + +# Data +data/*.db +data/*.db-journal + +# Logs +*.log + +# Environment +.env +.env.local +.env.*.local + +# OS +.DS_Store +Thumbs.db + +# Misc +*.bak +*.tmp +.cache/ + diff --git a/DOCKER.md b/DOCKER.md new file mode 100644 index 0000000..2db4a58 --- /dev/null +++ b/DOCKER.md @@ -0,0 +1,79 @@ +# Docker Setup + +This document describes how to run DataGenFlow using Docker. + +## Quick Start + +1. **Build and start the application:** + + ```bash + docker-compose up -d + ``` + +2. **Access the application:** + - Frontend: http://localhost:8000 + - API: http://localhost:8000 + +3. **Stop the application:** + + ```bash + docker-compose down + ``` + +## Custom Blocks + +Custom blocks can be added to `lib/blocks/custom/` on your host system. They will be automatically available after restarting the backend container: + +```bash +docker-compose restart backend +``` + +The `lib/blocks/custom/` directory is mounted as a volume, so you can add new block files directly from your host system without rebuilding the image. + +## Environment Variables + +You can configure the application using environment variables. Create a `.env` file in the project root (`docker-compose.yml` loads it through `env_file`, so the file must exist even if it is empty): + +```env +LLM_ENDPOINT=http://localhost:11434/api/generate +LLM_API_KEY= +LLM_MODEL=llama3 +DEBUG=false +``` + +These variables are automatically passed to the container via `docker-compose.yml`. Note that inside the container `localhost` refers to the container itself, so an LLM server running on the host must be addressed by a host-reachable name (for example `host.docker.internal` on Docker Desktop). 
+ +## Data Persistence + +The `data/` directory is mounted as a volume, so your database and other data will persist between container restarts. + +## Building Images + +To rebuild the images: + +```bash +docker-compose build +``` + +Or rebuild without cache: + +```bash +docker-compose build --no-cache +``` + +## Development + +For development, you may want to mount additional directories or use volume mounts for live code reloading. Modify `docker-compose.yml` as needed. + +## Architecture + +- **Backend**: Python 3.10 with uv, serves both API and frontend +- **Frontend**: Built with yarn/vite, served as static files by the backend +- **Port**: 8000 (both API and frontend) + +The backend Dockerfile: + +- Uses multi-stage builds for optimization +- Compiles Python bytecode for faster startup +- Builds the frontend and includes it in the final image +- Serves the frontend at the root path via FastAPI diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..29607a4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,77 @@ +# Backend Dockerfile using uv +FROM python:3.10-slim AS builder + +# Install uv +COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv + +# Set working directory +WORKDIR /app + +# Copy dependency files +COPY pyproject.toml uv.lock ./ + +# Install dependencies using uv +RUN uv sync --frozen --no-dev + +# Copy application code +COPY . . 
+ +# Compile Python bytecode for faster startup (best effort: failures do not break the build) +# compileall recurses into lib/ by default; -r expects a numeric depth, not a path +RUN python -m compileall -b -q lib/ && \ + python -m compileall -b -q app.py config.py models.py mock_llm.py debug_pipeline.py 2>/dev/null || true + +# Frontend build stage +FROM node:20-alpine AS frontend-builder + +WORKDIR /app + +# Copy frontend package files +COPY frontend/package.json frontend/yarn.lock ./ + +# Install dependencies +RUN yarn install --frozen-lockfile + +# Copy frontend source +COPY frontend/ ./ + +# Build the frontend +RUN yarn build + +# Production stage +FROM python:3.10-slim + +WORKDIR /app + +# Install uv in production image +COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv + +# Copy virtual environment from builder +COPY --from=builder /app/.venv /app/.venv + +# Copy application code and compiled bytecode +# Copy lib directory (includes templates, blocks, etc. and their .pyc files) +COPY --from=builder /app/lib /app/lib +# Copy main application files (including .pyc files if they exist) +COPY --from=builder /app/app.py* /app/ +COPY --from=builder /app/config.py* /app/ +COPY --from=builder /app/models.py* /app/ +COPY --from=builder /app/pyproject.toml /app/pyproject.toml + +# Copy built frontend from frontend-builder +COPY --from=frontend-builder /app/build /app/frontend/build + +# Create data directory and ensure custom blocks directory exists +RUN mkdir -p /app/data /app/lib/blocks/custom + +# Set environment variables +ENV PATH="/app/.venv/bin:$PATH" +ENV PYTHONUNBUFFERED=1 +ENV PYTHONDONTWRITEBYTECODE=1 + +# Expose port +EXPOSE 8000 + +# Run the application from the prebuilt venv (--no-sync: uv.lock is not in this image, so do not lock/sync at startup) +CMD ["uv", "run", "--no-sync", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] + diff --git a/Makefile b/Makefile index 6e45717..3a803a8 100644 --- a/Makefile +++ b/Makefile @@ -24,11 +24,11 @@ check-deps: @echo "✅ All dependencies are installed" install: check-deps - uv pip install -e . 
+ uv venv && uv sync cd frontend && yarn install dev: check-deps - uv pip install -e ".[dev]" + uv venv && uv sync --extra dev cd frontend && yarn install dev-ui: diff --git a/app.py b/app.py index 9e17d23..41b6725 100644 --- a/app.py +++ b/app.py @@ -35,6 +35,11 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: app = FastAPI(title="DataGenFlow", version="0.1.0", lifespan=lifespan) +@app.get("/health") +async def health() -> dict[str, str]: + return {"status": "healthy"} + + @app.post("/generate_from_file") async def generate_from_file( file: UploadFile = File(...), pipeline_id: int = Form(...) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..ca1f5f5 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,26 @@ +services: + backend: + build: + context: . + dockerfile: Dockerfile + container_name: datagenflow + ports: + - "8000:8000" + env_file: + - .env + volumes: + # Mount data directory for persistence + - ./data:/app/data + # Mount custom blocks directory so blocks can be added from the host (restart required to load them) + - ./lib/blocks/custom:/app/lib/blocks/custom + restart: unless-stopped + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + +volumes: + data: +