
Commit 840481c

docker (#11)

* fix(serve): reduce server shutdown timeout from 3 seconds to 1 second for quicker graceful shutdown
* fix(gen_protos): update command to use python3 and add main guard for script execution
* feat(docker): add Docker files
* docs(docker): enhance documentation with overview, installation, usage instructions, and Docker setup
1 parent 3b45ef6 commit 840481c

File tree

5 files changed: +147 −14 lines changed


.dockerignore

Lines changed: 58 additions & 0 deletions (new file)

```gitignore
# Git
.git/
.gitignore
.gitmodules

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
.venv/
venv/
*.egg-info/
.installed.cfg
*.egg

# Cache
.mypy_cache/
.pytest_cache/
.coverage
htmlcov/
.cache/

# Build artifacts
build/
dist/
*.manifest
*.spec

# Logs
*.log
logs/

# Local development files
.pytest_cache
.coverage
*.swp
.DS_Store

# IDE
.idea/
.vscode/
*.sublime-project
*.sublime-workspace

# Project specific
.python-version
.pre-commit-config.yaml
.github/

# Environment
.env
.env.*
env/

# Generated files
llm_backend/protos/
```

Dockerfile

Lines changed: 35 additions & 0 deletions (new file)

```dockerfile
FROM ghcr.io/astral-sh/uv:bookworm-slim AS builder

ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    UV_PYTHON_INSTALL_DIR=/python \
    UV_PYTHON_PREFERENCE=only-managed

RUN uv python install 3.12

WORKDIR /app

RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-dev --no-install-project

COPY . /app

RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev

FROM debian:bookworm-slim

COPY --from=builder --chown=python:python /python /python
COPY --from=builder --chown=app:app /app /app

ENV PATH="/app/.venv/bin:$PATH"

WORKDIR /app

# Generate the protos
RUN ["python3", "scripts/gen_protos.py"]

# Run the application
ENTRYPOINT ["python3", "scripts/serve.py", "--config", "configs/config.toml"]
```
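The final stage depends on `ENV PATH="/app/.venv/bin:$PATH"` so that the `python3` in the `RUN` and `ENTRYPOINT` lines resolves to the venv interpreter copied from the builder stage. A minimal stdlib-only sketch of that lookup rule, assuming nothing about the image itself (the temp directory and stub executable below are illustrative):

```python
# Sketch: why the Dockerfile prepends /app/.venv/bin to PATH.
# We simulate PATH resolution with a throwaway "python3" stub
# (hypothetical names; this is not part of the repo).
import os
import shutil
import stat
import tempfile

tmp = tempfile.mkdtemp()
fake = os.path.join(tmp, "python3")
with open(fake, "w") as f:
    f.write("#!/bin/sh\necho venv\n")
os.chmod(fake, os.stat(fake).st_mode | stat.S_IEXEC)

# Prepending the directory makes its python3 win the lookup,
# just like ENV PATH="/app/.venv/bin:$PATH" in the final stage.
path = tmp + os.pathsep + os.environ.get("PATH", "")
resolved = shutil.which("python3", path=path)
print(resolved == fake)  # the prepended copy shadows any system interpreter
```

The same mechanism is what lets the image skip any explicit venv activation step.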

README.md

Lines changed: 48 additions & 10 deletions

````diff
@@ -1,32 +1,70 @@
 # SYNC Server LLM
 
+## Overview
+
+SYNC Server LLM is a gRPC-based server that performs document retrieval and summarization. It leverages Qdrant for vector search and OpenAI models to generate summaries of retrieved content based on user-provided keywords.
+
 ## Installation
 
 ```shell
 git clone --recurse-submodules https://github.com/NCTU-SYNC/sync-server-llm.git
 cd sync-server-llm
 
+uv sync --no-dev --frozen
+
 uv run gen-protos
 ```
 
 ## Usage
 
-Please configure the `configs/config.toml` file.
-The following environment variables are required (`export` them or place them in a `.env` file):
+This section explains how to run the SYNC Server LLM using different methods.
 
-- `OPENAI_API_KEY`: Your ChatGPT API key.
-- `QDRANT_HOST`: The Qdrant host address.
-- `QDRANT_PORT`: The Qdrant host port.
-- `QDRANT_COLLECTION`: The Qdrant collection name.
+1. Configure the server by editing `configs/config.toml`
 
-```shell
-python3 scripts/serve.py --config configs/config.toml
-```
+2. Set up the required environment variables by adding them to a `.env` file
+
+   | Variable            | Description                   |
+   | ------------------- | ----------------------------- |
+   | `OPENAI_API_KEY`    | Your ChatGPT API key          |
+   | `QDRANT_HOST`       | The Qdrant host address       |
+   | `QDRANT_PORT`       | The Qdrant host REST API port |
+   | `QDRANT_COLLECTION` | The Qdrant collection name    |
+
+3. Start the server:
+
+   - To run the server locally:
+
+     ```shell
+     uv run scripts/serve.py --config configs/config.toml
+     ```
+
+   - To run the server using Docker:
+
+     Build the Docker image:
+
+     ```shell
+     docker build -t sync/backend-llm .
+     ```
+
+     Run the container:
+
+     ```shell
+     docker run -p 50051:50051 \
+       --env-file .env \
+       -v $(pwd)/path/to/configs:/app/configs/config.toml \
+       -v $(pwd)/path/to/hf_cache:/tmp/llama_index \
+       sync/backend-llm
+     ```
+
+> 1. If you are using Windows, you can add `--gpus=all` to the `docker run` command. Ensure that your Docker installation supports GPU usage.
+> 2. It is strongly recommended to mount the `hf_cache` directory to a persistent volume to avoid re-downloading the Hugging Face models every time the container is started.
+
+## Client Example
 
 You can refer to `scripts/client.py` for an example implementation of a client:
 
 ```shell
-python3 scripts/client.py
+uv run scripts/client.py
 ```
 
 ## Features
````

scripts/gen_protos.py

Lines changed: 5 additions & 3 deletions

```diff
@@ -9,9 +9,7 @@ def generate():
     proto_files = glob.glob(f"{proto_dir}/*.proto")
 
     command = [
-        "uv",
-        "run",
-        "python",
+        "python3",
         "-m",
         "grpc_tools.protoc",
         f"-I{target_dir}={proto_dir}",
@@ -21,3 +19,7 @@ def generate():
     ] + proto_files
 
     subprocess.run(command, shell=False, check=True)
+
+
+if __name__ == "__main__":
+    generate()
```
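The new main guard is what lets the Dockerfile run the script directly with `RUN ["python3", "scripts/gen_protos.py"]` while keeping it importable as a module (for the `uv run gen-protos` entry point). A minimal sketch of the same shape, with a stand-in command instead of the real `grpc_tools.protoc` invocation:

```python
# Sketch of the pattern scripts/gen_protos.py now follows: build an
# argv list and run it with shell=False so no shell parsing happens.
# The command below is a stand-in; the real script runs
# `python3 -m grpc_tools.protoc ...` over the .proto files.
import subprocess
import sys


def generate() -> str:
    command = [sys.executable, "-c", "print('protos generated')"]
    result = subprocess.run(
        command, shell=False, check=True, capture_output=True, text=True
    )
    return result.stdout.strip()


if __name__ == "__main__":
    # Runs only when executed directly, not when imported.
    print(generate())
```

Passing the interpreter as `sys.executable` (rather than a hard-coded `"python3"`) is one way to keep such a script portable across venvs; the commit instead relies on the Docker image's PATH putting the venv first.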

scripts/serve.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -49,7 +49,7 @@ async def serve(config: Config, logger: logging.Logger):
 
     async def server_graceful_shutdown():
         logging.info("Starting graceful shutdown...")
-        await server.stop(3)
+        await server.stop(1)
 
     _cleanup_coroutines.append(server_graceful_shutdown())
 
```
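In gRPC's asyncio API, `server.stop(grace)` waits up to `grace` seconds for in-flight RPCs to finish before cancelling them, so this change shortens the shutdown window from 3 s to 1 s. A stdlib-only sketch of that grace-window idea, with no gRPC involved (the handler and timings below are illustrative):

```python
# Sketch of the grace-period semantics behind server.stop(grace):
# wait up to `grace` seconds for in-flight work, then cancel it.
import asyncio


async def handler() -> None:
    # Simulates an in-flight RPC that outlives the grace window.
    await asyncio.sleep(10)


async def stop(task: asyncio.Task, grace: float) -> bool:
    try:
        # shield() keeps the timeout from cancelling the task itself,
        # so we decide explicitly once the grace window expires.
        await asyncio.wait_for(asyncio.shield(task), timeout=grace)
        return True   # finished within the grace period
    except asyncio.TimeoutError:
        task.cancel()
        return False  # cancelled after the grace period


async def main() -> bool:
    task = asyncio.create_task(handler())
    return await stop(task, grace=0.1)  # short window, like stop(1)


print(asyncio.run(main()))  # -> False: the handler outlives the window
```

A shorter grace period makes restarts snappier at the cost of cutting off any RPC that takes longer than the window; 1 s is presumably acceptable here because the server's requests either finish quickly or are safe to retry.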
