Skip to content

Commit b79a78d

Browse files
Xunzhuo and rootfs authored
feat: containerize and auto-release llm-katan (#259)
Signed-off-by: bitliu <[email protected]> Co-authored-by: Huamin Chen <[email protected]>
1 parent fe60472 commit b79a78d

File tree

7 files changed

+283
-6
lines changed

7 files changed

+283
-6
lines changed

.github/workflows/docker-publish.yml

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Create and publish Docker image for extproc
1+
name: Create and publish Docker images
22

33
on:
44
workflow_dispatch:
@@ -18,7 +18,7 @@ on:
1818
branches: [ "main" ]
1919

2020
jobs:
21-
build_and_push:
21+
build_and_push_extproc:
2222
runs-on: ubuntu-latest
2323
permissions:
2424
contents: read
@@ -43,7 +43,7 @@ jobs:
4343
- name: Set lowercase repository owner
4444
run: echo "REPOSITORY_OWNER_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
4545

46-
- name: Build and push Docker image
46+
- name: Build and push extproc Docker image
4747
uses: docker/build-push-action@v5
4848
with:
4949
context: .
@@ -52,3 +52,45 @@ jobs:
5252
tags: |
5353
${{ inputs.is_nightly == true && format('ghcr.io/{0}/semantic-router/extproc:nightly-{1}', env.REPOSITORY_OWNER_LOWER, steps.date.outputs.date_tag) || format('ghcr.io/{0}/semantic-router/extproc:{1}', env.REPOSITORY_OWNER_LOWER, github.sha) }}
5454
${{ inputs.is_nightly != true && format('ghcr.io/{0}/semantic-router/extproc:latest', env.REPOSITORY_OWNER_LOWER) || '' }}
55+
56+
# Build the llm-katan image from e2e-tests/llm-katan and publish it to GHCR.
# NOTE: this job is an entry of the workflow's top-level `jobs:` mapping;
# indent it to the same level as the sibling extproc job when splicing it in.
build_and_push_llm_katan:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write   # required to push to ghcr.io with GITHUB_TOKEN

  steps:
    - name: Check out the repo
      uses: actions/checkout@v4

    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Only produced for nightly runs; the nightly tag below is the sole consumer.
    - name: Generate date tag for nightly builds
      id: date
      if: inputs.is_nightly == true
      run: echo "date_tag=$(date +'%Y%m%d')" >> "$GITHUB_OUTPUT"

    # Image references must be lowercase, the repository owner may not be.
    - name: Set lowercase repository owner
      run: echo "REPOSITORY_OWNER_LOWER=$(echo "$GITHUB_REPOSITORY_OWNER" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_ENV"

    # Reads the package version used for the `v<version>` image tag.
    # `grep -m1` keeps the value single-line even if another line starts with
    # `version = `, and the explicit emptiness check fails the job instead of
    # publishing a bare `:v` tag (the grep|sed pipeline alone never fails the
    # step, because only sed's exit status is seen).
    - name: Extract version from pyproject.toml
      id: version
      run: |
        VERSION=$(grep -m1 '^version = ' e2e-tests/llm-katan/pyproject.toml | sed 's/version = "\(.*\)"/\1/')
        if [ -z "$VERSION" ]; then
          echo "::error::Could not extract version from e2e-tests/llm-katan/pyproject.toml"
          exit 1
        fi
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"

    # Tag lines that evaluate to '' are left empty on purpose.
    - name: Build and push llm-katan Docker image
      uses: docker/build-push-action@v5
      with:
        context: ./e2e-tests/llm-katan
        file: ./e2e-tests/llm-katan/Dockerfile
        push: ${{ github.event_name != 'pull_request' }} # Only push on merge to main, not on PRs
        tags: |
          ${{ inputs.is_nightly == true && format('ghcr.io/{0}/semantic-router/llm-katan:nightly-{1}', env.REPOSITORY_OWNER_LOWER, steps.date.outputs.date_tag) || format('ghcr.io/{0}/semantic-router/llm-katan:{1}', env.REPOSITORY_OWNER_LOWER, github.sha) }}
          ${{ inputs.is_nightly != true && format('ghcr.io/{0}/semantic-router/llm-katan:latest', env.REPOSITORY_OWNER_LOWER) || '' }}
          ${{ inputs.is_nightly != true && format('ghcr.io/{0}/semantic-router/llm-katan:v{1}', env.REPOSITORY_OWNER_LOWER, steps.version.outputs.version) || '' }}

.github/workflows/docker-release.yml

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
name: Create and publish Docker release image
1+
name: Create and publish Docker release images
22

33
on:
44
push:
55
tags:
66
- 'v*' # Triggers on version tags like v1.0.0, v2.1.3, etc.
77

88
jobs:
9-
build_and_push:
9+
build_and_push_extproc:
1010
runs-on: ubuntu-latest
1111
permissions:
1212
contents: read
@@ -30,7 +30,7 @@ jobs:
3030
username: ${{ github.actor }}
3131
password: ${{ secrets.GITHUB_TOKEN }}
3232

33-
- name: Build and push Docker image
33+
- name: Build and push extproc Docker image
3434
uses: docker/build-push-action@v5
3535
with:
3636
context: .
@@ -39,3 +39,44 @@ jobs:
3939
tags: |
4040
ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/extproc:${{ steps.extract_tag.outputs.tag }}
4141
ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/extproc:latest
42+
43+
# Build the llm-katan image for a release tag and publish it to GHCR.
# NOTE: this job is an entry of the workflow's top-level `jobs:` mapping;
# indent it to the same level as the sibling extproc job when splicing it in.
build_and_push_llm_katan:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write   # required to push to ghcr.io with GITHUB_TOKEN

  steps:
    - name: Check out the repo
      uses: actions/checkout@v4

    # Strips the `refs/tags/` prefix so the git tag can be reused as an image tag.
    - name: Extract tag name
      id: extract_tag
      run: echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"

    # Image references must be lowercase, the repository owner may not be.
    - name: Set lowercase repository owner
      run: echo "REPOSITORY_OWNER_LOWER=$(echo "$GITHUB_REPOSITORY_OWNER" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_ENV"

    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Reads the package version used for the `v<version>` image tag.
    # `grep -m1` keeps the value single-line even if another line starts with
    # `version = `, and the explicit emptiness check fails the job instead of
    # publishing a bare `:v` tag (the grep|sed pipeline alone never fails the
    # step, because only sed's exit status is seen).
    - name: Extract version from pyproject.toml
      id: version
      run: |
        VERSION=$(grep -m1 '^version = ' e2e-tests/llm-katan/pyproject.toml | sed 's/version = "\(.*\)"/\1/')
        if [ -z "$VERSION" ]; then
          echo "::error::Could not extract version from e2e-tests/llm-katan/pyproject.toml"
          exit 1
        fi
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"

    # Publishes three tags: the git tag, the package version, and `latest`.
    - name: Build and push llm-katan Docker image
      uses: docker/build-push-action@v5
      with:
        context: ./e2e-tests/llm-katan
        file: ./e2e-tests/llm-katan/Dockerfile
        push: true
        tags: |
          ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/llm-katan:${{ steps.extract_tag.outputs.tag }}
          ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/llm-katan:v${{ steps.version.outputs.version }}
          ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/llm-katan:latest

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ _run:
1414
-f tools/make/milvus.mk \
1515
-f tools/make/models.mk \
1616
-f tools/make/pre-commit.mk \
17+
-f tools/make/docker.mk \
1718
-f tools/make/kube.mk \
1819
$(MAKECMDGOALS)
1920

docker-compose.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,21 @@ services:
9393
networks:
9494
- semantic-network
9595

96+
# LLM Katan: lightweight LLM server used for testing.
# Opt-in only: started when the "testing" or "llm-katan" profile is enabled.
# NOTE: this entry lives under the top-level `services:` mapping; indent it
# to the level of the sibling services when splicing it in.
llm-katan:
  container_name: llm-katan
  build:
    context: ./e2e-tests/llm-katan
    dockerfile: Dockerfile
  profiles:
    - "testing"
    - "llm-katan"
  # Host port 8002 forwards to the server's port 8000 inside the container.
  ports:
    - "8002:8000"
  # Forwarded from the host environment; empty when unset there.
  environment:
    - HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN:-}
  networks:
    - semantic-network
  command:
    - "llm-katan"
    - "--model"
    - "Qwen/Qwen3-0.6B"
    - "--host"
    - "0.0.0.0"
    - "--port"
    - "8000"
110+
96111
networks:
97112
semantic-network:
98113
driver: bridge

e2e-tests/llm-katan/Dockerfile

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# LLM Katan Dockerfile
# Lightweight LLM Server for Testing
#
# Build context is e2e-tests/llm-katan (requirements.txt, pyproject.toml,
# README.md and the llm_katan/ package are copied from there).
FROM python:3.11-slim

LABEL maintainer="vLLM Semantic Router Team"
LABEL description="LLM Katan - Lightweight LLM Server for Testing"
# NOTE(review): hard-coded; keep in sync with the version in pyproject.toml.
LABEL version="0.1.8"

# Set working directory
WORKDIR /app

# Install system dependencies; the apt lists are removed in the same layer
# so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better layer caching: this layer is rebuilt
# only when requirements.txt changes, not on every source edit.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the llm_katan package and its packaging metadata
COPY llm_katan/ ./llm_katan/
COPY pyproject.toml ./
COPY README.md ./

# Install the package in development (editable) mode.
# --no-cache-dir matches the requirements install above and keeps pip's
# download/wheel cache out of the image layer.
RUN pip install --no-cache-dir -e .

# Create a non-root user for security; everything below runs as this user.
RUN useradd --create-home --shell /bin/bash llmkatan
USER llmkatan

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# Expose the default port
EXPOSE 8000

# Default command - can be overridden. Overriding replaces the whole argument
# list, so a custom command must pass --model/--host/--port itself.
CMD ["llm-katan", "--model", "Qwen/Qwen3-0.6B", "--host", "0.0.0.0", "--port", "8000"]

e2e-tests/llm-katan/README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,24 @@ designed for testing and development with real tiny models.
2020

2121
### Installation
2222

23+
#### Option 1: PyPI
24+
2325
```bash
2426
pip install llm-katan
2527
```
2628

29+
#### Option 2: Docker
30+
31+
```bash
32+
# Pull and run the latest Docker image
33+
docker pull ghcr.io/vllm-project/semantic-router/llm-katan:latest
34+
docker run -p 8000:8000 ghcr.io/vllm-project/semantic-router/llm-katan:latest
35+
36+
# Or with custom model
37+
docker run -p 8000:8000 ghcr.io/vllm-project/semantic-router/llm-katan:latest \
38+
llm-katan --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --host 0.0.0.0 --port 8000
39+
```
40+
2741
### Setup
2842

2943
#### HuggingFace Token (Required)

tools/make/docker.mk

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# ======== docker.mk ========
2+
# = Docker build and management =
3+
# ======== docker.mk ========
4+
5+
# Docker image coordinates.
# Both are assigned with `?=`, so a value supplied via the environment or the
# command line wins, e.g. `make docker-build-all DOCKER_TAG=dev`.

# Registry/repository prefix prepended to every image name in this file.
DOCKER_REGISTRY ?= ghcr.io/vllm-project/semantic-router
# Tag applied to every image that is built, run or pushed by this file.
DOCKER_TAG ?= latest
8+
9+
# Build all Docker images (extproc, llm-katan, precommit).
# The work happens in the prerequisites; the recipe only reports completion.
# $(LOG_TARGET) is not defined in this file; it is expected from another
# makefile loaded alongside it.
.PHONY: docker-build-all
docker-build-all: docker-build-extproc docker-build-llm-katan docker-build-precommit
	@$(LOG_TARGET)
	@echo "All Docker images built successfully"
13+
14+
# Build the extproc image from Dockerfile.extproc, using the current
# directory as build context, tagged $(DOCKER_REGISTRY)/extproc:$(DOCKER_TAG).
# $(CONTAINER_RUNTIME) and $(LOG_TARGET) are defined outside this file.
.PHONY: docker-build-extproc
docker-build-extproc:
	@$(LOG_TARGET)
	@echo "Building extproc Docker image..."
	@$(CONTAINER_RUNTIME) build -f Dockerfile.extproc -t $(DOCKER_REGISTRY)/extproc:$(DOCKER_TAG) .
19+
20+
# Build the llm-katan image. Unlike the other images, the build context is
# e2e-tests/llm-katan/ rather than the current directory.
# $(CONTAINER_RUNTIME) and $(LOG_TARGET) are defined outside this file.
.PHONY: docker-build-llm-katan
docker-build-llm-katan:
	@$(LOG_TARGET)
	@echo "Building llm-katan Docker image..."
	@$(CONTAINER_RUNTIME) build -f e2e-tests/llm-katan/Dockerfile -t $(DOCKER_REGISTRY)/llm-katan:$(DOCKER_TAG) e2e-tests/llm-katan/
25+
26+
# Build the precommit image from Dockerfile.precommit, using the current
# directory as build context, tagged $(DOCKER_REGISTRY)/precommit:$(DOCKER_TAG).
# $(CONTAINER_RUNTIME) and $(LOG_TARGET) are defined outside this file.
.PHONY: docker-build-precommit
docker-build-precommit:
	@$(LOG_TARGET)
	@echo "Building precommit Docker image..."
	@$(CONTAINER_RUNTIME) build -f Dockerfile.precommit -t $(DOCKER_REGISTRY)/precommit:$(DOCKER_TAG) .
31+
32+
# Smoke-test an llm-katan server by requesting its /v1/models endpoint.
# This target does not start a container: a server must already be listening
# on localhost:8000 (for example via `make docker-run-llm-katan` in another
# terminal). `curl -f` turns HTTP error statuses into a failing exit code.
.PHONY: docker-test-llm-katan
docker-test-llm-katan:
	@$(LOG_TARGET)
	@echo "Testing llm-katan Docker image..."
	@curl -f http://localhost:8000/v1/models || (echo "Models endpoint failed" && exit 1)
	@# printf instead of echo: whether echo expands "\n" depends on the shell.
	@printf '\n✅ llm-katan Docker image test passed\n'
38+
39+
# Build (via the prerequisite) and run the llm-katan image in the foreground
# with the image's default command, publishing container port 8000 on host
# port 8000. `--rm` removes the container once it exits.
.PHONY: docker-run-llm-katan
docker-run-llm-katan: docker-build-llm-katan
	@$(LOG_TARGET)
	@echo "Running llm-katan Docker image on port 8000..."
	@echo "Access the server at: http://localhost:8000"
	@echo "Press Ctrl+C to stop"
	@$(CONTAINER_RUNTIME) run --rm -p 8000:8000 $(DOCKER_REGISTRY)/llm-katan:$(DOCKER_TAG)
46+
47+
# Run llm-katan with a custom served model name.
# Required variable: SERVED_NAME (passed to --served-model-name). The loaded
# model itself stays Qwen/Qwen3-0.6B; only the advertised name changes.
# Validation runs first, so nothing claims to be "Running" when the variable
# is missing, and the usage hint is shown only in that error case.
# Unlike docker-run-llm-katan, this target has no build prerequisite.
.PHONY: docker-run-llm-katan-custom
docker-run-llm-katan-custom:
	@$(LOG_TARGET)
	@if [ -z "$(SERVED_NAME)" ]; then \
		echo "Error: SERVED_NAME variable is required"; \
		echo "Usage: make docker-run-llm-katan-custom SERVED_NAME=your-served-model-name"; \
		echo "Example: make docker-run-llm-katan-custom SERVED_NAME=claude-3-haiku"; \
		exit 1; \
	fi
	@echo "Running llm-katan with custom served model name..."
	@$(CONTAINER_RUNTIME) run --rm -p 8000:8000 $(DOCKER_REGISTRY)/llm-katan:$(DOCKER_TAG) \
		llm-katan --model "Qwen/Qwen3-0.6B" --served-model-name "$(SERVED_NAME)" --host 0.0.0.0 --port 8000
59+
60+
# Clean up Docker images.
# `image prune -f` without further flags removes only dangling (untagged)
# images; tagged images built by the targets above are left in place.
.PHONY: docker-clean
docker-clean:
	@$(LOG_TARGET)
	@echo "Cleaning up Docker images..."
	@$(CONTAINER_RUNTIME) image prune -f
	@echo "Docker cleanup completed"
66+
67+
# Push Docker images (for CI/CD).
# Pushes extproc and llm-katan through the prerequisites; the precommit image
# is not part of this aggregate.
.PHONY: docker-push-all
docker-push-all: docker-push-extproc docker-push-llm-katan
	@$(LOG_TARGET)
	@echo "All Docker images pushed successfully"
71+
72+
# Push $(DOCKER_REGISTRY)/extproc:$(DOCKER_TAG). The image must already exist
# locally; this target has no build prerequisite.
.PHONY: docker-push-extproc
docker-push-extproc:
	@$(LOG_TARGET)
	@echo "Pushing extproc Docker image..."
	@$(CONTAINER_RUNTIME) push $(DOCKER_REGISTRY)/extproc:$(DOCKER_TAG)
76+
77+
# Push $(DOCKER_REGISTRY)/llm-katan:$(DOCKER_TAG). The image must already
# exist locally; this target has no build prerequisite.
.PHONY: docker-push-llm-katan
docker-push-llm-katan:
	@$(LOG_TARGET)
	@echo "Pushing llm-katan Docker image..."
	@$(CONTAINER_RUNTIME) push $(DOCKER_REGISTRY)/llm-katan:$(DOCKER_TAG)
81+
82+
# Docker compose shortcut: build and start the default (profile-less)
# services in the foreground.
# NOTE(review): calls `docker compose` directly rather than going through
# $(CONTAINER_RUNTIME) like the build/run/push targets - confirm this is
# intentional.
.PHONY: docker-compose-up
docker-compose-up:
	@$(LOG_TARGET)
	@echo "Starting services with docker-compose..."
	@docker compose up --build
87+
88+
# Docker compose shortcut: build and start the services with the "testing"
# profile enabled, in the foreground.
.PHONY: docker-compose-up-testing
docker-compose-up-testing:
	@$(LOG_TARGET)
	@echo "Starting services with testing profile..."
	@docker compose --profile testing up --build
92+
93+
# Docker compose shortcut: build and start the services with the "llm-katan"
# profile enabled, in the foreground.
.PHONY: docker-compose-up-llm-katan
docker-compose-up-llm-katan:
	@$(LOG_TARGET)
	@echo "Starting services with llm-katan profile..."
	@docker compose --profile llm-katan up --build
97+
98+
# Docker compose shortcut: stop the compose services.
# No --profile flag is passed here.
# NOTE(review): services started via the profile targets may need the same
# --profile on `down` to be stopped - verify.
.PHONY: docker-compose-down
docker-compose-down:
	@$(LOG_TARGET)
	@echo "Stopping docker-compose services..."
	@docker compose down
102+
103+
# Help target for Docker commands.
# Prints every docker-* target defined in this file, including the push
# targets, followed by the variables that can be overridden.
.PHONY: docker-help
docker-help:
	@echo "Docker Make Targets:"
	@echo " docker-build-all - Build all Docker images"
	@echo " docker-build-extproc - Build extproc Docker image"
	@echo " docker-build-llm-katan - Build llm-katan Docker image"
	@echo " docker-build-precommit - Build precommit Docker image"
	@echo " docker-test-llm-katan - Test llm-katan Docker image"
	@echo " docker-run-llm-katan - Run llm-katan Docker image locally"
	@echo " docker-run-llm-katan-custom SERVED_NAME=name - Run with custom served model name"
	@echo " docker-clean - Clean up Docker images"
	@echo " docker-push-all - Push extproc and llm-katan Docker images"
	@echo " docker-push-extproc - Push extproc Docker image"
	@echo " docker-push-llm-katan - Push llm-katan Docker image"
	@echo " docker-compose-up - Start docker-compose services"
	@echo " docker-compose-up-testing - Start with testing profile"
	@echo " docker-compose-up-llm-katan - Start with llm-katan profile"
	@echo " docker-compose-down - Stop docker-compose services"
	@echo ""
	@echo "Environment Variables:"
	@echo " DOCKER_REGISTRY - Docker registry (default: ghcr.io/vllm-project/semantic-router)"
	@echo " DOCKER_TAG - Docker tag (default: latest)"
	@echo " SERVED_NAME - Served model name for custom runs"

0 commit comments

Comments
 (0)