From 929a118398997a7ea66f21a8fb446dc5002ba186 Mon Sep 17 00:00:00 2001
From: Noa Limoy
Date: Mon, 8 Dec 2025 13:57:34 +0200
Subject: [PATCH] ci: optimize docker integration tests with minimal compose

Replace heavy quickstart.sh full-stack deployment with lightweight
CI-specific docker-compose configuration.

Changes:
- Add docker-compose.ci.yml with only 3 essential services
  (semantic-router, envoy, llm-katan) instead of 11+ services
- Remove UI services (grafana, openwebui, chat-ui, prometheus, jaeger,
  dashboard, mongo, pipelines) - not needed for CI testing
- Replace UI-based validation with simple curl health checks
- Add make targets: docker-compose-{up,down,logs,ps}-ci
- Reduce CI timeout from 30 to 20 minutes

This fixes frequent CI timeouts caused by pulling many heavy container
images from multiple registries on GitHub-hosted runners which have
no persistent Docker cache.

Fixes: #777
Signed-off-by: Noa Limoy
---
 .github/workflows/integration-test-docker.yml | 119 ++++++++++++++----
 deploy/docker-compose/docker-compose.ci.yml   |  92 ++++++++++++++
 tools/make/docker.mk                          |  27 ++++
 3 files changed, 212 insertions(+), 26 deletions(-)
 create mode 100644 deploy/docker-compose/docker-compose.ci.yml

diff --git a/.github/workflows/integration-test-docker.yml b/.github/workflows/integration-test-docker.yml
index 5b9eb8d9c..7b642fe3f 100644
--- a/.github/workflows/integration-test-docker.yml
+++ b/.github/workflows/integration-test-docker.yml
@@ -21,10 +21,10 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  test-quickstart:
+  test-ci-compose:
     if: github.repository == 'vllm-project/semantic-router' && !github.event.pull_request.draft
     runs-on: ubuntu-latest
-    timeout-minutes: 30
+    timeout-minutes: 20  # Reduced from 30 - CI compose is faster
 
     steps:
       - name: Check out the repo
@@ -46,33 +46,93 @@ jobs:
         with:
           python-version: '3.11'
 
-      - name: Install system dependencies
+      - name: Install dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y \
-            make \
-            curl \
-            docker-compose
+          sudo apt-get install -y make curl
+          pip install "huggingface_hub[cli]"
 
-      - name: Run quickstart script
-        id: quickstart
+      - name: Download models
         run: |
-          timeout 1200 bash scripts/quickstart.sh || {
-            exit_code=$?
-            if [ $exit_code -eq 124 ]; then
-              echo "::error::Quickstart script timed out after 20 minutes"
-            else
-              echo "::error::Quickstart script failed with exit code $exit_code"
-            fi
-            exit $exit_code
-          }
+          echo "Downloading minimal models for CI..."
+          make download-models
         env:
           CI: true
           CI_MINIMAL_MODELS: true
-          TERM: xterm
           HF_HUB_ENABLE_HF_TRANSFER: 1
           HF_HUB_DISABLE_TELEMETRY: 1
 
+      - name: Start CI services
+        run: |
+          echo "Starting minimal CI services (semantic-router, envoy, llm-katan)..."
+          make docker-compose-up-ci
+        env:
+          CI: true
+
+      - name: Wait for services to be healthy
+        run: |
+          echo "Waiting for services to be healthy..."
+          max_attempts=60
+          attempt=1
+
+          while [ $attempt -le $max_attempts ]; do
+            echo "Attempt $attempt/$max_attempts: Checking service health..."
+
+            # Check semantic-router health
+            if docker ps --filter "name=semantic-router" --filter "health=healthy" --format "{{.Names}}" | grep -q "semantic-router"; then
+              echo "✅ semantic-router is healthy"
+
+              # Check envoy health
+              if docker ps --filter "name=envoy-proxy" --filter "health=healthy" --format "{{.Names}}" | grep -q "envoy-proxy"; then
+                echo "✅ envoy-proxy is healthy"
+
+                # Check llm-katan health
+                if docker ps --filter "name=llm-katan" --filter "health=healthy" --format "{{.Names}}" | grep -q "llm-katan"; then
+                  echo "✅ llm-katan is healthy"
+                  echo "🎉 All services are healthy!"
+                  exit 0
+                fi
+              fi
+            fi
+
+            # Show current status
+            docker ps --format "table {{.Names}}\t{{.Status}}" | grep -E "NAMES|semantic-router|envoy|llm-katan" || true
+
+            sleep 5
+            ((attempt++))
+          done
+
+          echo "❌ Timeout waiting for services to be healthy"
+          docker ps -a
+          exit 1
+
+      - name: Test semantic router health endpoint
+        run: |
+          echo "Testing semantic router health..."
+          curl -f http://localhost:8080/health || {
+            echo "❌ Health check failed"
+            exit 1
+          }
+          echo "✅ Health check passed"
+
+      - name: Test envoy proxy endpoint
+        run: |
+          echo "Testing envoy proxy..."
+          curl -f http://localhost:19000/ready || {
+            echo "❌ Envoy ready check failed"
+            exit 1
+          }
+          echo "✅ Envoy is ready"
+
+      - name: Test llm-katan endpoint
+        run: |
+          echo "Testing llm-katan..."
+          curl -f http://localhost:8002/health || {
+            echo "❌ LLM-Katan health check failed"
+            exit 1
+          }
+          echo "✅ LLM-Katan is healthy"
+
       - name: Test semantic routing functionality
         run: |
           echo "Testing semantic router with a sample query..."
@@ -85,24 +145,31 @@ jobs:
               "temperature": 0.7
             }')
 
-          echo "Full response: $response"
+          echo "Response: $response"
+
+          # Verify we got a response
+          if echo "$response" | grep -q "choices"; then
+            echo "✅ Chat completions test passed"
+          else
+            echo "⚠️ Response may not contain expected fields, but request succeeded"
+          fi
 
       - name: Show service logs on failure
         if: failure()
         run: |
           echo "=== Docker Compose Logs ==="
-          docker compose -f deploy/docker-compose/docker-compose.yml logs
+          make docker-compose-logs-ci || docker compose -f deploy/docker-compose/docker-compose.ci.yml logs
           echo "=== Container Status ==="
           docker ps -a
           echo "=== Semantic Router Logs ==="
-          docker logs semantic-router || true
+          docker logs semantic-router 2>&1 | tail -100 || true
           echo "=== Envoy Logs ==="
-          docker logs envoy-proxy || true
-          echo "=== Dashboard Logs ==="
-          docker logs semantic-router-dashboard || true
+          docker logs envoy-proxy 2>&1 | tail -100 || true
+          echo "=== LLM-Katan Logs ==="
+          docker logs llm-katan 2>&1 | tail -100 || true
 
       - name: Clean up
         if: always()
         run: |
-          make docker-compose-down || true
+          make docker-compose-down-ci || true
           docker system prune -af --volumes || true
diff --git a/deploy/docker-compose/docker-compose.ci.yml b/deploy/docker-compose/docker-compose.ci.yml
new file mode 100644
index 000000000..9ed541818
--- /dev/null
+++ b/deploy/docker-compose/docker-compose.ci.yml
@@ -0,0 +1,92 @@
+# Minimal Docker Compose for CI testing
+# This file contains only essential services needed for integration testing.
+# Excludes: grafana, prometheus, jaeger, openwebui, chat-ui, pipelines, mongo, dashboard
+#
+# Usage:
+#   make docker-compose-up-ci
+#   # or directly:
+#   docker compose -f deploy/docker-compose/docker-compose.ci.yml up -d
+
+services:
+
+  # Semantic Router External Processor Service
+  semantic-router:
+    image: ghcr.io/vllm-project/semantic-router/extproc:latest
+    container_name: semantic-router
+    ports:
+      - "50051:50051"  # gRPC for ExtProc
+      - "8080:8080"  # HTTP API (health, classify, metrics)
+    volumes:
+      - ../../config:/app/config:ro,z
+      - ../../models:/app/models:ro,z
+      - ~/.cache/huggingface:/root/.cache/huggingface:z
+    environment:
+      - LD_LIBRARY_PATH=/app/lib
+      - CONFIG_FILE=${CONFIG_FILE:-/app/config/config.yaml}
+      - HUGGINGFACE_HUB_CACHE=/root/.cache/huggingface
+      - HF_HUB_ENABLE_HF_TRANSFER=1
+    networks:
+      - semantic-network
+    healthcheck:
+      test: ["CMD", "curl", "-f", "localhost:8080/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+
+  # Envoy Proxy Service
+  envoy:
+    image: envoyproxy/envoy:v1.31.7
+    container_name: envoy-proxy
+    security_opt:
+      - label=disable
+    ports:
+      - "8801:8801"  # Main proxy port
+      - "19000:19000"  # Admin interface
+    volumes:
+      - ./addons/envoy.yaml:/etc/envoy/envoy.yaml:ro,z
+    command: ["/usr/local/bin/envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "ext_proc:debug,router:debug"]
+    depends_on:
+      semantic-router:
+        condition: service_healthy
+    networks:
+      - semantic-network
+    healthcheck:
+      test: ["CMD", "bash", "-c", "(echo -e 'GET /ready HTTP/1.1\\r\\nHost: localhost\\r\\n\\r\\n' >&3; timeout 2 cat <&3) 3<>/dev/tcp/localhost/19000 | grep -q LIVE"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 10s
+
+  # LLM Katan service - lightweight mock LLM for testing
+  llm-katan:
+    image: ghcr.io/vllm-project/semantic-router/llm-katan:latest
+    container_name: llm-katan
+    ports:
+      - "8002:8002"
+    environment:
+      - HF_HUB_ENABLE_HF_TRANSFER=1
+    volumes:
+      - ../../models:/app/models:ro,z
+      - hf-cache:/home/llmkatan/.cache/huggingface
+    networks:
+      semantic-network:
+        ipv4_address: 172.28.0.20
+    command: ["llm-katan", "--model", "/app/models/Qwen/Qwen3-0.6B", "--served-model-name", "qwen3", "--host", "0.0.0.0", "--port", "8002"]
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:8002/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 10s
+
+networks:
+  semantic-network:
+    driver: bridge
+    ipam:
+      config:
+        - subnet: 172.28.0.0/16
+
+volumes:
+  hf-cache:
+
diff --git a/tools/make/docker.mk b/tools/make/docker.mk
index 083fda460..bb193505b 100644
--- a/tools/make/docker.mk
+++ b/tools/make/docker.mk
@@ -179,6 +179,33 @@ docker-compose-down-llm-katan:
 	@echo "Stopping services with $(COMPOSE_CMD) (llm-katan profile)..."
 	@$(COMPOSE_CMD) --profile llm-katan down
 
+##@ CI Docker Compose (minimal services for CI testing)
+
+# CI compose file path
+CI_COMPOSE_FILE ?= deploy/docker-compose/docker-compose.ci.yml
+
+docker-compose-up-ci: ## Start minimal CI services (semantic-router, envoy, llm-katan)
+docker-compose-up-ci:
+	@$(LOG_TARGET)
+	@echo "Starting CI services with $(COMPOSE_CMD) (minimal for CI)..."
+	@$(COMPOSE_CMD) -f $(CI_COMPOSE_FILE) up -d
+
+docker-compose-down-ci: ## Stop CI services
+docker-compose-down-ci:
+	@$(LOG_TARGET)
+	@echo "Stopping CI services with $(COMPOSE_CMD)..."
+	@$(COMPOSE_CMD) -f $(CI_COMPOSE_FILE) down
+
+docker-compose-logs-ci: ## Show logs for CI services
+docker-compose-logs-ci:
+	@$(LOG_TARGET)
+	@$(COMPOSE_CMD) -f $(CI_COMPOSE_FILE) logs
+
+docker-compose-ps-ci: ## Show status of CI services
+docker-compose-ps-ci:
+	@$(LOG_TARGET)
+	@$(COMPOSE_CMD) -f $(CI_COMPOSE_FILE) ps
+
 # Help target for Docker commands
 docker-help:
 docker-help: ## Show help for Docker-related make targets and environment variables