diff --git a/.github/workflows/quickstart-integration-test.yml b/.github/workflows/quickstart-integration-test.yml
new file mode 100644
index 00000000..4b0288f1
--- /dev/null
+++ b/.github/workflows/quickstart-integration-test.yml
@@ -0,0 +1,110 @@
+name: Quickstart Integration Test
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'scripts/quickstart.sh'
+      - 'deploy/docker-compose/**'
+      - 'config/config.yaml'
+      - 'tools/make/common.mk'
+      - 'tools/make/models.mk'
+      - 'tools/make/docker.mk'
+  workflow_dispatch: # Allow manual triggering
+
+jobs:
+  test-quickstart:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+
+      - name: Free up disk space
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          echo "Disk space after cleanup:"
+          df -h
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y \
+            make \
+            curl \
+            docker-compose
+
+      - name: Run quickstart script
+        id: quickstart
+        run: |
+          timeout 1200 bash scripts/quickstart.sh || {
+            exit_code=$?
+            if [ $exit_code -eq 124 ]; then
+              echo "::error::Quickstart script timed out after 20 minutes"
+            else
+              echo "::error::Quickstart script failed with exit code $exit_code"
+            fi
+            exit $exit_code
+          }
+        env:
+          CI: true
+          CI_MINIMAL_MODELS: true
+          TERM: xterm
+          HF_HUB_ENABLE_HF_TRANSFER: 1
+          HF_HUB_DISABLE_TELEMETRY: 1
+
+      - name: Test semantic routing functionality
+        run: |
+          echo "Testing semantic router with a sample query..."
+
+          response=$(curl -s -X POST http://localhost:8801/v1/chat/completions \
+            -H "Content-Type: application/json" \
+            -d '{
+              "model": "qwen3",
+              "messages": [{"role": "user", "content": "What is 2 + 2?"}],
+              "temperature": 0.7
+            }')
+
+          echo "Full response: $response"
+
+          # Validate response structure
+          if echo "$response" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then
+            echo "✓ Semantic router successfully routed and processed the query"
+            echo "  Answer: $(echo "$response" | jq -r '.choices[0].message.content' | head -c 200)"
+          else
+            echo "::error::Semantic router failed to process query correctly"
+            echo "Response was: $response"
+            exit 1
+          fi
+
+      - name: Show service logs on failure
+        if: failure()
+        run: |
+          echo "=== Docker Compose Logs ==="
+          docker compose -f deploy/docker-compose/docker-compose.yml logs
+          echo "=== Container Status ==="
+          docker ps -a
+          echo "=== Semantic Router Logs ==="
+          docker logs semantic-router || true
+          echo "=== Envoy Logs ==="
+          docker logs envoy-proxy || true
+          echo "=== Dashboard Logs ==="
+          docker logs semantic-router-dashboard || true
+
+      - name: Clean up
+        if: always()
+        run: |
+          make docker-compose-down || true
+          docker system prune -af --volumes || true
diff --git a/config/config.yaml b/config/config.yaml
index 893702f8..a7f02c00 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -24,15 +24,6 @@ semantic_cache:
   # Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context)
   # Default: "bert" (fastest, lowest memory)
   embedding_model: "bert"
-  # HNSW index configuration (for memory backend only)
-  use_hnsw: true # Enable HNSW index for faster similarity search
-  hnsw_m: 16 # Number of bi-directional links (higher = better recall, more memory)
-  hnsw_ef_construction: 200 # Construction parameter (higher = better quality, slower build)
-
-  # Hybrid cache configuration (when backend_type: "hybrid")
-  # Combines in-memory HNSW for fast search with Milvus for scalable storage
-  # max_memory_entries: 100000 # Max entries in HNSW index (default: 100,000)
-  # backend_config_path: "config/milvus.yaml" # Path to Milvus config

 tools:
   enabled: true
@@ -223,7 +214,7 @@
   traditional_attention_dropout_prob: 0.1 # Traditional model attention dropout probability
   tie_break_confidence: 0.5 # Confidence value for tie-breaking situations

-default_model: openai/gpt-oss-20b
+default_model: qwen3

 # Reasoning family configurations
 reasoning_families:
diff --git a/deploy/docker-compose/docker-compose.yml b/deploy/docker-compose/docker-compose.yml
index c47709f6..91b0fcdc 100644
--- a/deploy/docker-compose/docker-compose.yml
+++ b/deploy/docker-compose/docker-compose.yml
@@ -7,9 +7,9 @@ services:
     ports:
       - "50051:50051"
     volumes:
-      - ../../config:/app/config:ro
-      - ../../models:/app/models:ro
-      - ~/.cache/huggingface:/root/.cache/huggingface
+      - ../../config:/app/config:ro,z
+      - ../../models:/app/models:ro,z
+      - ~/.cache/huggingface:/root/.cache/huggingface:z
     environment:
       - LD_LIBRARY_PATH=/app/lib
       # Use main config by default; override via CONFIG_FILE if needed
@@ -32,11 +32,13 @@
   envoy:
     image: envoyproxy/envoy:v1.31.7
    container_name: envoy-proxy
+    security_opt:
+      - label=disable
     ports:
       - "8801:8801" # Main proxy port
       - "19000:19000" # Admin interface
     volumes:
-      - ./addons/envoy.yaml:/etc/envoy/envoy.yaml:ro
+      - ./addons/envoy.yaml:/etc/envoy/envoy.yaml:ro,z
     command: ["/usr/local/bin/envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "ext_proc:trace,router:trace,http:trace"]
     depends_on:
       semantic-router:
@@ -44,7 +46,7 @@
     networks:
       - semantic-network
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:19000/ready"]
+      test: ["CMD", "bash", "-c", "(echo -e 'GET /ready HTTP/1.1\\r\\nHost: localhost\\r\\n\\r\\n' >&3; timeout 2 cat <&3) 3<>/dev/tcp/localhost/19000 | grep -q LIVE"]
       interval: 10s
       timeout: 5s
       retries: 5
@@ -86,7 +88,7 @@
     image: prom/prometheus:v2.53.0
     container_name: prometheus
     volumes:
-      - ./addons/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
+      - ./addons/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro,z
       - prometheus-data:/prometheus
     command:
       - --config.file=/etc/prometheus/prometheus.yaml
@@ -106,11 +108,11 @@
     ports:
       - "3000:3000"
     volumes:
-      - ./addons/grafana.ini:/etc/grafana/grafana.ini:ro
-      - ./addons/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro
-      - ./addons/grafana-datasource-jaeger.yaml:/etc/grafana/provisioning/datasources/datasource_jaeger.yaml:ro
-      - ./addons/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro
-      - ./addons/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro
+      - ./addons/grafana.ini:/etc/grafana/grafana.ini:ro,z
+      - ./addons/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro,z
+      - ./addons/grafana-datasource-jaeger.yaml:/etc/grafana/provisioning/datasources/datasource_jaeger.yaml:ro,z
+      - ./addons/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro,z
+      - ./addons/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro,z
       - grafana-data:/var/lib/grafana
     networks:
       - semantic-network
@@ -175,9 +177,16 @@
       - PYTHONUNBUFFERED=1
     volumes:
       # Persistent pipelines storage (auto-loaded on start)
-      - openwebui-pipelines:/app/pipelines
-      # Mount our vLLM Semantic Router pipeline
-      - ./addons/vllm_semantic_router_pipe.py:/app/pipelines/vllm_semantic_router_pipe.py:ro
+      - type: volume
+        source: openwebui-pipelines
+        target: /app/pipelines
+        volume:
+          nocopy: true
+      # Mount our vLLM Semantic Router pipeline (read-only) into the persistent dir
+      - type: bind
+        source: ./addons/vllm_semantic_router_pipe.py
+        target: /app/pipelines/vllm_semantic_router_pipe.py
+        read_only: true
     networks:
       - semantic-network

@@ -202,7 +211,7 @@
       - HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN:-}
       - HF_HUB_ENABLE_HF_TRANSFER=1
     volumes:
-      - ../../models:/app/models:ro
+      - ../../models:/app/models:ro,z
       - hf-cache:/home/llmkatan/.cache/huggingface
     networks:
       semantic-network:
@@ -235,7 +244,7 @@
       - TARGET_CHATUI_URL=http://chat-ui:3000
      - ROUTER_CONFIG_PATH=/app/config/config.yaml
     volumes:
-      - ../../config:/app/config:rw
+      - ../../config:/app/config:rw,z
     ports:
       - "8700:8700"
     networks:
diff --git a/scripts/quickstart.sh b/scripts/quickstart.sh
index 435df220..be6a05ce 100755
--- a/scripts/quickstart.sh
+++ b/scripts/quickstart.sh
@@ -21,6 +21,23 @@ print_color() {
     echo -e "${color}${text}${NC}"
 }

+# Helper functions for common message types
+success_msg() {
+    print_color "$GREEN" "$1"
+}
+
+error_msg() {
+    print_color "$RED" "$1"
+}
+
+info_msg() {
+    print_color "$YELLOW" "$1"
+}
+
+section_header() {
+    print_color "$CYAN" "$1"
+}
+
 # Function to print with typewriter effect
 typewriter() {
     local text=$1
@@ -34,7 +51,8 @@

 # Function to show ASCII art with animation
 show_ascii_art() {
-    clear
+    # Skip clear in CI environments (no proper terminal)
+    [ -z "${CI:-}" ] && clear || true
     echo
     echo
     print_color "$CYAN" " ██╗   ██╗██╗     ██╗     ███╗   ███╗"
@@ -86,7 +104,7 @@ show_progress() {

 # Function to check prerequisites
 check_prerequisites() {
-    print_color "$YELLOW" "🔍 Checking prerequisites..."
+    info_msg "🔍 Checking prerequisites..."
     echo

     local missing_deps=()
@@ -112,48 +130,41 @@
     fi

     if [ ${#missing_deps[@]} -ne 0 ]; then
-        print_color "$RED" "❌ Missing dependencies: ${missing_deps[*]}"
-        print_color "$YELLOW" "Please install the missing dependencies and try again."
+        error_msg "❌ Missing dependencies: ${missing_deps[*]}"
+        info_msg "Please install the missing dependencies and try again."
         exit 1
     fi

-    print_color "$GREEN" "✅ All prerequisites satisfied!"
+    success_msg "✅ All prerequisites satisfied!"
     echo
 }

 # Function to install HuggingFace CLI if needed
 install_hf_cli() {
     if ! command -v hf &> /dev/null; then
-        print_color "$YELLOW" "📦 Installing HuggingFace CLI..."
+        info_msg "📦 Installing HuggingFace CLI..."
         pip install huggingface_hub[cli] || pip3 install huggingface_hub[cli]
-        print_color "$GREEN" "✅ HuggingFace CLI installed!"
+        success_msg "✅ HuggingFace CLI installed!"
     else
-        print_color "$GREEN" "✅ HuggingFace CLI already installed!"
+        success_msg "✅ HuggingFace CLI already installed!"
     fi
     echo
 }

 # Function to download models with progress
 download_models() {
-    print_color "$YELLOW" "📥 Downloading AI models..."
+    info_msg "📥 Downloading AI models..."
     echo

     # Use minimal model set for faster setup
     export CI_MINIMAL_MODELS=false

-    # Start the download process with filtered output
-    make download-models 2>&1 | grep -E "(downloading|downloaded|Downloaded|✓|✅|❌|Error|error|Failed|failed|CI_MINIMAL_MODELS|Running download-models)" | while IFS= read -r line; do
-        # Filter out verbose HuggingFace download progress
-        if [[ ! "$line" =~ (Fetching|\.safetensors|\.json|\.txt|\.bin|B/s|%|/s) ]]; then
-            # Suppress output - no information displayed
-            :
-        fi
-    done
-
-    if make download-models > /dev/null 2>&1; then
-        print_color "$GREEN" "✅ Models downloaded successfully!"
+    # Download models and save output to log (visible in real-time)
+    if make download-models 2>&1 | tee /tmp/download-models-output.log; then
+        success_msg "✅ Models downloaded successfully!"
     else
-        print_color "$RED" "❌ Failed to download models!"
+        error_msg "❌ Failed to download models!"
+        info_msg "📋 Check logs: cat /tmp/download-models-output.log"
         exit 1
     fi
     echo
@@ -161,21 +172,31 @@

 # Function to start services
 start_services() {
-    print_color "$YELLOW" "🐳 Starting Docker services..."
+    info_msg "🐳 Starting Docker services..."
     echo

-    # Start docker-compose services with filtered output
-    make docker-compose-up 2>&1 | grep -E "(Running docker-compose-up|Starting services|Container.*Running|Container.*Healthy|Container.*Started|✓|✅|❌|Error|error|Failed|failed)" | while IFS= read -r line; do
-        # Show only key status updates
-        if [[ "$line" =~ (Container.*Running|Container.*Healthy|Starting services|Running docker-compose-up) ]]; then
-            echo "  $line"
-        fi
-    done
-
-    if make docker-compose-up > /dev/null 2>&1; then
-        print_color "$GREEN" "✅ Services started successfully!"
+    # Start docker-compose services (runs in detached mode via Makefile)
+    # Timeout: 600 seconds (10 minutes) to allow for:
+    # - Image pulls (semantic-router, envoy, jaeger, prometheus, grafana, openwebui, pipelines, llm-katan)
+    # - Dashboard build from Dockerfile (Go compilation can take 5-10 minutes)
+    # - Network/system variations
+    # Save output to log file for debugging
+    if timeout 600 make docker-compose-up 2>&1 | tee /tmp/docker-compose-output.log; then
+        success_msg "✅ Docker compose command completed!"
+        echo "   Output saved to: /tmp/docker-compose-output.log"
     else
-        print_color "$RED" "❌ Failed to start services!"
+        local exit_code=$?
+        if [ $exit_code -eq 124 ]; then
+            error_msg "❌ Docker compose command timed out after 10 minutes!"
+            info_msg "📋 This might indicate:"
+            info_msg "   - Very slow network (image pulls)"
+            info_msg "   - System resource constraints"
+            info_msg "   - Dashboard build taking too long"
+            info_msg "📋 Check logs: cat /tmp/docker-compose-output.log"
+        else
+            error_msg "❌ Failed to start services!"
+            info_msg "📋 Check logs: cat /tmp/docker-compose-output.log"
+        fi
         exit 1
     fi
     echo
@@ -183,28 +204,61 @@

 # Function to wait for services to be healthy
 wait_for_services() {
-    # Silently wait for services to become healthy
-    local max_attempts=30
+    section_header "🔍 Checking service health..."
+    local max_attempts=60
     local attempt=1

+    # List of critical services that must be healthy
+    local critical_services=("semantic-router" "envoy-proxy")
+
     while [ $attempt -le $max_attempts ]; do
-        # Check if semantic-router container is healthy
-        if docker ps --filter "name=semantic-router" --filter "health=healthy" --format "{{.Names}}" | grep -q "semantic-router" 2>/dev/null; then
-            print_color "$GREEN" "✅ All services are healthy and ready!"
+        local all_healthy=true
+        local unhealthy_services=""
+
+        # Check each critical service
+        for service in "${critical_services[@]}"; do
+            if ! docker ps --filter "name=$service" --filter "health=healthy" --format "{{.Names}}" | grep -q "$service" 2>/dev/null; then
+                all_healthy=false
+                unhealthy_services="$unhealthy_services $service"
+            fi
+        done
+
+        # Check for any exited/failed containers
+        local failed_containers=$(docker ps -a --filter "status=exited" --format "{{.Names}}" 2>/dev/null)
+        if [ -n "$failed_containers" ]; then
+            error_msg "❌ Some containers failed to start: $failed_containers"
+            info_msg "📋 Check logs with: docker compose logs $failed_containers"
+            return 1
+        fi
+
+        if [ "$all_healthy" = true ]; then
+            success_msg "✅ All critical services are healthy and ready!"
+            echo
+            # Show status of all containers
+            section_header "📊 Container Status:"
+            docker ps --format "table {{.Names}}\t{{.Status}}" | grep -E "NAMES|semantic-router|envoy|dashboard|prometheus|grafana|jaeger|openwebui|pipelines|llm-katan"
+            echo
             return 0
         fi

+        # Show progress every 5 seconds
+        if [ $((attempt % 5)) -eq 0 ]; then
+            info_msg "⏳ Still waiting for:$unhealthy_services (attempt $attempt/$max_attempts)"
+        fi
+
         sleep 2
         ((attempt++))
     done

-    print_color "$YELLOW" "⚠️ Services are starting but may not be fully healthy yet."
-    print_color "$WHITE" "You can check the status with: docker compose ps"
+    info_msg "⚠️ Timeout: Services are starting but not all are healthy yet."
+    print_color "$WHITE" "📋 Check status with: docker ps"
+    print_color "$WHITE" "📋 View logs with: docker compose logs -f"
+    return 1
 }

 # Function to show service information
 show_service_info() {
-    print_color "$CYAN" "🌐 Service Information:"
+    section_header "🌐 Service Information:"
     echo
     print_color "$WHITE" "┌─────────────────────────────────────────────────────────────┐"
     print_color "$WHITE" "│                    🎯 Endpoints                             │"
@@ -216,7 +270,7 @@
     print_color "$GREEN" "│  🌐 Open WebUI:        http://localhost:3001                │"
     print_color "$WHITE" "└─────────────────────────────────────────────────────────────┘"
     echo
-    print_color "$CYAN" "🔧 Useful Commands:"
+    section_header "🔧 Useful Commands:"
     echo
     print_color "$WHITE" "  • Check service status:  docker compose ps"
     print_color "$WHITE" "  • View logs:             docker compose logs -f"
@@ -242,16 +296,18 @@
     print_color "$CYAN" "╚══════════════════════════════════════════════════════════════════════════════╝"
     echo

-    # Ask if user wants to open browser
-    read -p "$(print_color "$YELLOW" "Would you like to open the dashboard in your browser? (y/N): ")" -n 1 -r
-    echo
-    if [[ $REPLY =~ ^[Yy]$ ]]; then
-        if command -v open &> /dev/null; then
-            open http://localhost:8700
-        elif command -v xdg-open &> /dev/null; then
-            xdg-open http://localhost:8700
-        else
-            print_color "$YELLOW" "Please open http://localhost:8700 in your browser manually."
+    # Ask if user wants to open browser (skip in CI environments)
+    if [ -z "${CI:-}" ]; then
+        read -p "$(print_color "$YELLOW" "Would you like to open the dashboard in your browser? (y/N): ")" -n 1 -r
+        echo
+        if [[ $REPLY =~ ^[Yy]$ ]]; then
+            if command -v open &> /dev/null; then
+                open http://localhost:8700
+            elif command -v xdg-open &> /dev/null; then
+                xdg-open http://localhost:8700
+            else
+                info_msg "Please open http://localhost:8700 in your browser manually."
+            fi
         fi
     fi
 }
@@ -274,7 +330,12 @@
     start_services

     # Wait for services to be healthy
-    wait_for_services
+    if ! wait_for_services; then
+        error_msg "❌ Service health check failed or timed out!"
+        info_msg "📋 You can check logs with: docker compose logs"
+        info_msg "📋 Or continue manually if services are starting"
+        exit 1
+    fi

     # Show service information
     show_service_info
diff --git a/tools/make/linter.mk b/tools/make/linter.mk
index a252d4ac..ee92ce90 100644
--- a/tools/make/linter.mk
+++ b/tools/make/linter.mk
@@ -40,4 +40,4 @@ codespell: ## Check for common misspellings in code and docs

 shellcheck: ## Lint all shell scripts in the project
 	@$(LOG_TARGET)
-	shellcheck --rcfile=tools/linter/shellcheck/.shellcheckrc $(shell find . -type f -name "*.sh" -not -path "./node_modules/*" -not -path "./website/node_modules/*" -not -path "./dashboard/frontend/node_modules/*" -not -path "./models/*" -not -path "./.venv/*")
+	shellcheck --rcfile=tools/linter/shellcheck/.shellcheckrc $(shell find . -type f -name "*.sh" -not -path "./node_modules/*" -not -path "./website/node_modules/*" -not -path "./dashboard/frontend/node_modules/*" -not -path "./models/*" -not -path "./.venv/*")
\ No newline at end of file
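For local verification outside CI, a minimal smoke test in the spirit of the new workflow step might look like the sketch below. It assumes the compose stack from deploy/docker-compose is already running on the default ports shown above (Envoy on 8801, its admin interface on 19000), and that curl and jq are available on the host; everything else is taken from the workflow's routing check and the new default_model.

#!/usr/bin/env bash
# Local smoke test mirroring the CI "Test semantic routing functionality" step.
set -euo pipefail

# Envoy readiness: from the host, the admin /ready endpoint reports LIVE when the proxy is up
# (the in-container healthcheck above probes the same endpoint via /dev/tcp).
curl -sf http://localhost:19000/ready | grep -q LIVE && echo "Envoy is ready"

# Routing check: send a chat completion to the default model (qwen3) through Envoy on 8801
# and require a non-empty message in the response, as the CI job does.
response=$(curl -s -X POST http://localhost:8801/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3", "messages": [{"role": "user", "content": "What is 2 + 2?"}]}')

if echo "$response" | jq -e '.choices[0].message.content' > /dev/null; then
  echo "Routing OK: $(echo "$response" | jq -r '.choices[0].message.content' | head -c 200)"
else
  echo "Routing failed: $response" >&2
  exit 1
fi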