Skip to content

Commit 7fddb43

Browse files
yehudit1987 and Yehudit Kerido authored
feat: fix quickstart.sh script (#548)
Signed-off-by: Yehudit Kerido <[email protected]>
Co-authored-by: Yehudit Kerido <[email protected]>
1 parent 75523aa commit 7fddb43

File tree

5 files changed

+251
-80
lines changed

5 files changed

+251
-80
lines changed
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
# Quickstart Integration Test
#
# Runs scripts/quickstart.sh end to end on a GitHub-hosted runner, then sends
# one chat-completion request through the proxy on localhost:8801 and checks
# that the reply has the expected `.choices[0].message.content` field.
# Triggered by pull requests against main that touch the quickstart script,
# the docker-compose deployment, the main config, or the listed make
# fragments, and by manual dispatch.
name: Quickstart Integration Test

on:
  pull_request:
    branches:
      - main
    paths:
      - 'scripts/quickstart.sh'
      - 'deploy/docker-compose/**'
      - 'config/config.yaml'
      - 'tools/make/common.mk'
      - 'tools/make/models.mk'
      - 'tools/make/docker.mk'
  workflow_dispatch:  # Allow manual triggering

# The job only checks out and reads the repository, so keep the token
# read-only instead of inheriting the repository default scopes.
permissions:
  contents: read

jobs:
  test-quickstart:
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      # Remove large preinstalled toolchains from the runner image; the
      # before/after `df -h` output shows how much space was reclaimed.
      - name: Free up disk space
        run: |
          echo "Disk space before cleanup:"
          df -h
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf /usr/local/share/boost
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
          echo "Disk space after cleanup:"
          df -h

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # jq is listed explicitly because the smoke test below depends on it.
      # NOTE(review): the `docker-compose` apt package is the legacy v1 CLI,
      # while the failure-log step below calls the v2 `docker compose`
      # plugin - confirm which one scripts/quickstart.sh expects.
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            make \
            curl \
            jq \
            docker-compose

      # `timeout` exits with 124 when the 1200 s (20 min) limit is hit, which
      # is reported separately from an ordinary script failure.
      - name: Run quickstart script
        id: quickstart
        run: |
          timeout 1200 bash scripts/quickstart.sh || {
            exit_code=$?
            if [ $exit_code -eq 124 ]; then
              echo "::error::Quickstart script timed out after 20 minutes"
            else
              echo "::error::Quickstart script failed with exit code $exit_code"
            fi
            exit $exit_code
          }
        env:
          # Quoted so the values are unambiguously strings.
          CI: 'true'
          CI_MINIMAL_MODELS: 'true'
          TERM: xterm
          HF_HUB_ENABLE_HF_TRANSFER: '1'
          HF_HUB_DISABLE_TELEMETRY: '1'

      - name: Test semantic routing functionality
        run: |
          echo "Testing semantic router with a sample query..."

          # -sS keeps the progress meter off but still prints transport
          # errors. The timeout/retry flags bound the wait and tolerate a
          # proxy that is not accepting connections yet. --fail is left out
          # on purpose so an HTTP error body is still captured and printed.
          if ! response=$(curl -sS -X POST http://localhost:8801/v1/chat/completions \
            --connect-timeout 10 \
            --max-time 300 \
            --retry 5 \
            --retry-delay 5 \
            --retry-max-time 600 \
            --retry-connrefused \
            -H "Content-Type: application/json" \
            -d '{
              "model": "qwen3",
              "messages": [{"role": "user", "content": "What is 2 + 2?"}],
              "temperature": 0.7
            }'); then
            echo "::error::Request to semantic router failed (curl could not complete the call)"
            exit 1
          fi

          echo "Full response: $response"

          # Validate response structure
          if echo "$response" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then
            echo "✓ Semantic router successfully routed and processed the query"
            echo "  Answer: $(echo "$response" | jq -r '.choices[0].message.content' | head -c 200)"
          else
            echo "::error::Semantic router failed to process query correctly"
            echo "Response was: $response"
            exit 1
          fi

      # Every command is best-effort (`|| true`) so that one failing log
      # command does not prevent the remaining diagnostics from printing.
      - name: Show service logs on failure
        if: failure()
        run: |
          echo "=== Docker Compose Logs ==="
          docker compose -f deploy/docker-compose/docker-compose.yml logs || true
          echo "=== Container Status ==="
          docker ps -a || true
          echo "=== Semantic Router Logs ==="
          docker logs semantic-router || true
          echo "=== Envoy Logs ==="
          docker logs envoy-proxy || true
          echo "=== Dashboard Logs ==="
          docker logs semantic-router-dashboard || true

      # Runs even when earlier steps failed; both commands are best-effort.
      - name: Clean up
        if: always()
        run: |
          make docker-compose-down || true
          docker system prune -af --volumes || true

config/config.yaml

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,6 @@ semantic_cache:
2424
# Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context)
2525
# Default: "bert" (fastest, lowest memory)
2626
embedding_model: "bert"
27-
# HNSW index configuration (for memory backend only)
28-
use_hnsw: true # Enable HNSW index for faster similarity search
29-
hnsw_m: 16 # Number of bi-directional links (higher = better recall, more memory)
30-
hnsw_ef_construction: 200 # Construction parameter (higher = better quality, slower build)
31-
32-
# Hybrid cache configuration (when backend_type: "hybrid")
33-
# Combines in-memory HNSW for fast search with Milvus for scalable storage
34-
# max_memory_entries: 100000 # Max entries in HNSW index (default: 100,000)
35-
# backend_config_path: "config/milvus.yaml" # Path to Milvus config
3627

3728
tools:
3829
enabled: true
@@ -223,7 +214,7 @@ router:
223214
traditional_attention_dropout_prob: 0.1 # Traditional model attention dropout probability
224215
tie_break_confidence: 0.5 # Confidence value for tie-breaking situations
225216

226-
default_model: openai/gpt-oss-20b
217+
default_model: qwen3
227218

228219
# Reasoning family configurations
229220
reasoning_families:

deploy/docker-compose/docker-compose.yml

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ services:
77
ports:
88
- "50051:50051"
99
volumes:
10-
- ../../config:/app/config:ro
11-
- ../../models:/app/models:ro
12-
- ~/.cache/huggingface:/root/.cache/huggingface
10+
- ../../config:/app/config:ro,z
11+
- ../../models:/app/models:ro,z
12+
- ~/.cache/huggingface:/root/.cache/huggingface:z
1313
environment:
1414
- LD_LIBRARY_PATH=/app/lib
1515
# Use main config by default; override via CONFIG_FILE if needed
@@ -32,19 +32,21 @@ services:
3232
envoy:
3333
image: envoyproxy/envoy:v1.31.7
3434
container_name: envoy-proxy
35+
security_opt:
36+
- label=disable
3537
ports:
3638
- "8801:8801" # Main proxy port
3739
- "19000:19000" # Admin interface
3840
volumes:
39-
- ./addons/envoy.yaml:/etc/envoy/envoy.yaml:ro
41+
- ./addons/envoy.yaml:/etc/envoy/envoy.yaml:ro,z
4042
command: ["/usr/local/bin/envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "ext_proc:trace,router:trace,http:trace"]
4143
depends_on:
4244
semantic-router:
4345
condition: service_healthy
4446
networks:
4547
- semantic-network
4648
healthcheck:
47-
test: ["CMD", "curl", "-f", "http://localhost:19000/ready"]
49+
test: ["CMD", "bash", "-c", "(echo -e 'GET /ready HTTP/1.1\\r\\nHost: localhost\\r\\n\\r\\n' >&3; timeout 2 cat <&3) 3<>/dev/tcp/localhost/19000 | grep -q LIVE"]
4850
interval: 10s
4951
timeout: 5s
5052
retries: 5
@@ -86,7 +88,7 @@ services:
8688
image: prom/prometheus:v2.53.0
8789
container_name: prometheus
8890
volumes:
89-
- ./addons/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
91+
- ./addons/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro,z
9092
- prometheus-data:/prometheus
9193
command:
9294
- --config.file=/etc/prometheus/prometheus.yaml
@@ -106,11 +108,11 @@ services:
106108
ports:
107109
- "3000:3000"
108110
volumes:
109-
- ./addons/grafana.ini:/etc/grafana/grafana.ini:ro
110-
- ./addons/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro
111-
- ./addons/grafana-datasource-jaeger.yaml:/etc/grafana/provisioning/datasources/datasource_jaeger.yaml:ro
112-
- ./addons/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro
113-
- ./addons/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro
111+
- ./addons/grafana.ini:/etc/grafana/grafana.ini:ro,z
112+
- ./addons/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro,z
113+
- ./addons/grafana-datasource-jaeger.yaml:/etc/grafana/provisioning/datasources/datasource_jaeger.yaml:ro,z
114+
- ./addons/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro,z
115+
- ./addons/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro,z
114116
- grafana-data:/var/lib/grafana
115117
networks:
116118
- semantic-network
@@ -175,9 +177,16 @@ services:
175177
- PYTHONUNBUFFERED=1
176178
volumes:
177179
# Persistent pipelines storage (auto-loaded on start)
178-
- openwebui-pipelines:/app/pipelines
179-
# Mount our vLLM Semantic Router pipeline
180-
- ./addons/vllm_semantic_router_pipe.py:/app/pipelines/vllm_semantic_router_pipe.py:ro
180+
- type: volume
181+
source: openwebui-pipelines
182+
target: /app/pipelines
183+
volume:
184+
nocopy: true
185+
# Mount our vLLM Semantic Router pipeline (read-only) into the persistent dir
186+
- type: bind
187+
source: ./addons/vllm_semantic_router_pipe.py
188+
target: /app/pipelines/vllm_semantic_router_pipe.py
189+
read_only: true
181190
networks:
182191
- semantic-network
183192

@@ -202,7 +211,7 @@ services:
202211
- HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN:-}
203212
- HF_HUB_ENABLE_HF_TRANSFER=1
204213
volumes:
205-
- ../../models:/app/models:ro
214+
- ../../models:/app/models:ro,z
206215
- hf-cache:/home/llmkatan/.cache/huggingface
207216
networks:
208217
semantic-network:
@@ -235,7 +244,7 @@ services:
235244
- TARGET_CHATUI_URL=http://chat-ui:3000
236245
- ROUTER_CONFIG_PATH=/app/config/config.yaml
237246
volumes:
238-
- ../../config:/app/config:rw
247+
- ../../config:/app/config:rw,z
239248
ports:
240249
- "8700:8700"
241250
networks:

0 commit comments

Comments
 (0)