Skip to content

Commit c942a9a

Browse files
committed
Fix Python pre-commit errors, add the all-MiniLM-L12-v2 model download, and add profile-specific docker-compose-down targets
Signed-off-by: JaredforReal <[email protected]>
1 parent ea580ac commit c942a9a

File tree

4 files changed: +131 -107 lines changed

.pre-commit-config.yaml

Lines changed: 86 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,100 @@
11
# See https://pre-commit.com for more information
22
# See https://pre-commit.com/hooks.html for more hooks
33
repos:
4-
# Basic hooks for Go, Rust, Python And JavaScript files only
5-
- repo: https://github.com/pre-commit/pre-commit-hooks
6-
rev: v6.0.0
7-
hooks:
8-
- id: trailing-whitespace
9-
files: \.(go|rs|py|js)$
10-
- id: end-of-file-fixer
11-
files: \.(go|rs|py|js)$
12-
- id: check-added-large-files
13-
args: ['--maxkb=500']
14-
files: \.(go|rs|py|js)$
4+
# Basic hooks for Go, Rust, Python, and JavaScript files only
5+
- repo: https://github.com/pre-commit/pre-commit-hooks
6+
rev: v4.2.0
7+
hooks:
8+
- id: trailing-whitespace
9+
files: \.(go|rs|py|js)$
10+
- id: end-of-file-fixer
11+
files: \.(go|rs|py|js)$
12+
- id: check-added-large-files
13+
args: ["--maxkb=500"]
14+
files: \.(go|rs|py|js)$
1515

16-
# Go specific hooks
17-
- repo: local
18-
hooks:
19-
- id: go-fmt
20-
name: go fmt
21-
entry: gofmt -w
22-
language: system
23-
files: \.go$
16+
# Go specific hooks
17+
- repo: local
18+
hooks:
19+
- id: go-fmt
20+
name: go fmt
21+
entry: gofmt -w
22+
language: system
23+
files: \.go$
2424

25-
- repo: local
26-
hooks:
27-
- id: golang-lint
28-
name: go lint
29-
entry: make go-lint
30-
language: system
31-
files: \.go$
32-
pass_filenames: false
25+
- repo: local
26+
hooks:
27+
- id: golang-lint
28+
name: go lint
29+
entry: make go-lint
30+
language: system
31+
files: \.go$
32+
pass_filenames: false
3333

34-
# Markdown specific hooks
35-
- repo: local
36-
hooks:
37-
- id: md-fmt
38-
name: md fmt
39-
entry: bash -c "make markdown-lint"
40-
language: system
41-
files: \.md$
42-
exclude: ^(\node_modules/|CLAUDE\.md)
34+
# Markdown specific hooks
35+
- repo: local
36+
hooks:
37+
- id: md-fmt
38+
name: md fmt
39+
entry: bash -c "make markdown-lint"
40+
language: system
41+
files: \.md$
42+
exclude: ^(\node_modules/|CLAUDE\.md)
4343

44-
# Yaml specific hooks
45-
- repo: local
46-
hooks:
47-
- id: yaml-and-yml-fmt
48-
name: yaml/yml fmt
49-
entry: bash -c "make markdown-lint"
50-
language: system
51-
files: \.(yaml|yml)$
52-
exclude: ^(\node_modules/)
44+
# Yaml specific hooks
45+
- repo: local
46+
hooks:
47+
- id: yaml-and-yml-fmt
48+
name: yaml/yml fmt
49+
entry: bash -c "make markdown-lint"
50+
language: system
51+
files: \.(yaml|yml)$
52+
exclude: ^(\node_modules/)
5353

54-
# JavaScript and TypeScript specific hooks
55-
- repo: local
56-
hooks:
57-
- id: js-ts-lint
58-
name: js/ts lint
59-
entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
60-
language: system
61-
files: \.(js|ts|tsx)$
62-
exclude: ^(\node_modules/)
63-
pass_filenames: false
54+
# JavaScript and TypeScript specific hooks
55+
- repo: local
56+
hooks:
57+
- id: js-ts-lint
58+
name: js/ts lint
59+
entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
60+
language: system
61+
files: \.(js|ts|tsx)$
62+
exclude: ^(\node_modules/)
63+
pass_filenames: false
6464

65-
# Rust specific hooks
66-
- repo: local
67-
hooks:
68-
- id: cargo-fmt
69-
name: cargo fmt
70-
entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
71-
language: system
72-
files: \.rs$
73-
pass_filenames: false
74-
- id: cargo-check
75-
name: cargo check
76-
entry: bash -c 'cd candle-binding && cargo check'
77-
language: system
78-
files: \.rs$
79-
pass_filenames: false
65+
# Rust specific hooks
66+
- repo: local
67+
hooks:
68+
- id: cargo-fmt
69+
name: cargo fmt
70+
entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
71+
language: system
72+
files: \.rs$
73+
pass_filenames: false
74+
- id: cargo-check
75+
name: cargo check
76+
entry: bash -c 'cd candle-binding && cargo check'
77+
language: system
78+
files: \.rs$
79+
pass_filenames: false
8080

81-
# Python specific hooks
82-
- repo: https://github.com/psf/black
83-
rev: 25.1.0
84-
hooks:
85-
- id: black
86-
language_version: python3
87-
files: \.py$
88-
exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
89-
90-
- repo: https://github.com/PyCQA/isort
91-
rev: 6.0.1
92-
hooks:
93-
- id: isort
94-
args: ["--profile", "black"]
95-
files: \.py$
96-
exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
81+
# Python specific hooks
82+
# isort must run before black
83+
- repo: https://github.com/PyCQA/isort
84+
rev: 5.13.2
85+
hooks:
86+
- id: isort
87+
args: ["--profile", "black", "--line-length", "88"]
88+
files: \.py$
89+
exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
9790

91+
- repo: https://github.com/psf/black
92+
rev: 25.1.0
93+
hooks:
94+
- id: black
95+
language_version: python3
96+
files: \.py$
97+
exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
9898
# Commented out flake8 - only reports issues, doesn't auto-fix
9999
# - repo: https://github.com/PyCQA/flake8
100100
# rev: 7.3.0

config/config.yaml

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
bert_model:
2-
model_id: sentence-transformers/all-MiniLM-L12-v2
2+
model_id: models/all-MiniLM-L12-v2
33
threshold: 0.6
44
use_cpu: true
55

66
semantic_cache:
77
enabled: true
8-
backend_type: "memory" # Options: "memory" or "milvus"
8+
backend_type: "memory" # Options: "memory" or "milvus"
99
similarity_threshold: 0.8
10-
max_entries: 1000 # Only applies to memory backend
10+
max_entries: 1000 # Only applies to memory backend
1111
ttl_seconds: 3600
12-
eviction_policy: "fifo"
12+
eviction_policy: "fifo"
1313

1414
tools:
1515
enabled: true
@@ -32,13 +32,13 @@ prompt_guard:
3232
# NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field)
3333
vllm_endpoints:
3434
- name: "endpoint1"
35-
address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network
35+
address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network
3636
port: 8002
3737
weight: 1
3838

3939
model_config:
4040
"qwen3":
41-
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
41+
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
4242
preferred_endpoints: ["endpoint1"]
4343
pii_policy:
4444
allow_by_default: true
@@ -65,7 +65,7 @@ categories:
6565
model_scores:
6666
- model: qwen3
6767
score: 0.7
68-
use_reasoning: false # Business performs better without reasoning
68+
use_reasoning: false # Business performs better without reasoning
6969
- name: law
7070
system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
7171
model_scores:
@@ -89,7 +89,7 @@ categories:
8989
model_scores:
9090
- model: qwen3
9191
score: 0.6
92-
use_reasoning: true # Enable reasoning for complex chemistry
92+
use_reasoning: true # Enable reasoning for complex chemistry
9393
- name: history
9494
system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
9595
model_scores:
@@ -119,13 +119,13 @@ categories:
119119
model_scores:
120120
- model: qwen3
121121
score: 1.0
122-
use_reasoning: true # Enable reasoning for complex math
122+
use_reasoning: true # Enable reasoning for complex math
123123
- name: physics
124124
system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
125125
model_scores:
126126
- model: qwen3
127127
score: 0.7
128-
use_reasoning: true # Enable reasoning for physics
128+
use_reasoning: true # Enable reasoning for physics
129129
- name: computer science
130130
system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
131131
model_scores:
@@ -178,23 +178,23 @@ api:
178178
detailed_goroutine_tracking: true
179179
high_resolution_timing: false
180180
sample_rate: 1.0
181-
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
181+
duration_buckets:
182+
[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
182183
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
183184

184185
# Observability Configuration
185186
observability:
186187
tracing:
187-
enabled: false # Enable distributed tracing (default: false)
188-
provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry
188+
enabled: false # Enable distributed tracing (default: false)
189+
provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry
189190
exporter:
190-
type: "stdout" # Exporter: otlp, jaeger, zipkin, stdout
191-
endpoint: "localhost:4317" # OTLP endpoint (when type: otlp)
192-
insecure: true # Use insecure connection (no TLS)
191+
type: "stdout" # Exporter: otlp, jaeger, zipkin, stdout
192+
endpoint: "localhost:4317" # OTLP endpoint (when type: otlp)
193+
insecure: true # Use insecure connection (no TLS)
193194
sampling:
194-
type: "always_on" # Sampling: always_on, always_off, probabilistic
195-
rate: 1.0 # Sampling rate for probabilistic (0.0-1.0)
195+
type: "always_on" # Sampling: always_on, always_off, probabilistic
196+
rate: 1.0 # Sampling rate for probabilistic (0.0-1.0)
196197
resource:
197198
service_name: "vllm-semantic-router"
198199
service_version: "v0.1.0"
199200
deployment_environment: "development"
200-

tools/make/docker.mk

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,24 @@ docker-compose-rebuild-llm-katan: docker-compose-up-llm-katan
130130

131131
docker-compose-down:
132132
@$(LOG_TARGET)
133-
@echo "Stopping docker-compose services..."
133+
@echo "Stopping docker-compose services (default includes llm-katan)..."
134+
@docker compose --profile llm-katan down
135+
136+
docker-compose-down-core:
137+
@$(LOG_TARGET)
138+
@echo "Stopping core services only (no llm-katan)..."
134139
@docker compose down
135140

141+
docker-compose-down-testing:
142+
@$(LOG_TARGET)
143+
@echo "Stopping services with testing profile..."
144+
@docker compose --profile testing down
145+
146+
docker-compose-down-llm-katan:
147+
@$(LOG_TARGET)
148+
@echo "Stopping services with llm-katan profile..."
149+
@docker compose --profile llm-katan down
150+
136151
# Help target for Docker commands
137152
docker-help:
138153
@echo "Docker Make Targets:"
@@ -152,7 +167,10 @@ docker-help:
152167
@echo " docker-compose-rebuild - Force rebuild then start"
153168
@echo " docker-compose-rebuild-testing - Force rebuild (testing profile)"
154169
@echo " docker-compose-rebuild-llm-katan - Force rebuild (llm-katan profile)"
155-
@echo " docker-compose-down - Stop docker-compose services"
170+
@echo " docker-compose-down - Stop services (default includes llm-katan)"
171+
@echo " docker-compose-down-core - Stop core services only (no llm-katan)"
172+
@echo " docker-compose-down-testing - Stop services with testing profile"
173+
@echo " docker-compose-down-llm-katan - Stop services with llm-katan profile"
156174
@echo ""
157175
@echo "Environment Variables:"
158176
@echo " DOCKER_REGISTRY - Docker registry (default: ghcr.io/vllm-project/semantic-router)"

tools/make/models.mk

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ download-models-minimal:
2828
@if [ ! -f "models/Qwen/Qwen3-0.6B/.downloaded" ] || [ ! -d "models/Qwen/Qwen3-0.6B" ]; then \
2929
hf download Qwen/Qwen3-0.6B --local-dir models/Qwen/Qwen3-0.6B && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/Qwen/Qwen3-0.6B/.downloaded; \
3030
fi
31+
@if [ ! -f "models/all-MiniLM-L12-v2/.downloaded" ] || [ ! -d "models/all-MiniLM-L12-v2" ]; then \
32+
hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir models/all-MiniLM-L12-v2 && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/all-MiniLM-L12-v2/.downloaded; \
33+
fi
3134
@if [ ! -f "models/category_classifier_modernbert-base_model/.downloaded" ] || [ ! -d "models/category_classifier_modernbert-base_model" ]; then \
3235
hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir models/category_classifier_modernbert-base_model && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/category_classifier_modernbert-base_model/.downloaded; \
3336
fi
@@ -49,6 +52,9 @@ download-models-full:
4952
@if [ ! -f "models/Qwen/Qwen3-0.6B/.downloaded" ] || [ ! -d "models/Qwen/Qwen3-0.6B" ]; then \
5053
hf download Qwen/Qwen3-0.6B --local-dir models/Qwen/Qwen3-0.6B && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/Qwen/Qwen3-0.6B/.downloaded; \
5154
fi
55+
@if [ ! -f "models/all-MiniLM-L12-v2/.downloaded" ] || [ ! -d "models/all-MiniLM-L12-v2" ]; then \
56+
hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir models/all-MiniLM-L12-v2 && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/all-MiniLM-L12-v2/.downloaded; \
57+
fi
5258
@if [ ! -f "models/category_classifier_modernbert-base_model/.downloaded" ] || [ ! -d "models/category_classifier_modernbert-base_model" ]; then \
5359
hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir models/category_classifier_modernbert-base_model && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/category_classifier_modernbert-base_model/.downloaded; \
5460
fi

0 commit comments

Comments
 (0)