fix python pre-commit error & add MiniLM-L12-v2 & docker-compose-down

JaredforReal · JaredforReal · commit c942a9a45e63 · 2025-10-16T14:31:30.000+08:00
Signed-off-by: JaredforReal &lt;w13431838023@gmail.com&gt;
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,100 +1,100 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
-# Basic hooks for Go, Rust, Python And JavaScript files only
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v6.0.0
-  hooks:
-  - id: trailing-whitespace
-    files: \.(go|rs|py|js)$
-  - id: end-of-file-fixer
-    files: \.(go|rs|py|js)$
-  - id: check-added-large-files
-    args: ['--maxkb=500']
-    files: \.(go|rs|py|js)$
+  # Basic hooks for Go, Rust, Python And JavaScript files only
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
+    hooks:
+      - id: trailing-whitespace
+        files: \.(go|rs|py|js)$
+      - id: end-of-file-fixer
+        files: \.(go|rs|py|js)$
+      - id: check-added-large-files
+        args: ["--maxkb=500"]
+        files: \.(go|rs|py|js)$
 
-# Go specific hooks
-- repo: local
-  hooks:
-  - id: go-fmt
-    name: go fmt
-    entry: gofmt -w
-    language: system
-    files: \.go$
+  # Go specific hooks
+  - repo: local
+    hooks:
+      - id: go-fmt
+        name: go fmt
+        entry: gofmt -w
+        language: system
+        files: \.go$
 
-- repo: local
-  hooks:
-    - id: golang-lint
-      name: go lint
-      entry: make go-lint
-      language: system
-      files: \.go$
-      pass_filenames: false
+  - repo: local
+    hooks:
+      - id: golang-lint
+        name: go lint
+        entry: make go-lint
+        language: system
+        files: \.go$
+        pass_filenames: false
 
-# Markdown specific hooks
-- repo: local
-  hooks:
-  - id: md-fmt
-    name: md fmt
-    entry: bash -c "make markdown-lint"
-    language: system
-    files: \.md$
-    exclude: ^(\node_modules/|CLAUDE\.md)
+  # Markdown specific hooks
+  - repo: local
+    hooks:
+      - id: md-fmt
+        name: md fmt
+        entry: bash -c "make markdown-lint"
+        language: system
+        files: \.md$
+        exclude: ^(\node_modules/|CLAUDE\.md)
 
-# Yaml specific hooks
-- repo: local
-  hooks:
-  - id: yaml-and-yml-fmt
-    name: yaml/yml fmt
-    entry: bash -c "make markdown-lint"
-    language: system
-    files: \.(yaml|yml)$
-    exclude: ^(\node_modules/)
+  # Yaml specific hooks
+  - repo: local
+    hooks:
+      - id: yaml-and-yml-fmt
+        name: yaml/yml fmt
+        entry: bash -c "make markdown-lint"
+        language: system
+        files: \.(yaml|yml)$
+        exclude: ^(\node_modules/)
 
-# JavaScript and TypeScript specific hooks
-- repo: local
-  hooks:
-  - id: js-ts-lint
-    name: js/ts lint
-    entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
-    language: system
-    files: \.(js|ts|tsx)$
-    exclude: ^(\node_modules/)
-    pass_filenames: false
+  # JavaScript and TypeScript specific hooks
+  - repo: local
+    hooks:
+      - id: js-ts-lint
+        name: js/ts lint
+        entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
+        language: system
+        files: \.(js|ts|tsx)$
+        exclude: ^(\node_modules/)
+        pass_filenames: false
 
-# Rust specific hooks
-- repo: local
-  hooks:
-  - id: cargo-fmt
-    name: cargo fmt
-    entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
-    language: system
-    files: \.rs$
-    pass_filenames: false
-  - id: cargo-check
-    name: cargo check
-    entry: bash -c 'cd candle-binding && cargo check'
-    language: system
-    files: \.rs$
-    pass_filenames: false
+  # Rust specific hooks
+  - repo: local
+    hooks:
+      - id: cargo-fmt
+        name: cargo fmt
+        entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
+        language: system
+        files: \.rs$
+        pass_filenames: false
+      - id: cargo-check
+        name: cargo check
+        entry: bash -c 'cd candle-binding && cargo check'
+        language: system
+        files: \.rs$
+        pass_filenames: false
 
-# Python specific hooks
-- repo: https://github.com/psf/black
-  rev: 25.1.0
-  hooks:
-  - id: black
-    language_version: python3
-    files: \.py$
-    exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
-
-- repo: https://github.com/PyCQA/isort
-  rev: 6.0.1
-  hooks:
-  - id: isort
-    args: ["--profile", "black"]
-    files: \.py$
-    exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
+  # Python specific hooks
+  # isort must run before black
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+        args: ["--profile", "black", "--line-length", "88"]
+        files: \.py$
+        exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
 
+  - repo: https://github.com/psf/black
+    rev: 25.1.0
+    hooks:
+      - id: black
+        language_version: python3
+        files: \.py$
+        exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
 # Commented out flake8 - only reports issues, doesn't auto-fix
 # -   repo: https://github.com/PyCQA/flake8
 #     rev: 7.3.0
diff --git a/config/config.yaml b/config/config.yaml
@@ -1,15 +1,15 @@
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 
 semantic_cache:
   enabled: true
-  backend_type: "memory"  # Options: "memory" or "milvus"
+  backend_type: "memory" # Options: "memory" or "milvus"
   similarity_threshold: 0.8
-  max_entries: 1000  # Only applies to memory backend
+  max_entries: 1000 # Only applies to memory backend
   ttl_seconds: 3600
-  eviction_policy: "fifo"  
+  eviction_policy: "fifo"
 
 tools:
   enabled: true
@@ -32,13 +32,13 @@ prompt_guard:
 # NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field)
 vllm_endpoints:
   - name: "endpoint1"
-    address: "172.28.0.20"  # Static IPv4 of llm-katan within docker compose network
+    address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network
     port: 8002
     weight: 1
 
 model_config:
   "qwen3":
-    reasoning_family: "qwen3"  # This model uses Qwen-3 reasoning syntax
+    reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
     preferred_endpoints: ["endpoint1"]
     pii_policy:
       allow_by_default: true
@@ -65,7 +65,7 @@ categories:
     model_scores:
       - model: qwen3
         score: 0.7
-        use_reasoning: false  # Business performs better without reasoning
+        use_reasoning: false # Business performs better without reasoning
   - name: law
     system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
     model_scores:
@@ -89,7 +89,7 @@ categories:
     model_scores:
       - model: qwen3
         score: 0.6
-        use_reasoning: true  # Enable reasoning for complex chemistry
+        use_reasoning: true # Enable reasoning for complex chemistry
   - name: history
     system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
     model_scores:
@@ -119,13 +119,13 @@ categories:
     model_scores:
       - model: qwen3
         score: 1.0
-        use_reasoning: true  # Enable reasoning for complex math
+        use_reasoning: true # Enable reasoning for complex math
   - name: physics
     system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
     model_scores:
       - model: qwen3
         score: 0.7
-        use_reasoning: true  # Enable reasoning for physics
+        use_reasoning: true # Enable reasoning for physics
   - name: computer science
     system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
     model_scores:
@@ -178,23 +178,23 @@ api:
       detailed_goroutine_tracking: true
       high_resolution_timing: false
       sample_rate: 1.0
-      duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+      duration_buckets:
+        [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
       size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
 
 # Observability Configuration
 observability:
   tracing:
-    enabled: false  # Enable distributed tracing (default: false)
-    provider: "opentelemetry"  # Provider: opentelemetry, openinference, openllmetry
+    enabled: false # Enable distributed tracing (default: false)
+    provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry
     exporter:
-      type: "stdout"  # Exporter: otlp, jaeger, zipkin, stdout
-      endpoint: "localhost:4317"  # OTLP endpoint (when type: otlp)
-      insecure: true  # Use insecure connection (no TLS)
+      type: "stdout" # Exporter: otlp, jaeger, zipkin, stdout
+      endpoint: "localhost:4317" # OTLP endpoint (when type: otlp)
+      insecure: true # Use insecure connection (no TLS)
     sampling:
-      type: "always_on"  # Sampling: always_on, always_off, probabilistic
-      rate: 1.0  # Sampling rate for probabilistic (0.0-1.0)
+      type: "always_on" # Sampling: always_on, always_off, probabilistic
+      rate: 1.0 # Sampling rate for probabilistic (0.0-1.0)
     resource:
       service_name: "vllm-semantic-router"
       service_version: "v0.1.0"
       deployment_environment: "development"
-
diff --git a/tools/make/docker.mk b/tools/make/docker.mk
@@ -130,9 +130,24 @@ docker-compose-rebuild-llm-katan: docker-compose-up-llm-katan
 
 docker-compose-down:
 	@$(LOG_TARGET)
-	@echo "Stopping docker-compose services..."
+	@echo "Stopping docker-compose services (default includes llm-katan)..."
+	@docker compose --profile llm-katan down
+
+docker-compose-down-core:
+	@$(LOG_TARGET)
+	@echo "Stopping core services only (no llm-katan)..."
 	@docker compose down
 
+docker-compose-down-testing:
+	@$(LOG_TARGET)
+	@echo "Stopping services with testing profile..."
+	@docker compose --profile testing down
+
+docker-compose-down-llm-katan:
+	@$(LOG_TARGET)
+	@echo "Stopping services with llm-katan profile..."
+	@docker compose --profile llm-katan down
+
 # Help target for Docker commands
 docker-help:
 	@echo "Docker Make Targets:"
@@ -152,7 +167,10 @@ docker-help:
 	@echo "  docker-compose-rebuild               - Force rebuild then start"
 	@echo "  docker-compose-rebuild-testing       - Force rebuild (testing profile)"
 	@echo "  docker-compose-rebuild-llm-katan     - Force rebuild (llm-katan profile)"
-	@echo "  docker-compose-down                  - Stop docker-compose services"
+	@echo "  docker-compose-down                  - Stop services (default includes llm-katan)"
+	@echo "  docker-compose-down-core             - Stop core services only (no llm-katan)"
+	@echo "  docker-compose-down-testing          - Stop services with testing profile"
+	@echo "  docker-compose-down-llm-katan        - Stop services with llm-katan profile"
 	@echo ""
 	@echo "Environment Variables:"
 	@echo "  DOCKER_REGISTRY - Docker registry (default: ghcr.io/vllm-project/semantic-router)"
diff --git a/tools/make/models.mk b/tools/make/models.mk
@@ -28,6 +28,9 @@ download-models-minimal:
 	@if [ ! -f "models/Qwen/Qwen3-0.6B/.downloaded" ] || [ ! -d "models/Qwen/Qwen3-0.6B" ]; then \
 		hf download Qwen/Qwen3-0.6B --local-dir models/Qwen/Qwen3-0.6B && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/Qwen/Qwen3-0.6B/.downloaded; \
 	fi
+	@if [ ! -f "models/all-MiniLM-L12-v2/.downloaded" ] || [ ! -d "models/all-MiniLM-L12-v2" ]; then \
+		hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir models/all-MiniLM-L12-v2 && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/all-MiniLM-L12-v2/.downloaded; \
+	fi
 	@if [ ! -f "models/category_classifier_modernbert-base_model/.downloaded" ] || [ ! -d "models/category_classifier_modernbert-base_model" ]; then \
 		hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir models/category_classifier_modernbert-base_model && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/category_classifier_modernbert-base_model/.downloaded; \
 	fi
@@ -49,6 +52,9 @@ download-models-full:
 	@if [ ! -f "models/Qwen/Qwen3-0.6B/.downloaded" ] || [ ! -d "models/Qwen/Qwen3-0.6B" ]; then \
 		hf download Qwen/Qwen3-0.6B --local-dir models/Qwen/Qwen3-0.6B && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/Qwen/Qwen3-0.6B/.downloaded; \
 	fi
+	@if [ ! -f "models/all-MiniLM-L12-v2/.downloaded" ] || [ ! -d "models/all-MiniLM-L12-v2" ]; then \
+		hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir models/all-MiniLM-L12-v2 && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/all-MiniLM-L12-v2/.downloaded; \
+	fi
 	@if [ ! -f "models/category_classifier_modernbert-base_model/.downloaded" ] || [ ! -d "models/category_classifier_modernbert-base_model" ]; then \
 		hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir models/category_classifier_modernbert-base_model && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/category_classifier_modernbert-base_model/.downloaded; \
 	fi