diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index cae4785c..0d23aaba 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -5,12 +5,12 @@ on: workflow_call: inputs: tag_suffix: - description: 'Custom tag suffix for the Docker image' + description: "Custom tag suffix for the Docker image" required: false type: string - default: '' + default: "" is_nightly: - description: 'Whether this is a nightly build' + description: "Whether this is a nightly build" required: false type: boolean default: false @@ -20,7 +20,7 @@ on: type: boolean default: true push: - branches: [ "main" ] + branches: ["main"] pull_request: paths: - ".github/workflows/docker-publish.yml" @@ -42,16 +42,32 @@ jobs: # Multi-architecture build strategy: # - AMD64: Native build on ubuntu-latest (fast) # - ARM64: Cross-compilation on ubuntu-latest (faster than emulation) -# arch: ${{ github.event_name == 'pull_request' && fromJSON('["amd64"]') || fromJSON('["amd64", "arm64"]') }} + # arch: ${{ github.event_name == 'pull_request' && fromJSON('["amd64"]') || fromJSON('["amd64", "arm64"]') }} arch: ["amd64", "arm64"] fail-fast: false steps: + - name: Free up disk space + run: | + echo "Before cleanup:" + df -h + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force + echo "After cleanup:" + df -h + - name: Check out the repo uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + with: + driver-opts: | + image=moby/buildkit:latest + network=host - name: Set up QEMU for cross-compilation if: matrix.arch == 'arm64' @@ -172,6 +188,14 @@ jobs: fi fi + - name: Additional cleanup for llm-katan (large Python packages) + if: matrix.image == 'llm-katan' + run: | + echo "Freeing up more space for llm-katan build..." 
+ sudo apt-get clean + sudo rm -rf /var/lib/apt/lists/* + df -h + - name: Build and push ${{ matrix.image }} Docker image id: build uses: docker/build-push-action@v5 @@ -182,10 +206,8 @@ jobs: push: ${{ github.event_name != 'pull_request' }} load: ${{ github.event_name == 'pull_request' }} tags: ${{ steps.tags.outputs.tags }} - cache-from: | - type=gha - type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache,mode=max + cache-from: type=gha + cache-to: type=gha,mode=max build-args: | BUILDKIT_INLINE_CACHE=1 CARGO_BUILD_JOBS=${{ github.event_name == 'pull_request' && '8' || '16' }} diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml index 864c3159..d77545f5 100644 --- a/.github/workflows/test-and-build.yml +++ b/.github/workflows/test-and-build.yml @@ -64,6 +64,7 @@ jobs: key: ${{ runner.os }}-models-v1-${{ hashFiles('tools/make/models.mk') }} restore-keys: | ${{ runner.os }}-models-v1- + continue-on-error: true # Don't fail the job if caching fails - name: Check go mod tidy run: make check-go-mod-tidy diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8367a124..41c055b4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,100 +1,100 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: -# Basic hooks for Go, Rust, Python And JavaScript files only -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v6.0.0 - hooks: - - id: trailing-whitespace - files: \.(go|rs|py|js)$ - - id: end-of-file-fixer - files: \.(go|rs|py|js)$ - - id: check-added-large-files - args: ['--maxkb=500'] - files: \.(go|rs|py|js)$ + # Basic hooks for Go, Rust, Python And JavaScript files only + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.2.0 + hooks: + - id: trailing-whitespace + files: \.(go|rs|py|js)$ + - id: end-of-file-fixer + files: \.(go|rs|py|js)$ + - id: check-added-large-files + args: ["--maxkb=500"] 
+ files: \.(go|rs|py|js)$ -# Go specific hooks -- repo: local - hooks: - - id: go-fmt - name: go fmt - entry: gofmt -w - language: system - files: \.go$ + # Go specific hooks + - repo: local + hooks: + - id: go-fmt + name: go fmt + entry: gofmt -w + language: system + files: \.go$ -- repo: local - hooks: - - id: golang-lint - name: go lint - entry: make go-lint - language: system - files: \.go$ - pass_filenames: false + - repo: local + hooks: + - id: golang-lint + name: go lint + entry: make go-lint + language: system + files: \.go$ + pass_filenames: false -# Markdown specific hooks -- repo: local - hooks: - - id: md-fmt - name: md fmt - entry: bash -c "make markdown-lint" - language: system - files: \.md$ - exclude: ^(\node_modules/|CLAUDE\.md) + # Markdown specific hooks + - repo: local + hooks: + - id: md-fmt + name: md fmt + entry: bash -c "make markdown-lint" + language: system + files: \.md$ + exclude: ^(node_modules/|CLAUDE\.md) -# Yaml specific hooks -- repo: local - hooks: - - id: yaml-and-yml-fmt - name: yaml/yml fmt - entry: bash -c "make markdown-lint" - language: system - files: \.(yaml|yml)$ - exclude: ^(\node_modules/) + # Yaml specific hooks + - repo: local + hooks: + - id: yaml-and-yml-fmt + name: yaml/yml fmt + entry: bash -c "make markdown-lint" + language: system + files: \.(yaml|yml)$ + exclude: ^(node_modules/) -# JavaScript and TypeScript specific hooks -- repo: local - hooks: - - id: js-ts-lint - name: js/ts lint - entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint' - language: system - files: \.(js|ts|tsx)$ - exclude: ^(\node_modules/) - pass_filenames: false + # JavaScript and TypeScript specific hooks + - repo: local + hooks: + - id: js-ts-lint + name: js/ts lint + entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint' + language: system + files: \.(js|ts|tsx)$ + exclude: ^(node_modules/) + pass_filenames: false -# Rust specific hooks -- repo: local - hooks: - - id: cargo-fmt - name: 
cargo fmt - entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt' - language: system - files: \.rs$ - pass_filenames: false - - id: cargo-check - name: cargo check - entry: bash -c 'cd candle-binding && cargo check' - language: system - files: \.rs$ - pass_filenames: false + # Rust specific hooks + - repo: local + hooks: + - id: cargo-fmt + name: cargo fmt + entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt' + language: system + files: \.rs$ + pass_filenames: false + - id: cargo-check + name: cargo check + entry: bash -c 'cd candle-binding && cargo check' + language: system + files: \.rs$ + pass_filenames: false -# Python specific hooks -- repo: https://github.com/psf/black - rev: 25.1.0 - hooks: - - id: black - language_version: python3 - files: \.py$ - exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/) - -- repo: https://github.com/PyCQA/isort - rev: 6.0.1 - hooks: - - id: isort - args: ["--profile", "black"] - files: \.py$ - exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/) + # Python specific hooks + # isort must run before black + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort + args: ["--profile", "black", "--line-length", "88"] + files: \.py$ + exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/) + - repo: https://github.com/psf/black + rev: 25.1.0 + hooks: + - id: black + language_version: python3 + files: \.py$ + exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/) # Commented out flake8 - only reports issues, doesn't auto-fix # - repo: https://github.com/PyCQA/flake8 # rev: 7.3.0 diff --git a/config/config-mcp-classifier-example.yaml b/config/config-mcp-classifier-example.yaml index 22468df6..4d7f6530 100644 --- a/config/config-mcp-classifier-example.yaml +++ b/config/config-mcp-classifier-example.yaml @@ -14,7 +14,7 @@ # BERT model for semantic caching and tool selection bert_model: - model_id: 
"sentence-transformers/all-MiniLM-L6-v2" + model_id: models/all-MiniLM-L12-v2 threshold: 0.85 use_cpu: true diff --git a/config/config.development.yaml b/config/config.development.yaml index fa7afdef..31051e7c 100644 --- a/config/config.development.yaml +++ b/config/config.development.yaml @@ -3,7 +3,7 @@ # for local development and debugging. bert_model: - model_id: sentence-transformers/all-MiniLM-L12-v2 + model_id: models/all-MiniLM-L12-v2 threshold: 0.6 use_cpu: true diff --git a/config/config.e2e.yaml b/config/config.e2e.yaml index 42167503..b588849f 100644 --- a/config/config.e2e.yaml +++ b/config/config.e2e.yaml @@ -1,5 +1,5 @@ bert_model: - model_id: sentence-transformers/all-MiniLM-L12-v2 + model_id: models/all-MiniLM-L12-v2 threshold: 0.6 use_cpu: true semantic_cache: diff --git a/config/config.production.yaml b/config/config.production.yaml index edd049a3..9c4dd4f8 100644 --- a/config/config.production.yaml +++ b/config/config.production.yaml @@ -3,7 +3,7 @@ # for production deployment with Jaeger or other OTLP-compatible backends. 
bert_model: - model_id: sentence-transformers/all-MiniLM-L12-v2 + model_id: models/all-MiniLM-L12-v2 threshold: 0.6 use_cpu: true diff --git a/config/config.recipe-accuracy.yaml b/config/config.recipe-accuracy.yaml index 18f2751d..584b0291 100644 --- a/config/config.recipe-accuracy.yaml +++ b/config/config.recipe-accuracy.yaml @@ -13,7 +13,7 @@ # - Jailbreak protection enabled bert_model: - model_id: sentence-transformers/all-MiniLM-L12-v2 + model_id: models/all-MiniLM-L12-v2 threshold: 0.7 # Higher threshold for better precision use_cpu: true diff --git a/config/config.recipe-latency.yaml b/config/config.recipe-latency.yaml index 00b3ae00..ce31a36f 100644 --- a/config/config.recipe-latency.yaml +++ b/config/config.recipe-latency.yaml @@ -13,7 +13,7 @@ # - Minimal observability overhead bert_model: - model_id: sentence-transformers/all-MiniLM-L12-v2 + model_id: models/all-MiniLM-L12-v2 threshold: 0.4 # Very low threshold for fast matching use_cpu: true diff --git a/config/config.recipe-token-efficiency.yaml b/config/config.recipe-token-efficiency.yaml index b76aeec4..49008db5 100644 --- a/config/config.recipe-token-efficiency.yaml +++ b/config/config.recipe-token-efficiency.yaml @@ -13,7 +13,7 @@ # - Larger batch sizes for efficient processing bert_model: - model_id: sentence-transformers/all-MiniLM-L12-v2 + model_id: models/all-MiniLM-L12-v2 threshold: 0.5 # Lower threshold for faster matching use_cpu: true diff --git a/config/config.testing.yaml b/config/config.testing.yaml index 9dc59e5c..91722f56 100644 --- a/config/config.testing.yaml +++ b/config/config.testing.yaml @@ -1,5 +1,5 @@ bert_model: - model_id: sentence-transformers/all-MiniLM-L12-v2 + model_id: models/all-MiniLM-L12-v2 threshold: 0.6 use_cpu: true diff --git a/config/config.yaml b/config/config.yaml index 5ad29d5a..1e2c43d7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,15 +1,15 @@ bert_model: - model_id: sentence-transformers/all-MiniLM-L12-v2 + model_id: models/all-MiniLM-L12-v2 
threshold: 0.6 use_cpu: true semantic_cache: enabled: true - backend_type: "memory" # Options: "memory" or "milvus" + backend_type: "memory" # Options: "memory" or "milvus" similarity_threshold: 0.8 - max_entries: 1000 # Only applies to memory backend + max_entries: 1000 # Only applies to memory backend ttl_seconds: 3600 - eviction_policy: "fifo" + eviction_policy: "fifo" tools: enabled: true @@ -32,13 +32,13 @@ prompt_guard: # NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field) vllm_endpoints: - name: "endpoint1" - address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network + address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network port: 8002 weight: 1 model_config: "qwen3": - reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax + reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax preferred_endpoints: ["endpoint1"] pii_policy: allow_by_default: true @@ -65,7 +65,7 @@ categories: model_scores: - model: qwen3 score: 0.7 - use_reasoning: false # Business performs better without reasoning + use_reasoning: false # Business performs better without reasoning - name: law system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters." model_scores: @@ -89,7 +89,7 @@ categories: model_scores: - model: qwen3 score: 0.6 - use_reasoning: true # Enable reasoning for complex chemistry + use_reasoning: true # Enable reasoning for complex chemistry - name: history system_prompt: "You are a historian with expertise across different time periods and cultures. 
Provide accurate historical context and analysis." model_scores: @@ -119,13 +119,13 @@ categories: model_scores: - model: qwen3 score: 1.0 - use_reasoning: true # Enable reasoning for complex math + use_reasoning: true # Enable reasoning for complex math - name: physics system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate." model_scores: - model: qwen3 score: 0.7 - use_reasoning: true # Enable reasoning for physics + use_reasoning: true # Enable reasoning for physics - name: computer science system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful." model_scores: @@ -178,23 +178,23 @@ api: detailed_goroutine_tracking: true high_resolution_timing: false sample_rate: 1.0 - duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] + duration_buckets: + [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] # Observability Configuration observability: tracing: - enabled: false # Enable distributed tracing (default: false) - provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry + enabled: false # Enable distributed tracing (default: false) + provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry exporter: - type: "stdout" # Exporter: otlp, jaeger, zipkin, stdout - endpoint: "localhost:4317" # OTLP endpoint (when type: otlp) - insecure: true # Use insecure connection (no TLS) + type: "stdout" # Exporter: otlp, jaeger, zipkin, stdout + endpoint: "localhost:4317" # OTLP endpoint (when type: otlp) + insecure: true # Use insecure connection (no TLS) sampling: - type: "always_on" # Sampling: always_on, always_off, probabilistic - rate: 1.0 # Sampling rate for 
probabilistic (0.0-1.0) + type: "always_on" # Sampling: always_on, always_off, probabilistic + rate: 1.0 # Sampling rate for probabilistic (0.0-1.0) resource: service_name: "vllm-semantic-router" service_version: "v0.1.0" deployment_environment: "development" - diff --git a/e2e-tests/llm-katan/Dockerfile b/e2e-tests/llm-katan/Dockerfile index 9e29080e..303fc016 100644 --- a/e2e-tests/llm-katan/Dockerfile +++ b/e2e-tests/llm-katan/Dockerfile @@ -17,7 +17,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Copy requirements first for better layer caching COPY requirements.txt ./ -RUN pip install --no-cache-dir -r requirements.txt +# Install PyTorch CPU-only version to save space (no CUDA for testing server) +RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \ + pip install --no-cache-dir -r requirements.txt # Copy the llm_katan package COPY llm_katan/ ./llm_katan/ diff --git a/src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py b/src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py index 783de39a..ba7c0ab6 100644 --- a/src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py +++ b/src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py @@ -69,13 +69,7 @@ import torch import torch.nn as nn from datasets import Dataset, load_dataset -from peft import ( - LoraConfig, - PeftConfig, - PeftModel, - TaskType, - get_peft_model, -) +from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support from sklearn.model_selection import train_test_split from transformers import ( diff --git a/src/training/training_lora/classifier_model_fine_tuning_lora/ft_qwen3_generative_lora.py b/src/training/training_lora/classifier_model_fine_tuning_lora/ft_qwen3_generative_lora.py index 01378b03..147d564f 100644 --- 
a/src/training/training_lora/classifier_model_fine_tuning_lora/ft_qwen3_generative_lora.py +++ b/src/training/training_lora/classifier_model_fine_tuning_lora/ft_qwen3_generative_lora.py @@ -53,13 +53,7 @@ import torch from datasets import Dataset, load_dataset -from peft import ( - LoraConfig, - PeftConfig, - PeftModel, - TaskType, - get_peft_model, -) +from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model from sklearn.metrics import accuracy_score, f1_score from sklearn.model_selection import train_test_split from transformers import ( diff --git a/src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py b/src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py index e9147caf..a48c4d1d 100644 --- a/src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py +++ b/src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py @@ -70,13 +70,7 @@ import torch import torch.nn as nn from datasets import Dataset, load_dataset -from peft import ( - LoraConfig, - PeftConfig, - PeftModel, - TaskType, - get_peft_model, -) +from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support from sklearn.model_selection import train_test_split from transformers import ( diff --git a/src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py b/src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py index 408792dc..da5007cd 100644 --- a/src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py +++ b/src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py @@ -77,13 +77,7 @@ import torch import torch.nn as nn from datasets import Dataset, load_dataset -from peft import ( - LoraConfig, - PeftConfig, - PeftModel, - TaskType, - get_peft_model, 
-) +from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support from sklearn.model_selection import train_test_split from transformers import ( diff --git a/tools/make/docker.mk b/tools/make/docker.mk index 975d91f4..9437354a 100644 --- a/tools/make/docker.mk +++ b/tools/make/docker.mk @@ -130,9 +130,24 @@ docker-compose-rebuild-llm-katan: docker-compose-up-llm-katan docker-compose-down: @$(LOG_TARGET) - @echo "Stopping docker-compose services..." + @echo "Stopping docker-compose services (default includes llm-katan)..." + @docker compose --profile llm-katan down + +docker-compose-down-core: + @$(LOG_TARGET) + @echo "Stopping core services only (no llm-katan)..." @docker compose down +docker-compose-down-testing: + @$(LOG_TARGET) + @echo "Stopping services with testing profile..." + @docker compose --profile testing down + +docker-compose-down-llm-katan: + @$(LOG_TARGET) + @echo "Stopping services with llm-katan profile..." 
+ @docker compose --profile llm-katan down + # Help target for Docker commands docker-help: @echo "Docker Make Targets:" @@ -152,7 +167,10 @@ docker-help: @echo " docker-compose-rebuild - Force rebuild then start" @echo " docker-compose-rebuild-testing - Force rebuild (testing profile)" @echo " docker-compose-rebuild-llm-katan - Force rebuild (llm-katan profile)" - @echo " docker-compose-down - Stop docker-compose services" + @echo " docker-compose-down - Stop services (default includes llm-katan)" + @echo " docker-compose-down-core - Stop core services only (no llm-katan)" + @echo " docker-compose-down-testing - Stop services with testing profile" + @echo " docker-compose-down-llm-katan - Stop services with llm-katan profile" @echo "" @echo "Environment Variables:" @echo " DOCKER_REGISTRY - Docker registry (default: ghcr.io/vllm-project/semantic-router)" diff --git a/tools/make/models.mk b/tools/make/models.mk index 500b8031..a22828e0 100644 --- a/tools/make/models.mk +++ b/tools/make/models.mk @@ -28,6 +28,9 @@ download-models-minimal: @if [ ! -f "models/Qwen/Qwen3-0.6B/.downloaded" ] || [ ! -d "models/Qwen/Qwen3-0.6B" ]; then \ hf download Qwen/Qwen3-0.6B --local-dir models/Qwen/Qwen3-0.6B && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/Qwen/Qwen3-0.6B/.downloaded; \ fi + @if [ ! -f "models/all-MiniLM-L12-v2/.downloaded" ] || [ ! -d "models/all-MiniLM-L12-v2" ]; then \ + hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir models/all-MiniLM-L12-v2 && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/all-MiniLM-L12-v2/.downloaded; \ + fi @if [ ! -f "models/category_classifier_modernbert-base_model/.downloaded" ] || [ ! 
-d "models/category_classifier_modernbert-base_model" ]; then \ hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir models/category_classifier_modernbert-base_model && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/category_classifier_modernbert-base_model/.downloaded; \ fi @@ -49,6 +52,9 @@ download-models-full: @if [ ! -f "models/Qwen/Qwen3-0.6B/.downloaded" ] || [ ! -d "models/Qwen/Qwen3-0.6B" ]; then \ hf download Qwen/Qwen3-0.6B --local-dir models/Qwen/Qwen3-0.6B && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/Qwen/Qwen3-0.6B/.downloaded; \ fi + @if [ ! -f "models/all-MiniLM-L12-v2/.downloaded" ] || [ ! -d "models/all-MiniLM-L12-v2" ]; then \ + hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir models/all-MiniLM-L12-v2 && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/all-MiniLM-L12-v2/.downloaded; \ + fi @if [ ! -f "models/category_classifier_modernbert-base_model/.downloaded" ] || [ ! -d "models/category_classifier_modernbert-base_model" ]; then \ hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir models/category_classifier_modernbert-base_model && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/category_classifier_modernbert-base_model/.downloaded; \ fi