# Router Configuration Template
# This template is used by the reasoning evaluation script to generate optimized configs

# BERT model configuration for semantic similarity
bert_model:
  model_id: "sentence-transformers/all-MiniLM-L12-v2"
  threshold: 0.6
  use_cpu: true

# Semantic caching configuration
semantic_cache:
  enabled: false  # Disabled by default for development
  similarity_threshold: 0.8
  max_entries: 1000
  ttl_seconds: 3600

# Tool selection configuration
tools:
  enabled: true
  top_k: 3
  similarity_threshold: 0.2
  tools_db_path: "config/tools_db.json"
  fallback_to_empty: true

# Prompt guard (jailbreak detection) configuration
prompt_guard:
  enabled: false  # Disabled by default for development
  use_modernbert: true
  model_id: "models/jailbreak_classifier_modernbert-base_model"
  threshold: 0.7
  use_cpu: true
  jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

# Classification models configuration
classifier:
  category_model:
    model_id: "models/category_classifier_modernbert-base_model"
    use_modernbert: true
    threshold: 0.6
    use_cpu: true
    category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

  pii_model:
    model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
    use_modernbert: true
    threshold: 0.7
    use_cpu: true
    pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
vllm_endpoints:
  - name: "endpoint1"
    address: "127.0.0.1"
    port: 8000
    models:
      - ""  # Will be populated by evaluation script
    weight: 1  # Load balancing weight
    health_check_path: "/health"  # Optional health check endpoint

# Model-specific configuration
model_config:
  # Will be populated by evaluation script with model-specific settings
  # Example structure:
  # "model-name":
  #   reasoning_family: "qwen3"  # or "deepseek", "gpt-oss", etc.
  #   preferred_endpoints: ["endpoint1"]
  #   pii_policy:
  #     allow_by_default: true

# These will be populated by the evaluation script
default_model: ""
default_reasoning_effort: "high"
categories: []

# Reasoning family configurations - define how different model families handle reasoning syntax
reasoning_families:
  deepseek:
    type: "chat_template_kwargs"
    parameter: "thinking"

  qwen3:
    type: "chat_template_kwargs"
    parameter: "enable_thinking"

  gpt-oss:
    type: "reasoning_effort"
    parameter: "reasoning_effort"

  gpt:
    type: "reasoning_effort"
    parameter: "reasoning_effort"