Skip to content

Commit 5227553

Browse files
committed
feat: add reasoning in model eval and generate config to use the best reasoning options
Signed-off-by: Huamin Chen <[email protected]>
1 parent 14cb752 commit 5227553

File tree

2 files changed

+1002
-0
lines changed

2 files changed

+1002
-0
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
# Router Configuration Template
# This template is used by the reasoning evaluation script to generate optimized configs

# BERT model configuration for semantic similarity
bert_model:
  model_id: "sentence-transformers/all-MiniLM-L12-v2"
  threshold: 0.6
  use_cpu: true

# Semantic caching configuration
semantic_cache:
  enabled: false  # Disabled by default for development
  similarity_threshold: 0.8
  max_entries: 1000
  ttl_seconds: 3600

# Tool selection configuration
tools:
  enabled: true
  top_k: 3
  similarity_threshold: 0.2
  tools_db_path: "config/tools_db.json"
  fallback_to_empty: true

# Prompt guard (jailbreak detection) configuration
prompt_guard:
  enabled: false  # Disabled by default for development
  use_modernbert: true
  model_id: "models/jailbreak_classifier_modernbert-base_model"
  threshold: 0.7
  use_cpu: true
  jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

# Classification models configuration
classifier:
  category_model:
    model_id: "models/category_classifier_modernbert-base_model"
    use_modernbert: true
    threshold: 0.6
    use_cpu: true
    category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

  pii_model:
    model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
    use_modernbert: true
    threshold: 0.7
    use_cpu: true
    pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
vllm_endpoints:
  - name: "endpoint1"
    address: "127.0.0.1"
    port: 8000
    models:
      - ""  # Will be populated by evaluation script
    weight: 1  # Load balancing weight
    health_check_path: "/health"  # Optional health check endpoint

# Model-specific configuration
model_config:
  # Will be populated by evaluation script with model-specific settings
  # Example structure:
  # "model-name":
  #   reasoning_family: "qwen3"  # or "deepseek", "gpt-oss", etc.
  #   preferred_endpoints: ["endpoint1"]
  #   pii_policy:
  #     allow_by_default: true

# These will be populated by the evaluation script
default_model: ""
default_reasoning_effort: "high"
categories: []

# Reasoning family configurations - define how different model families handle reasoning syntax
reasoning_families:
  deepseek:
    type: "chat_template_kwargs"
    parameter: "thinking"

  qwen3:
    type: "chat_template_kwargs"
    parameter: "enable_thinking"

  gpt-oss:
    type: "reasoning_effort"
    parameter: "reasoning_effort"

  gpt:
    type: "reasoning_effort"
    parameter: "reasoning_effort"

0 commit comments

Comments
 (0)