Skip to content

Commit 898da32

Browse files
authored
Merge pull request #30 from rootfs/map
2 parents 8e8c325 + ad85839 commit 898da32

File tree

3 files changed

+236
-8
lines changed

3 files changed

+236
-8
lines changed

src/training/model_eval/mmlu_pro_vllm_eval.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,18 @@ def get_available_models(endpoint: str, api_key: str = "") -> List[str]:
9696
try:
9797
models = client.models.list()
9898
return [model.id for model in models.data]
99-
except:
99+
except Exception as e:
100100
print(f"Error communicating with vLLM endpoint: {e}")
101+
# Try direct HTTP request as fallback
102+
try:
103+
response = requests.get(f"{endpoint}/models")
104+
if response.status_code == 200:
105+
models_data = response.json()
106+
return [model["id"] for model in models_data.get("data", [])]
107+
else:
108+
print(f"Failed to get models list. Status code: {response.status_code}")
109+
except Exception as e:
110+
print(f"Failed to get models list via HTTP: {e}")
101111
return []
102112

103113

src/training/model_eval/result_to_config.py

Lines changed: 129 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,45 @@ def generate_config_yaml(category_accuracies, similarity_threshold):
8787
"max_entries": 1000,
8888
"ttl_seconds": 3600,
8989
},
90-
"classifier": {
91-
"model_id": "models/category_classifier_modernbert-base_model",
92-
"threshold": 0.1,
90+
"tools": {
91+
"enabled": True,
92+
"top_k": 3,
93+
"similarity_threshold": 0.2,
94+
"tools_db_path": "config/tools_db.json",
95+
"fallback_to_empty": True,
96+
},
97+
"prompt_guard": {
98+
"enabled": True,
99+
"use_modernbert": True,
100+
"model_id": "models/jailbreak_classifier_modernbert-base_model",
101+
"threshold": 0.7,
93102
"use_cpu": True,
94-
"category_mapping_path": "models/category_classifier_modernbert-base_model/category_mapping.json",
103+
"jailbreak_mapping_path": "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json",
104+
},
105+
"gpu_config": {
106+
"flops": 312000000000000, # 312e12 fp16
107+
"hbm": 2000000000000, # 2e12 (2 TB/s)
108+
"description": "A100-80G",
109+
},
110+
"classifier": {
111+
"category_model": {
112+
"model_id": "models/category_classifier_modernbert-base_model",
113+
"use_modernbert": True,
114+
"threshold": 0.6,
115+
"use_cpu": True,
116+
"category_mapping_path": "models/category_classifier_modernbert-base_model/category_mapping.json",
117+
},
118+
"pii_model": {
119+
"model_id": "models/pii_classifier_modernbert-base_presidio_token_model",
120+
"use_modernbert": True,
121+
"threshold": 0.7,
122+
"use_cpu": True,
123+
"pii_mapping_path": "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json",
124+
},
125+
"load_aware": False,
95126
},
96127
"categories": [],
128+
"default_reasoning_effort": "medium", # Default reasoning effort level (low, medium, high)
97129
}
98130

99131
# Get the best model overall to use as default (excluding 'auto')
@@ -114,6 +146,80 @@ def generate_config_yaml(category_accuracies, similarity_threshold):
114146
default_model = max(model_avg_accuracies, key=model_avg_accuracies.get)
115147
config["default_model"] = default_model.split(":")[0] # Remove the approach suffix
116148

149+
# Define category-specific reasoning settings
150+
category_reasoning = {
151+
"math": {
152+
"use_reasoning": True,
153+
"reasoning_description": "Mathematical problems require step-by-step reasoning",
154+
"reasoning_effort": "high",
155+
},
156+
"physics": {
157+
"use_reasoning": True,
158+
"reasoning_description": "Physics concepts need logical analysis",
159+
"reasoning_effort": "high",
160+
},
161+
"chemistry": {
162+
"use_reasoning": True,
163+
"reasoning_description": "Chemical reactions and formulas require systematic thinking",
164+
"reasoning_effort": "high",
165+
},
166+
"computer science": {
167+
"use_reasoning": True,
168+
"reasoning_description": "Programming and algorithms need logical reasoning",
169+
"reasoning_effort": "high",
170+
},
171+
"engineering": {
172+
"use_reasoning": True,
173+
"reasoning_description": "Engineering problems require systematic problem-solving",
174+
"reasoning_effort": "high",
175+
},
176+
"biology": {
177+
"use_reasoning": True,
178+
"reasoning_description": "Biological processes benefit from structured analysis",
179+
"reasoning_effort": "medium",
180+
},
181+
"business": {
182+
"use_reasoning": False,
183+
"reasoning_description": "Business content is typically conversational",
184+
"reasoning_effort": "low",
185+
},
186+
"law": {
187+
"use_reasoning": False,
188+
"reasoning_description": "Legal content is typically explanatory",
189+
"reasoning_effort": "medium",
190+
},
191+
"psychology": {
192+
"use_reasoning": False,
193+
"reasoning_description": "Psychology content is usually explanatory",
194+
"reasoning_effort": "medium",
195+
},
196+
"history": {
197+
"use_reasoning": False,
198+
"reasoning_description": "Historical content is narrative-based",
199+
"reasoning_effort": "low",
200+
},
201+
"economics": {
202+
"use_reasoning": False,
203+
"reasoning_description": "Economic discussions are usually explanatory",
204+
"reasoning_effort": "medium",
205+
},
206+
"philosophy": {
207+
"use_reasoning": False,
208+
"reasoning_description": "Philosophical discussions are conversational",
209+
"reasoning_effort": "medium",
210+
},
211+
"health": {
212+
"use_reasoning": False,
213+
"reasoning_description": "Health information is typically informational",
214+
"reasoning_effort": "medium",
215+
},
216+
"other": {
217+
"use_reasoning": False,
218+
"reasoning_description": "General content doesn't require reasoning",
219+
"reasoning_effort": "low",
220+
},
221+
}
222+
117223
# Create category entries with ranked model-score pairs (excluding 'auto')
118224
for category, models in category_accuracies.items():
119225
# Sort models by accuracy (descending), exclude 'auto'
@@ -126,8 +232,25 @@ def generate_config_yaml(category_accuracies, similarity_threshold):
126232
model_scores = [
127233
{"model": model, "score": float(acc)} for model, acc in ranked_models
128234
]
129-
# Add category to config
130-
config["categories"].append({"name": category, "model_scores": model_scores})
235+
# Get reasoning settings for the category
236+
reasoning_settings = category_reasoning.get(
237+
category.lower(),
238+
{
239+
"use_reasoning": False,
240+
"reasoning_description": "General content doesn't require reasoning",
241+
"reasoning_effort": "low",
242+
},
243+
)
244+
# Add category to config with reasoning settings
245+
config["categories"].append(
246+
{
247+
"name": category,
248+
"use_reasoning": reasoning_settings["use_reasoning"],
249+
"reasoning_description": reasoning_settings["reasoning_description"],
250+
"reasoning_effort": reasoning_settings["reasoning_effort"],
251+
"model_scores": model_scores,
252+
}
253+
)
131254

132255
return config
133256

website/docs/getting-started/configuration.md

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,105 @@ make test-pii # PII detection
439439
make test-prompt-guard # Jailbreak protection
440440
```
441441

442+
## Configuration Generation
443+
444+
The Semantic Router supports automated configuration generation based on model performance benchmarks. This workflow uses MMLU-Pro evaluation results to determine optimal model routing for different categories.
445+
446+
### Benchmarking Workflow
447+
448+
1. **Run MMLU-Pro Evaluation:**
449+
```bash
450+
# Evaluate models using MMLU-Pro benchmark
451+
python src/training/model_eval/mmlu_pro_vllm_eval.py \
452+
--endpoint http://localhost:8000/v1 \
453+
--models phi4,gemma3:27b,mistral-small3.1 \
454+
--samples-per-category 5 \
455+
--use-cot \
456+
--concurrent-requests 4 \
457+
--output-dir results
458+
```
459+
460+
2. **Generate Configuration:**
461+
```bash
462+
# Generate config.yaml from benchmark results
463+
python src/training/model_eval/result_to_config.py \
464+
--results-dir results \
465+
--output-file config/config.yaml \
466+
--similarity-threshold 0.80
467+
```
468+
469+
### Generated Configuration Features
470+
471+
The generated configuration includes:
472+
473+
- **Model Performance Rankings:** Models are ranked by performance for each category
474+
- **Reasoning Settings:** Automatically configures reasoning requirements per category:
475+
- `use_reasoning`: Whether to use step-by-step reasoning
476+
- `reasoning_description`: Short rationale for why the category does or does not use reasoning
477+
- `reasoning_effort`: Required effort level (low/medium/high)
478+
- **Default Model Selection:** The best-performing model overall is set as the default
479+
- **Security and Performance Settings:** Pre-configured default values for:
480+
- PII detection thresholds
481+
- Semantic cache settings
482+
- Tool selection parameters
483+
484+
### Customizing Generated Config
485+
486+
The generated config.yaml can be customized by:
487+
488+
1. Editing category-specific reasoning settings (the `category_reasoning` dictionary) in `result_to_config.py`
489+
2. Adjusting thresholds and parameters via command line arguments (e.g. `--similarity-threshold`)
490+
3. Manually modifying the generated config.yaml
491+
492+
### Example Workflow
493+
494+
Here's a complete example workflow for generating and testing a configuration (run only one of the two evaluation options below, not both):
495+
496+
```bash
497+
# Run MMLU-Pro evaluation
498+
# Option 1: Specify models manually
499+
python src/training/model_eval/mmlu_pro_vllm_eval.py \
500+
--endpoint http://localhost:8000/v1 \
501+
--models phi4,gemma3:27b,mistral-small3.1 \
502+
--samples-per-category 5 \
503+
--use-cot \
504+
--concurrent-requests 4 \
505+
--output-dir results \
506+
--max-tokens 2048 \
507+
--temperature 0.0 \
508+
--seed 42
509+
510+
# Option 2: Auto-discover models from endpoint
511+
python src/training/model_eval/mmlu_pro_vllm_eval.py \
512+
--endpoint http://localhost:8000/v1 \
513+
--samples-per-category 5 \
514+
--use-cot \
515+
--concurrent-requests 4 \
516+
--output-dir results \
517+
--max-tokens 2048 \
518+
--temperature 0.0 \
519+
--seed 42
520+
521+
# Generate initial config
522+
python src/training/model_eval/result_to_config.py \
523+
--results-dir results \
524+
--output-file config/config.yaml \
525+
--similarity-threshold 0.80
526+
527+
# Test the generated config
528+
make test
529+
```
530+
531+
This workflow ensures your configuration is:
532+
- Based on actual model performance
533+
- Properly tested before deployment
534+
- Easy to keep under version control for tracking changes
535+
- Optimized for your specific use case
536+
442537
## Next Steps
443538

444539
- **[Installation Guide](installation.md)** - Setup instructions
445540
- **[Quick Start Guide](installation.md)** - Basic usage examples
446541
- **[API Documentation](../api/router.md)** - Complete API reference
447542

448-
The configuration system is designed to be simple yet powerful. Start with the basic configuration and gradually enable advanced features as needed.
543+
The configuration system is designed to be simple yet powerful. Start with the basic configuration and gradually enable advanced features as needed.

0 commit comments

Comments
 (0)