From b3cb66e4089a3b96e8e8327b8631c183c812e10b Mon Sep 17 00:00:00 2001 From: Huamin Chen Date: Wed, 3 Sep 2025 13:16:22 -0400 Subject: [PATCH] doc: update workflow to create config.yaml Signed-off-by: Huamin Chen --- src/training/model_eval/mmlu_pro_vllm_eval.py | 12 +- src/training/model_eval/result_to_config.py | 135 +++++++++++++++++- website/docs/getting-started/configuration.md | 97 ++++++++++++- 3 files changed, 236 insertions(+), 8 deletions(-) diff --git a/src/training/model_eval/mmlu_pro_vllm_eval.py b/src/training/model_eval/mmlu_pro_vllm_eval.py index 8ca5e10b..b6d82233 100755 --- a/src/training/model_eval/mmlu_pro_vllm_eval.py +++ b/src/training/model_eval/mmlu_pro_vllm_eval.py @@ -96,8 +96,18 @@ def get_available_models(endpoint: str, api_key: str = "") -> List[str]: try: models = client.models.list() return [model.id for model in models.data] - except: + except Exception as e: print(f"Error communicating with vLLM endpoint: {e}") + # Try direct HTTP request as fallback + try: + response = requests.get(f"{endpoint}/models") + if response.status_code == 200: + models_data = response.json() + return [model["id"] for model in models_data.get("data", [])] + else: + print(f"Failed to get models list. 
Status code: {response.status_code}") + except Exception as e: + print(f"Failed to get models list via HTTP: {e}") return [] diff --git a/src/training/model_eval/result_to_config.py b/src/training/model_eval/result_to_config.py index c7bb9927..6d7feb79 100644 --- a/src/training/model_eval/result_to_config.py +++ b/src/training/model_eval/result_to_config.py @@ -87,13 +87,45 @@ def generate_config_yaml(category_accuracies, similarity_threshold): "max_entries": 1000, "ttl_seconds": 3600, }, - "classifier": { - "model_id": "models/category_classifier_modernbert-base_model", - "threshold": 0.1, + "tools": { + "enabled": True, + "top_k": 3, + "similarity_threshold": 0.2, + "tools_db_path": "config/tools_db.json", + "fallback_to_empty": True, + }, + "prompt_guard": { + "enabled": True, + "use_modernbert": True, + "model_id": "models/jailbreak_classifier_modernbert-base_model", + "threshold": 0.7, "use_cpu": True, - "category_mapping_path": "models/category_classifier_modernbert-base_model/category_mapping.json", + "jailbreak_mapping_path": "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json", + }, + "gpu_config": { + "flops": 312000000000000, # 312e12 fp16 + "hbm": 2000000000000, # 2e12 (2 TB/s) + "description": "A100-80G", + }, + "classifier": { + "category_model": { + "model_id": "models/category_classifier_modernbert-base_model", + "use_modernbert": True, + "threshold": 0.6, + "use_cpu": True, + "category_mapping_path": "models/category_classifier_modernbert-base_model/category_mapping.json", + }, + "pii_model": { + "model_id": "models/pii_classifier_modernbert-base_presidio_token_model", + "use_modernbert": True, + "threshold": 0.7, + "use_cpu": True, + "pii_mapping_path": "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json", + }, + "load_aware": False, }, "categories": [], + "default_reasoning_effort": "medium", # Default reasoning effort level (low, medium, high) } # Get the best model overall to use as 
default (excluding 'auto') @@ -114,6 +146,80 @@ def generate_config_yaml(category_accuracies, similarity_threshold): default_model = max(model_avg_accuracies, key=model_avg_accuracies.get) config["default_model"] = default_model.split(":")[0] # Remove the approach suffix + # Define category-specific reasoning settings + category_reasoning = { + "math": { + "use_reasoning": True, + "reasoning_description": "Mathematical problems require step-by-step reasoning", + "reasoning_effort": "high", + }, + "physics": { + "use_reasoning": True, + "reasoning_description": "Physics concepts need logical analysis", + "reasoning_effort": "high", + }, + "chemistry": { + "use_reasoning": True, + "reasoning_description": "Chemical reactions and formulas require systematic thinking", + "reasoning_effort": "high", + }, + "computer science": { + "use_reasoning": True, + "reasoning_description": "Programming and algorithms need logical reasoning", + "reasoning_effort": "high", + }, + "engineering": { + "use_reasoning": True, + "reasoning_description": "Engineering problems require systematic problem-solving", + "reasoning_effort": "high", + }, + "biology": { + "use_reasoning": True, + "reasoning_description": "Biological processes benefit from structured analysis", + "reasoning_effort": "medium", + }, + "business": { + "use_reasoning": False, + "reasoning_description": "Business content is typically conversational", + "reasoning_effort": "low", + }, + "law": { + "use_reasoning": False, + "reasoning_description": "Legal content is typically explanatory", + "reasoning_effort": "medium", + }, + "psychology": { + "use_reasoning": False, + "reasoning_description": "Psychology content is usually explanatory", + "reasoning_effort": "medium", + }, + "history": { + "use_reasoning": False, + "reasoning_description": "Historical content is narrative-based", + "reasoning_effort": "low", + }, + "economics": { + "use_reasoning": False, + "reasoning_description": "Economic discussions are usually 
explanatory", + "reasoning_effort": "medium", + }, + "philosophy": { + "use_reasoning": False, + "reasoning_description": "Philosophical discussions are conversational", + "reasoning_effort": "medium", + }, + "health": { + "use_reasoning": False, + "reasoning_description": "Health information is typically informational", + "reasoning_effort": "medium", + }, + "other": { + "use_reasoning": False, + "reasoning_description": "General content doesn't require reasoning", + "reasoning_effort": "low", + }, + } + # Create category entries with ranked model-score pairs (excluding 'auto') for category, models in category_accuracies.items(): # Sort models by accuracy (descending), exclude 'auto' @@ -126,8 +232,25 @@ def generate_config_yaml(category_accuracies, similarity_threshold): model_scores = [ {"model": model, "score": float(acc)} for model, acc in ranked_models ] - # Add category to config - config["categories"].append({"name": category, "model_scores": model_scores}) + # Get reasoning settings for the category + reasoning_settings = category_reasoning.get( + category.lower(), + { + "use_reasoning": False, + "reasoning_description": "General content doesn't require reasoning", + "reasoning_effort": "low", + }, + ) + # Add category to config with reasoning settings + config["categories"].append( + { + "name": category, + "use_reasoning": reasoning_settings["use_reasoning"], + "reasoning_description": reasoning_settings["reasoning_description"], + "reasoning_effort": reasoning_settings["reasoning_effort"], + "model_scores": model_scores, + } + ) return config diff --git a/website/docs/getting-started/configuration.md b/website/docs/getting-started/configuration.md index 6ba1ffda..56391ef8 100644 --- a/website/docs/getting-started/configuration.md +++ b/website/docs/getting-started/configuration.md @@ -439,10 +439,105 @@ make test-pii # PII detection make test-prompt-guard # Jailbreak protection ``` +## Configuration Generation + +The Semantic Router supports automated 
configuration generation based on model performance benchmarks. This workflow uses MMLU-Pro evaluation results to determine optimal model routing for different categories. + +### Benchmarking Workflow + +1. **Run MMLU-Pro Evaluation:** + ```bash + # Evaluate models using MMLU-Pro benchmark + python src/training/model_eval/mmlu_pro_vllm_eval.py \ + --endpoint http://localhost:8000/v1 \ + --models phi4,gemma3:27b,mistral-small3.1 \ + --samples-per-category 5 \ + --use-cot \ + --concurrent-requests 4 \ + --output-dir results + ``` + +2. **Generate Configuration:** + ```bash + # Generate config.yaml from benchmark results + python src/training/model_eval/result_to_config.py \ + --results-dir results \ + --output-file config/config.yaml \ + --similarity-threshold 0.80 + ``` + +### Generated Configuration Features + +The generated configuration includes: + +- **Model Performance Rankings:** Models are ranked by performance for each category +- **Reasoning Settings:** Automatically configures reasoning requirements per category: + - `use_reasoning`: Whether to use step-by-step reasoning + - `reasoning_description`: Description of reasoning approach + - `reasoning_effort`: Required effort level (low/medium/high) +- **Default Model Selection:** Best overall performing model is set as default +- **Security and Performance Settings:** Pre-configured default values (hard-coded in `result_to_config.py`) for: + - PII detection thresholds + - Semantic cache settings + - Tool selection parameters + +### Customizing Generated Config + +The generated config.yaml can be customized by: + +1. Editing category-specific settings in `result_to_config.py` +2. Adjusting thresholds and parameters via command line arguments +3. 
Manually modifying the generated config.yaml + +### Example Workflow + +Here's a complete example workflow for generating and testing a configuration: + +```bash +# Run MMLU-Pro evaluation +# Option 1: Specify models manually +python src/training/model_eval/mmlu_pro_vllm_eval.py \ + --endpoint http://localhost:8000/v1 \ + --models phi4,gemma3:27b,mistral-small3.1 \ + --samples-per-category 5 \ + --use-cot \ + --concurrent-requests 4 \ + --output-dir results \ + --max-tokens 2048 \ + --temperature 0.0 \ + --seed 42 + +# Option 2: Auto-discover models from endpoint +python src/training/model_eval/mmlu_pro_vllm_eval.py \ + --endpoint http://localhost:8000/v1 \ + --samples-per-category 5 \ + --use-cot \ + --concurrent-requests 4 \ + --output-dir results \ + --max-tokens 2048 \ + --temperature 0.0 \ + --seed 42 + +# Generate initial config +python src/training/model_eval/result_to_config.py \ + --results-dir results \ + --output-file config/config.yaml \ + --similarity-threshold 0.80 + +# Test the generated config +make test +``` + +This workflow ensures your configuration is: +- Based on actual model performance +- Properly tested before deployment +- Easy to keep under version control for tracking changes +- Optimized for your specific use case + ## Next Steps - **[Installation Guide](installation.md)** - Setup instructions - **[Quick Start Guide](installation.md)** - Basic usage examples - **[API Documentation](../api/router.md)** - Complete API reference -The configuration system is designed to be simple yet powerful. Start with the basic configuration and gradually enable advanced features as needed. +The configuration system is designed to be simple yet powerful. Start with the basic configuration and gradually enable advanced features as needed. \ No newline at end of file