From 95e00550662ed639c1bdec5110be3e0144f72fb7 Mon Sep 17 00:00:00 2001
From: bitliu
Date: Mon, 3 Nov 2025 19:48:30 +0800
Subject: [PATCH 1/8] feat(router): add intent-aware LoRA routing support

- Add LoRAAdapter struct to define available LoRA adapters per model
- Add lora_name field to ModelScore for specifying LoRA adapter
- Implement validation to ensure lora_name references defined LoRAs
- Update model selection logic to use LoRA name when specified
- Add comprehensive example configuration and documentation
- Update README to reflect LoRA adapter routing capability

This enables the semantic router to route requests to different LoRA
adapters based on classified intent/category, allowing domain-specific
fine-tuned models to be selected automatically.

Fixes: https://github.com/vllm-project/semantic-router/issues/545

Signed-off-by: bitliu
---
 README.md                                     |   8 +-
 .../in-tree/lora_routing_example.yaml         | 116 ++++++++++++++++++
 .../pkg/classification/classifier.go          |  18 ++-
 src/semantic-router/pkg/config/config.go      |  17 +++
 src/semantic-router/pkg/config/validator.go   |  35 ++++++
 .../docs/overview/categories/configuration.md |  98 ++++++++++++++-
 6 files changed, 283 insertions(+), 9 deletions(-)
 create mode 100644 config/intelligent-routing/in-tree/lora_routing_example.yaml

diff --git a/README.md b/README.md
index 3ec38610..d9fa5f15 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,9 @@
 
 ### Intelligent Routing 🧠
 
-#### Auto-Reasoning and Auto-Selection of Models
+#### Auto-Selection of Models and LoRA Adapters
 
-An **Mixture-of-Models** (MoM) router that intelligently directs OpenAI API requests to the most suitable models from a defined pool based on **Semantic Understanding** of the request's intent (Complexity, Task, Tools).
+A **Mixture-of-Models** (MoM) router that intelligently directs OpenAI API requests to the most suitable models or LoRA adapters from a defined pool based on **Semantic Understanding** of the request's intent (Complexity, Task, Tools).
 
 ![mom-overview](./website/static/img/mom-overview.png)
 
@@ -79,10 +79,6 @@ Detect PII in the prompt, avoiding sending PII to the LLM so as to protect the p
 
 Detect if the prompt is a jailbreak prompt, avoiding sending jailbreak prompts to the LLM so as to prevent the LLM from misbehaving. Can be configured globally or at the category level for fine-grained security control.
 
-### Distributed Tracing 🔍
-
-Comprehensive observability with OpenTelemetry distributed tracing provides fine-grained visibility into the request processing pipeline.
-
 ### vLLM Semantic Router Dashboard 💬
 
 Watch the quick demo of the dashboard below:

diff --git a/config/intelligent-routing/in-tree/lora_routing_example.yaml b/config/intelligent-routing/in-tree/lora_routing_example.yaml
new file mode 100644
index 00000000..e3510e25
--- /dev/null
+++ b/config/intelligent-routing/in-tree/lora_routing_example.yaml
@@ -0,0 +1,116 @@
+# Example configuration for Intent-Aware LoRA Routing
+# This demonstrates how to use the lora_name field to route requests to different
+# LoRA adapters based on the classified intent/category.
+#
+# Prerequisites:
+# 1. vLLM server must be started with --enable-lora flag
+# 2. 
LoRA adapters must be registered at server startup using --lora-modules +# Example: vllm serve meta-llama/Llama-2-7b-hf \ +# --enable-lora \ +# --lora-modules technical-lora=/path/to/technical-adapter \ +# medical-lora=/path/to/medical-adapter \ +# legal-lora=/path/to/legal-adapter +# +# How it works: +# - When a request is classified into a category (e.g., "technical") +# - The router selects the best ModelScore for that category +# - If the ModelScore has a lora_name specified, that name is used as the final model name +# - The request is sent to vLLM with model="technical-lora" instead of model="llama2-7b" +# - vLLM automatically routes to the appropriate LoRA adapter + +bert_model: + model_id: models/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + +# vLLM Endpoints Configuration +vllm_endpoints: + - name: "vllm-primary" + address: "172.28.0.20" + port: 8002 + weight: 1 + +# Base model configuration +# IMPORTANT: LoRA adapters must be defined here before they can be referenced in model_scores +model_config: + "llama2-7b": + reasoning_family: "llama2" + preferred_endpoints: ["vllm-primary"] + pii_policy: + allow_by_default: true + # Define available LoRA adapters for this model + # These names must match the LoRA modules registered with vLLM at startup + loras: + - name: "technical-lora" + description: "Optimized for programming and technical questions" + - name: "medical-lora" + description: "Specialized for medical and healthcare domain" + - name: "legal-lora" + description: "Fine-tuned for legal questions and law-related topics" + +# Classifier configuration +classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + +# Categories with LoRA routing +categories: + - name: technical + description: "Programming, software engineering, and technical questions" + system_prompt: "You are an expert software engineer with deep knowledge of programming languages, algorithms, system design, and best practices. Provide clear, accurate technical guidance with code examples when appropriate." + model_scores: + - model: llama2-7b # Base model name (for endpoint selection and PII policy) + lora_name: technical-lora # LoRA adapter name (used as final model name in request) + score: 1.0 + use_reasoning: true + reasoning_effort: medium + + - name: medical + description: "Medical and healthcare questions" + system_prompt: "You are a medical expert with comprehensive knowledge of anatomy, physiology, diseases, treatments, and healthcare practices. Provide accurate medical information while emphasizing that responses are for educational purposes only and not a substitute for professional medical advice." + model_scores: + - model: llama2-7b + lora_name: medical-lora # Different LoRA adapter for medical domain + score: 1.0 + use_reasoning: true + reasoning_effort: high + + - name: legal + description: "Legal questions and law-related topics" + system_prompt: "You are a legal expert with knowledge of legal principles, case law, and statutory interpretation. Provide accurate legal information while clearly stating that responses are for informational purposes only and do not constitute legal advice." 
+ model_scores: + - model: llama2-7b + lora_name: legal-lora # Different LoRA adapter for legal domain + score: 1.0 + use_reasoning: true + reasoning_effort: high + + - name: general + description: "General questions that don't fit specific domains" + system_prompt: "You are a helpful AI assistant with broad knowledge across many topics. Provide clear, accurate, and helpful responses." + model_scores: + - model: llama2-7b # No lora_name specified - uses base model + score: 0.8 + use_reasoning: false + +# Default model for fallback +default_model: llama2-7b + +# Benefits of LoRA Routing: +# 1. Domain-Specific Expertise: Each LoRA adapter is fine-tuned for specific domains +# 2. Cost Efficiency: Share base model weights across adapters, reducing memory footprint +# 3. Easy A/B Testing: Gradually roll out new adapters by adjusting scores +# 4. Flexible Deployment: Add/remove adapters without restarting the router +# 5. Performance: vLLM efficiently serves multiple LoRA adapters with minimal overhead +# +# Use Cases: +# - Multi-domain chatbots (technical support, medical advice, legal information) +# - Task-specific optimization (code generation, summarization, translation) +# - Language-specific adapters for multilingual systems +# - Customer-specific adapters for personalized experiences +# - Version testing (compare different adapter versions) + diff --git a/src/semantic-router/pkg/classification/classifier.go b/src/semantic-router/pkg/classification/classifier.go index 99550192..19677e12 100644 --- a/src/semantic-router/pkg/classification/classifier.go +++ b/src/semantic-router/pkg/classification/classifier.go @@ -984,7 +984,15 @@ func (c *Classifier) selectBestModelInternal(cat *config.Category, modelFilter f if modelFilter != nil && !modelFilter(model) { return } - c.updateBestModel(modelScore.Score, model, &bestScore, &bestModel) + // Use LoRA name if specified, otherwise use the base model name + // This enables intent-aware LoRA routing where the final model name + // in the request becomes the LoRA adapter name + finalModelName := model + if modelScore.LoRAName != "" { + finalModelName = modelScore.LoRAName + logging.Debugf("Using LoRA adapter '%s' for base model '%s'", finalModelName, model) + } + c.updateBestModel(modelScore.Score, finalModelName, &bestScore, &bestModel) }) return bestModel, bestScore @@ -1024,13 +1032,19 @@ func (c *Classifier) SelectBestModelFromList(candidateModels []string, categoryN } // GetModelsForCategory returns all models that are configured for the given category +// If a ModelScore has a LoRAName specified, the LoRA name is returned instead of the base model name func (c *Classifier) GetModelsForCategory(categoryName string) []string { var models []string for _, category := range c.Config.Categories { if strings.EqualFold(category.Name, categoryName) { for _, modelScore := range category.ModelScores { - models = append(models, modelScore.Model) + // Use LoRA name if specified, otherwise use the base model name + if modelScore.LoRAName != "" { + models = append(models, modelScore.LoRAName) + } else { + models = append(models, modelScore.Model) + } } break } diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go index b1a63cd9..eb1c1107 100644 --- a/src/semantic-router/pkg/config/config.go +++ b/src/semantic-router/pkg/config/config.go @@ -373,6 +373,18 @@ type ModelParams struct { // Reasoning family for this model (e.g., "deepseek", "qwen3", "gpt-oss") // If empty, the model doesn't support reasoning mode 
ReasoningFamily string `yaml:"reasoning_family,omitempty"` + + // LoRA adapters available for this model + // These must be registered with vLLM using --lora-modules flag + LoRAs []LoRAAdapter `yaml:"loras,omitempty"` +} + +// LoRAAdapter represents a LoRA adapter configuration for a model +type LoRAAdapter struct { + // Name of the LoRA adapter (must match the name registered with vLLM) + Name string `yaml:"name"` + // Description of what this LoRA adapter is optimized for + Description string `yaml:"description,omitempty"` } // ReasoningFamilyConfig defines how a reasoning family handles reasoning mode @@ -426,6 +438,11 @@ type Category struct { type ModelScore struct { Model string `yaml:"model"` Score float64 `yaml:"score"` + // Optional LoRA adapter name - when specified, this LoRA adapter name will be used + // as the final model name in requests instead of the base model name. + // This enables intent-aware LoRA routing where different LoRA adapters can be + // selected based on the classified category. + LoRAName string `yaml:"lora_name,omitempty"` // Reasoning mode control on Model Level ModelReasoningControl `yaml:",inline"` } diff --git a/src/semantic-router/pkg/config/validator.go b/src/semantic-router/pkg/config/validator.go index 4cae4c5e..0f855f67 100644 --- a/src/semantic-router/pkg/config/validator.go +++ b/src/semantic-router/pkg/config/validator.go @@ -102,6 +102,13 @@ func validateConfigStructure(cfg *RouterConfig) error { if modelScore.UseReasoning == nil { return fmt.Errorf("category '%s', model '%s': missing required field 'use_reasoning'", category.Name, modelScore.Model) } + + // Validate LoRA name if specified + if modelScore.LoRAName != "" { + if err := validateLoRAName(cfg, modelScore.Model, modelScore.LoRAName); err != nil { + return fmt.Errorf("category '%s', model '%s': %w", category.Name, modelScore.Model, err) + } + } } } @@ -112,3 +119,31 @@ func validateConfigStructure(cfg *RouterConfig) error { return nil } + +// validateLoRAName checks if the specified LoRA name is defined in the model's configuration +func validateLoRAName(cfg *RouterConfig, modelName string, loraName string) error { + // Check if the model exists in model_config + modelParams, exists := cfg.ModelConfig[modelName] + if !exists { + return fmt.Errorf("lora_name '%s' specified but model '%s' is not defined in model_config", loraName, modelName) + } + + // Check if the model has any LoRAs defined + if len(modelParams.LoRAs) == 0 { + return fmt.Errorf("lora_name '%s' specified but model '%s' has no loras defined in model_config", loraName, modelName) + } + + // Check if the specified LoRA name exists in the model's LoRA list + for _, lora := range modelParams.LoRAs { + if lora.Name == loraName { + return nil // Valid LoRA name found + } + } + + // LoRA name not found, provide helpful error message + availableLoRAs := make([]string, len(modelParams.LoRAs)) + for i, lora := range modelParams.LoRAs { + availableLoRAs[i] = lora.Name + } + return fmt.Errorf("lora_name '%s' is not defined in model '%s' loras. 
Available LoRAs: %v", loraName, modelName, availableLoRAs)
+}
diff --git a/website/docs/overview/categories/configuration.md b/website/docs/overview/categories/configuration.md
index 46f15c31..4fd3fb2f 100644
--- a/website/docs/overview/categories/configuration.md
+++ b/website/docs/overview/categories/configuration.md
@@ -223,6 +223,62 @@ categories:
 - **0.4-0.5**: Adequate capability
 - **0.0-0.3**: Poor capability, avoid if possible
 
+#### `lora_name` (Optional)
+
+- **Type**: String
+- **Description**: LoRA adapter name to use for this model
+- **Purpose**: Enable intent-aware LoRA routing
+- **Validation**: Must be defined in the model's `loras` list in `model_config`
+
+When specified, the `lora_name` becomes the final model name in requests to vLLM, enabling automatic routing to LoRA adapters based on classified intent.
+
+```yaml
+# First, define available LoRA adapters in model_config
+model_config:
+  "llama2-7b":
+    reasoning_family: "llama2"
+    preferred_endpoints: ["vllm-primary"]
+    loras:
+      - name: "technical-lora"
+        description: "Optimized for technical questions"
+      - name: "medical-lora"
+        description: "Specialized for medical domain"
+
+# Then reference them in categories
+categories:
+  - name: "technical"
+    model_scores:
+      - model: "llama2-7b"           # Base model (for endpoint selection)
+        lora_name: "technical-lora"  # LoRA adapter name (final model name)
+        score: 1.0
+```
+
+**How LoRA Routing Works**:
+
+1. LoRA adapters are defined in `model_config` under the base model
+2. At startup, the configuration validator checks that every `lora_name` references an adapter in the model's `loras` list
+3. At request time, the prompt is classified into a category (e.g., "technical")
+4. The router selects the best `ModelScore` for that category
+5. If that entry specifies a `lora_name`, it replaces the base model name
+6. The request is sent to vLLM with `model="technical-lora"`
+7. vLLM automatically routes to the appropriate LoRA adapter
+
+**Prerequisites**:
+
+- vLLM server must be started with `--enable-lora` flag
+- LoRA adapters must be registered using `--lora-modules` parameter
+- LoRA names must be defined in `model_config` before use in `model_scores`
+
+**Benefits**:
+
+- **Domain Expertise**: Fine-tuned adapters for specific domains
+- **Cost Efficiency**: Share base model weights across adapters
+- **Easy A/B Testing**: Compare adapter versions by adjusting scores
+- **Flexible Deployment**: Add/remove adapters without router restart
+- **Configuration Validation**: Prevents typos and missing LoRA definitions
+
+See [LoRA Routing Example](https://github.com/vllm-project/semantic-router/blob/main/config/intelligent-routing/in-tree/lora_routing_example.yaml) for complete configuration. 
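+
+If a category references an adapter that is not declared, the router fails fast when the configuration is loaded instead of sending an unknown model name to vLLM. As a minimal sketch (assuming a hypothetical misspelling `technical-lorra` of the adapter defined above), the validation error looks roughly like:
+
+```text
+category 'technical', model 'llama2-7b': lora_name 'technical-lorra' is not defined in model 'llama2-7b' loras. Available LoRAs: [technical-lora medical-lora]
+```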
+ ## Complete Configuration Examples ### Example 1: STEM Category (Reasoning Enabled) @@ -261,7 +317,47 @@ categories: score: 0.2 ``` -### Example 3: Security-Focused Configuration (Jailbreak Protection) +### Example 3: Intent-Aware LoRA Routing + +```yaml +# Define LoRA adapters in model_config first +model_config: + "llama2-7b": + reasoning_family: "llama2" + preferred_endpoints: ["vllm-primary"] + loras: + - name: "technical-lora" + description: "Optimized for technical questions" + - name: "medical-lora" + description: "Specialized for medical domain" + +# Then reference them in categories +categories: + - name: "technical" + description: "Programming and technical questions" + model_scores: + - model: "llama2-7b" + lora_name: "technical-lora" # Routes to technical LoRA adapter + score: 1.0 + use_reasoning: true + + - name: "medical" + description: "Medical and healthcare questions" + model_scores: + - model: "llama2-7b" + lora_name: "medical-lora" # Routes to medical LoRA adapter + score: 1.0 + use_reasoning: true + + - name: "general" + description: "General questions" + model_scores: + - model: "llama2-7b" # No lora_name - uses base model + score: 0.8 + use_reasoning: false +``` + +### Example 4: Security-Focused Configuration (Jailbreak Protection) ```yaml categories: From 90c20c3d9b833039291b1a12ec627110a113be5b Mon Sep 17 00:00:00 2001 From: bitliu Date: Mon, 3 Nov 2025 19:53:03 +0800 Subject: [PATCH 2/8] docs(tutorial): add minimal LoRA routing guide Signed-off-by: bitliu --- ...routing_example.yaml => lora_routing.yaml} | 0 .../intelligent-route/lora-routing.md | 178 ++++++++++++++++++ 2 files changed, 178 insertions(+) rename config/intelligent-routing/in-tree/{lora_routing_example.yaml => lora_routing.yaml} (100%) create mode 100644 website/docs/tutorials/intelligent-route/lora-routing.md diff --git a/config/intelligent-routing/in-tree/lora_routing_example.yaml b/config/intelligent-routing/in-tree/lora_routing.yaml similarity index 100% rename from config/intelligent-routing/in-tree/lora_routing_example.yaml rename to config/intelligent-routing/in-tree/lora_routing.yaml diff --git a/website/docs/tutorials/intelligent-route/lora-routing.md b/website/docs/tutorials/intelligent-route/lora-routing.md new file mode 100644 index 00000000..533631fb --- /dev/null +++ b/website/docs/tutorials/intelligent-route/lora-routing.md @@ -0,0 +1,178 @@ +# LoRA Routing + +This guide shows how to enable intent-aware LoRA (Low-Rank Adaptation) routing in the Semantic Router: + +- Minimal configuration for LoRA routing +- vLLM server setup with LoRA adapters +- Example request/response showing automatic LoRA selection +- Verification steps + +## Prerequisites + +- A running vLLM server with LoRA support enabled +- LoRA adapter files (fine-tuned for specific domains) +- Envoy + the router (see [Start the router](../../getting-started/quickstart.md) section) + +## 1. Start vLLM with LoRA Adapters + +First, start your vLLM server with LoRA support enabled: + +```bash +vllm serve meta-llama/Llama-2-7b-hf \ + --enable-lora \ + --lora-modules \ + technical-lora=/path/to/technical-adapter \ + medical-lora=/path/to/medical-adapter \ + legal-lora=/path/to/legal-adapter \ + --host 0.0.0.0 \ + --port 8000 +``` + +**Key flags**: + +- `--enable-lora`: Enables LoRA adapter support +- `--lora-modules`: Registers LoRA adapters with their names and paths +- Format: `adapter-name=/path/to/adapter` + +## 2. 
Minimal Configuration + +Put this in `config/config.yaml` (or merge into your existing config): + +```yaml +# Category classifier (required for intent detection) +classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + +# vLLM endpoint hosting your base model + LoRA adapters +vllm_endpoints: + - name: "vllm-primary" + address: "127.0.0.1" + port: 8000 + weight: 1 + +# Define base model and available LoRA adapters +model_config: + "llama2-7b": + reasoning_family: "llama2" + preferred_endpoints: ["vllm-primary"] + # IMPORTANT: Define all available LoRA adapters here + loras: + - name: "technical-lora" + description: "Optimized for programming and technical questions" + - name: "medical-lora" + description: "Specialized for medical and healthcare domain" + - name: "legal-lora" + description: "Fine-tuned for legal questions" + +# Default model for fallback +default_model: "llama2-7b" + +# Categories with LoRA routing +categories: + - name: "technical" + description: "Programming, software engineering, and technical questions" + system_prompt: "You are an expert software engineer." + model_scores: + - model: "llama2-7b" # Base model name + lora_name: "technical-lora" # LoRA adapter to use + score: 1.0 + use_reasoning: true + reasoning_effort: "medium" + + - name: "medical" + description: "Medical and healthcare questions" + system_prompt: "You are a medical expert." + model_scores: + - model: "llama2-7b" + lora_name: "medical-lora" # Different LoRA for medical + score: 1.0 + use_reasoning: true + reasoning_effort: "high" + + - name: "legal" + description: "Legal questions and law-related topics" + system_prompt: "You are a legal expert." + model_scores: + - model: "llama2-7b" + lora_name: "legal-lora" # Different LoRA for legal + score: 1.0 + use_reasoning: true + reasoning_effort: "high" + + - name: "general" + description: "General questions" + system_prompt: "You are a helpful assistant." + model_scores: + - model: "llama2-7b" # No lora_name = uses base model + score: 0.8 + use_reasoning: false +``` + +## 3. How It Works + +```mermaid +graph TB + A[User Query] --> B[Semantic Router] + B --> C[Category Classifier] + + C --> D{Classified Category} + D -->|Technical| E[technical-lora] + D -->|Medical| F[medical-lora] + D -->|Legal| G[legal-lora] + D -->|General| H[llama2-7b base] + + E --> I[vLLM Server] + F --> I + G --> I + H --> I + + I --> J[Response] +``` + +**Flow**: + +1. User sends a query to the router +2. Category classifier detects the intent (e.g., "technical") +3. Router looks up the best `ModelScore` for that category +4. If `lora_name` is specified, it becomes the final model name +5. Request is sent to vLLM with `model="technical-lora"` +6. vLLM routes to the appropriate LoRA adapter +7. 
Response is returned to the user
+
+## 4. Test Domain-Aware LoRA Routing
+
+Send test queries and verify they're classified correctly:
+
+```bash
+# Technical query
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "MoM", "messages": [{"role": "user", "content": "Explain async/await in JavaScript"}]}'
+
+# Medical query
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "MoM", "messages": [{"role": "user", "content": "What causes high blood pressure?"}]}'
+```
+
+Check the router logs to confirm the correct LoRA adapter is selected for each query.
+
+
+## Benefits
+
+- **Domain Expertise**: Each LoRA adapter is fine-tuned for specific domains
+- **Cost Efficiency**: Share base model weights across adapters (lower memory usage)
+- **Easy A/B Testing**: Compare adapter versions by adjusting scores
+- **Flexible Deployment**: Add/remove adapters without restarting the router
+- **Automatic Selection**: Users don't need to know which adapter to use
+
+## Next Steps
+
+- See [complete LoRA routing example](https://github.com/vllm-project/semantic-router/blob/main/config/intelligent-routing/in-tree/lora_routing_example.yaml)
+- Learn about [category configuration](../../overview/categories/configuration.md#lora_name-optional)
+- Explore [reasoning routing](./reasoning.md) to combine with LoRA adapters

From fa5c846049cec12fdd9c1a75af6adcfbeff38fa7 Mon Sep 17 00:00:00 2001
From: bitliu
Date: Mon, 3 Nov 2025 19:55:31 +0800
Subject: [PATCH 3/8] docs(sidebar): add lora-routing tutorial to navigation

Signed-off-by: bitliu
---
 website/sidebars.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/website/sidebars.ts b/website/sidebars.ts
index 4699f115..dfe8a8a3 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -64,6 +64,7 @@ const sidebars: SidebarsConfig = {
       items: [
         'tutorials/intelligent-route/overview',
         'tutorials/intelligent-route/reasoning',
+        'tutorials/intelligent-route/lora-routing',
       ],
     },
     {

From 6127faa41a3ce2d18dbd25f172d66444a876f2d9 Mon Sep 17 00:00:00 2001
From: bitliu
Date: Mon, 3 Nov 2025 19:58:34 +0800
Subject: [PATCH 4/8] docs(sidebar): add hybrid-cache tutorial to navigation

Add hybrid-cache.md to the Semantic Cache section in sidebar.

Signed-off-by: bitliu
---
 website/sidebars.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/website/sidebars.ts b/website/sidebars.ts
index dfe8a8a3..40525eab 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -74,6 +74,7 @@ const sidebars: SidebarsConfig = {
         'tutorials/semantic-cache/overview',
         'tutorials/semantic-cache/in-memory-cache',
         'tutorials/semantic-cache/milvus-cache',
+        'tutorials/semantic-cache/hybrid-cache',
       ],
     },
     {

From 0b043bd2f906bd889ee7f3315102de65a1c3e1a8 Mon Sep 17 00:00:00 2001
From: bitliu
Date: Mon, 3 Nov 2025 20:00:34 +0800
Subject: [PATCH 5/8] fix(docs): correct broken link in lora-routing tutorial

Fix broken link from getting-started/quickstart.md to
installation/installation.md

Signed-off-by: bitliu
---
 website/docs/tutorials/intelligent-route/lora-routing.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/tutorials/intelligent-route/lora-routing.md b/website/docs/tutorials/intelligent-route/lora-routing.md
index 533631fb..e1501445 100644
--- a/website/docs/tutorials/intelligent-route/lora-routing.md
+++ b/website/docs/tutorials/intelligent-route/lora-routing.md
@@ -11,7 +11,7 @@ This guide shows how to enable intent-aware LoRA (Low-Rank Adaptation) routing i
 
 - A running vLLM server with LoRA support enabled
 - LoRA adapter files (fine-tuned for specific domains)
-- Envoy + the router (see [Start the router](../../getting-started/quickstart.md) section)
+- Envoy + the router (see [Installation](../../installation/installation.md) guide)
 
 ## 1. Start vLLM with LoRA Adapters
 

From 7686e5e87d97cbcabe19356db2e3c20f3d5f5e50 Mon Sep 17 00:00:00 2001
From: bitliu
Date: Mon, 3 Nov 2025 20:24:00 +0800
Subject: [PATCH 6/8] lint

Signed-off-by: bitliu
---
 website/docs/tutorials/intelligent-route/lora-routing.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/website/docs/tutorials/intelligent-route/lora-routing.md b/website/docs/tutorials/intelligent-route/lora-routing.md
index e1501445..adb2df2a 100644
--- a/website/docs/tutorials/intelligent-route/lora-routing.md
+++ b/website/docs/tutorials/intelligent-route/lora-routing.md
@@ -162,7 +162,6 @@ curl -X POST http://localhost:8080/v1/chat/completions \
 
 Check the router logs to confirm the correct LoRA adapter is selected for each query.
 
-
 ## Benefits
 
 - **Domain Expertise**: Each LoRA adapter is fine-tuned for specific domains

From 64286586197b157730d182d90bad29763ae76e07 Mon Sep 17 00:00:00 2001
From: Xunzhuo
Date: Mon, 3 Nov 2025 22:36:53 +0800
Subject: [PATCH 7/8] update configuration.md

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Xunzhuo
---
 website/docs/overview/categories/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/overview/categories/configuration.md b/website/docs/overview/categories/configuration.md
index 4fd3fb2f..7ee80148 100644
--- a/website/docs/overview/categories/configuration.md
+++ b/website/docs/overview/categories/configuration.md
@@ -277,7 +277,7 @@ categories:
 - **Flexible Deployment**: Add/remove adapters without router restart
 - **Configuration Validation**: Prevents typos and missing LoRA definitions
 
-See [LoRA Routing Example](https://github.com/vllm-project/semantic-router/blob/main/config/intelligent-routing/in-tree/lora_routing_example.yaml) for complete configuration.
+See [LoRA Routing Example](https://github.com/vllm-project/semantic-router/blob/main/config/intelligent-routing/in-tree/lora_routing.yaml) for complete configuration.
 
## Complete Configuration Examples From bea9fa9957236573e50f457e5512497a05a3b288 Mon Sep 17 00:00:00 2001 From: Xunzhuo Date: Mon, 3 Nov 2025 22:37:18 +0800 Subject: [PATCH 8/8] update lora-routing.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Xunzhuo --- website/docs/tutorials/intelligent-route/lora-routing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/tutorials/intelligent-route/lora-routing.md b/website/docs/tutorials/intelligent-route/lora-routing.md index adb2df2a..b5eff404 100644 --- a/website/docs/tutorials/intelligent-route/lora-routing.md +++ b/website/docs/tutorials/intelligent-route/lora-routing.md @@ -172,6 +172,6 @@ Check the router logs to confirm the correct LoRA adapter is selected for each q ## Next Steps -- See [complete LoRA routing example](https://github.com/vllm-project/semantic-router/blob/main/config/intelligent-routing/in-tree/lora_routing_example.yaml) +- See [complete LoRA routing example](https://github.com/vllm-project/semantic-router/blob/main/config/intelligent-routing/in-tree/lora_routing.yaml) - Learn about [category configuration](../../overview/categories/configuration.md#lora_name-optional) - Explore [reasoning routing](./reasoning.md) to combine with LoRA adapters