vllm-project
diff --git a/‎README.md‎
Lines changed: 4 additions & 0 deletions b/‎README.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎config/config.yaml‎
Lines changed: 14 additions & 0 deletions b/‎config/config.yaml‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎config/examples/system_prompt_example.yaml‎
Lines changed: 112 additions & 0 deletions b/‎config/examples/system_prompt_example.yaml‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎src/semantic-router/pkg/config/config.go‎
Lines changed: 2 additions & 0 deletions b/‎src/semantic-router/pkg/config/config.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/semantic-router/pkg/extproc/request_handler.go‎
Lines changed: 70 additions & 0 deletions b/‎src/semantic-router/pkg/extproc/request_handler.go‎
Lines changed: 70 additions & 0 deletions
@@ -44,6 +44,10 @@ Benchmarking will be conducted to determine the best implementation.
 
 Select the tools to use based on the prompt, avoiding the use of tools that are not relevant to the prompt so as to reduce the number of prompt tokens and improve tool selection accuracy by the LLM.
 
+#### Category-Specific System Prompts
+
+Automatically inject specialized system prompts based on query classification, ensuring optimal model behavior for different domains (math, coding, business, etc.) without manual prompt engineering.
+
 ### Enterprise Security 🔒
 
 #### PII detection
 
@@ -63,71 +63,85 @@ classifier:
 # Categories with new use_reasoning field structure
 categories:
   - name: business
+    system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
         use_reasoning: false  # Business performs better without reasoning
   - name: law
+    system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.4
         use_reasoning: false
   - name: psychology
+    system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.6
         use_reasoning: false
   - name: biology
+    system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.9
         use_reasoning: false
   - name: chemistry
+    system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.6
         use_reasoning: true  # Enable reasoning for complex chemistry
   - name: history
+    system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
         use_reasoning: false
   - name: other
+    system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
         use_reasoning: false
   - name: health
+    system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.5
         use_reasoning: false
   - name: economics
+    system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 1.0
         use_reasoning: false
   - name: math
+    system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 1.0
         use_reasoning: true  # Enable reasoning for complex math
   - name: physics
+    system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
         use_reasoning: true  # Enable reasoning for physics
   - name: computer science
+    system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.6
         use_reasoning: false
   - name: philosophy
+    system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.5
         use_reasoning: false
   - name: engineering
+    system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
 
@@ -0,0 +1,112 @@
+# System Prompt Configuration Example
+# This example demonstrates how to configure category-specific system prompts
+# that will be automatically injected into requests based on query classification
+
+# Basic configuration
+classifier:
+  category_model:
+    model_id: "sentence-transformers/all-MiniLM-L6-v2"
+    threshold: 0.7
+    use_cpu: false
+    use_modernbert: true
+    category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
+
+# Categories with system prompts for different domains
+categories:
+  - name: math
+    description: "Mathematical queries, calculations, and problem solving"
+    system_prompt: "You are a mathematics expert. Always provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way. When solving equations, break down each step and explain the reasoning behind it."
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.9
+        use_reasoning: true
+
+  - name: computer science
+    description: "Programming, algorithms, software engineering, and technical topics"
+    system_prompt: "You are a computer science expert with deep knowledge of algorithms, data structures, programming languages, and software engineering best practices. Provide clear, practical solutions with well-commented code examples when helpful. Always consider performance, readability, and maintainability."
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.8
+        use_reasoning: true
+
+  - name: creative writing
+    description: "Creative writing, storytelling, poetry, and literary analysis"
+    system_prompt: "You are a creative writing expert with a passion for storytelling, poetry, and literature. Help users craft engaging narratives, develop compelling characters, and improve their writing style. Provide constructive feedback and creative suggestions."
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.7
+        use_reasoning: false
+
+  - name: business
+    description: "Business strategy, management, finance, and professional advice"
+    system_prompt: "You are a professional business consultant with expertise in strategy, operations, management, and finance. Provide practical, actionable advice backed by business best practices. Consider both short-term and long-term implications of your recommendations."
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.8
+        use_reasoning: false
+
+  - name: science
+    description: "General science questions, research, and scientific concepts"
+    system_prompt: "You are a scientist with broad knowledge across multiple scientific disciplines. Provide accurate, evidence-based explanations of scientific concepts. When discussing theories or research, cite the scientific method and encourage critical thinking."
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.8
+        use_reasoning: true
+
+  - name: health
+    description: "Health, wellness, medical information, and fitness"
+    system_prompt: "You are a knowledgeable health and wellness expert. Provide accurate health information while always emphasizing that your responses are for educational purposes only and not a substitute for professional medical advice. Encourage users to consult healthcare professionals for medical concerns."
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.7
+        use_reasoning: false
+
+  - name: education
+    description: "Teaching, learning, educational methods, and academic topics"
+    system_prompt: "You are an experienced educator with expertise in pedagogy and learning theory. Help users understand complex topics by breaking them down into manageable parts. Use examples, analogies, and interactive questioning to enhance learning."
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.8
+        use_reasoning: false
+
+  - name: other
+    description: "General queries that don't fit into specific categories"
+    system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics. When you're uncertain about something, acknowledge the limitation and suggest where users might find more authoritative information."
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.6
+        use_reasoning: false
+
+# Default model for fallback
+default_model: openai/gpt-oss-20b
+
+# Model configuration
+model_config:
+  "openai/gpt-oss-20b":
+    reasoning_family: "gpt-oss"
+    preferred_endpoints: ["mock"]
+    pii_policy:
+      allow_by_default: true
+
+# Reasoning family configurations
+reasoning_families:
+  gpt-oss:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+
+# Global default reasoning effort level
+default_reasoning_effort: medium
+
+# vLLM endpoints configuration
+vllm_endpoints:
+  - name: "mock"
+    address: "http://127.0.0.1:8000"
+    models:
+      - "openai/gpt-oss-20b"
+
+# Usage Notes:
+# 1. System prompts are automatically injected based on query classification
+# 2. If the first message of a request is already a system message, it is replaced with the category-specific one; otherwise the category-specific prompt is prepended to the messages
+# 3. If no system_prompt is configured for a category, no system message is added
+# 4. System prompts work with both "auto" model selection and specific model requests
+# 5. The system prompt is added after reasoning mode has been applied to the request body
@@ -273,6 +273,8 @@ type Category struct {
 	// used by the classifier model. When provided, classifier outputs will be translated
 	// from these MMLU categories to this generic category name.
 	MMLUCategories []string `yaml:"mmlu_categories,omitempty"`
+	// SystemPrompt is an optional category-specific system prompt automatically injected into requests
+	SystemPrompt string `yaml:"system_prompt,omitempty"`
 }
 
 // Legacy types - can be removed once migration is complete
 
@@ -32,6 +32,62 @@ func serializeOpenAIRequest(req *openai.ChatCompletionNewParams) ([]byte, error)
 	return json.Marshal(req)
 }
 
+// addSystemPromptToRequestBody returns requestBody with systemPrompt installed as the
+// leading system message of its top-level "messages" array.
+//
+// If the first message already has role "system" it is replaced; otherwise the new
+// message is prepended. The body is returned unchanged when systemPrompt is empty or
+// when "messages" is absent, null, or not a JSON array. An error is returned when
+// requestBody cannot be decoded as a JSON object or when re-encoding fails.
+//
+// Fields other than "messages" (and the individual messages themselves) are carried
+// through as raw JSON instead of being decoded into interface{} values, so numbers
+// are not rounded through float64 on the way out (e.g. a large integer "seed").
+func addSystemPromptToRequestBody(requestBody []byte, systemPrompt string) ([]byte, error) {
+	if systemPrompt == "" {
+		return requestBody, nil
+	}
+
+	// Decode only the top level; every value stays as its original JSON text.
+	var fields map[string]json.RawMessage
+	if err := json.Unmarshal(requestBody, &fields); err != nil {
+		return nil, err
+	}
+
+	rawMessages, ok := fields["messages"]
+	if !ok {
+		return requestBody, nil // No messages array, return original
+	}
+
+	// A JSON null decodes without error but leaves the slice nil; treat it like any
+	// other non-array value and leave the request untouched.
+	var messages []json.RawMessage
+	if err := json.Unmarshal(rawMessages, &messages); err != nil || messages == nil {
+		return requestBody, nil // Messages is not an array, return original
+	}
+
+	// Create a new system message
+	systemMessage, err := json.Marshal(map[string]string{
+		"role":    "system",
+		"content": systemPrompt,
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	// Check if there's already a system message at the beginning. The lookup uses the
+	// exact key "role" and requires a JSON string value; anything else counts as
+	// "not a system message".
+	hasSystemMessage := false
+	if len(messages) > 0 {
+		var first map[string]json.RawMessage
+		if err := json.Unmarshal(messages[0], &first); err == nil {
+			var role string
+			if err := json.Unmarshal(first["role"], &role); err == nil && role == "system" {
+				hasSystemMessage = true
+			}
+		}
+	}
+
+	if hasSystemMessage {
+		// Replace the existing system message
+		messages[0] = systemMessage
+		observability.Infof("Replaced existing system message with category-specific system prompt")
+	} else {
+		// Prepend the system message to the beginning of the messages array
+		messages = append([]json.RawMessage{systemMessage}, messages...)
+		observability.Infof("Added category-specific system prompt to the beginning of messages")
+	}
+
+	// Update the messages in the request map
+	encodedMessages, err := json.Marshal(messages)
+	if err != nil {
+		return nil, err
+	}
+	fields["messages"] = encodedMessages
+
+	// Marshal back to JSON
+	return json.Marshal(fields)
+}
+
+
 // extractUserAndNonUserContent extracts content from request messages
 func extractUserAndNonUserContent(req *openai.ChatCompletionNewParams) (string, []string) {
 	var userContent string
@@ -416,6 +472,20 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 					return nil, status.Errorf(codes.Internal, "error setting reasoning mode: %v", err)
 				}
 
+				// Add category-specific system prompt if configured
+				if categoryName != "" {
+					category := r.Classifier.GetCategoryByName(categoryName)
+					if category != nil && category.SystemPrompt != "" {
+						modifiedBody, err = addSystemPromptToRequestBody(modifiedBody, category.SystemPrompt)
+						if err != nil {
+							observability.Errorf("Error adding system prompt to request: %v", err)
+							metrics.RecordRequestError(actualModel, "serialization_error")
+							return nil, status.Errorf(codes.Internal, "error adding system prompt: %v", err)
+						}
+						observability.Infof("Added category-specific system prompt for category: %s", categoryName)
+					}
+				}
+
 				// Create body mutation with the modified body
 				bodyMutation := &ext_proc.BodyMutation{
 					Mutation: &ext_proc.BodyMutation_Body{
Original file line number	Diff line number	Diff line change
`@@ -273,6 +273,8 @@ type Category struct {`
`273`	`273`	`// used by the classifier model. When provided, classifier outputs will be translated`
`274`	`274`	`// from these MMLU categories to this generic category name.`
`275`	`275`	MMLUCategories []string `yaml:"mmlu_categories,omitempty"`
	`276`	`+ // SystemPrompt is an optional category-specific system prompt automatically injected into requests`
	`277`	+ SystemPrompt string `yaml:"system_prompt,omitempty"`
`276`	`278`	`}`
`277`	`279`
`278`	`280`	`// Legacy types - can be removed once migration is complete`