vllm-project
diff --git a/config/intelligent-routing/in-tree/embedding.yaml b/config/intelligent-routing/in-tree/embedding.yaml
Lines changed: 4 additions & 4 deletions
diff --git a/deploy/helm/semantic-router/templates/crds.yaml b/deploy/helm/semantic-router/templates/crds.yaml
Lines changed: 147 additions & 0 deletions
diff --git a/deploy/helm/semantic-router/values.yaml b/deploy/helm/semantic-router/values.yaml
Lines changed: 5 additions & 0 deletions
diff --git a/deploy/kubernetes/ai-gateway/semantic-router/config.yaml b/deploy/kubernetes/ai-gateway/semantic-router/config.yaml
Lines changed: 75 additions & 8 deletions
diff --git a/deploy/kubernetes/crds/examples/intelligentpool-example.yaml b/deploy/kubernetes/crds/examples/intelligentpool-example.yaml
Lines changed: 39 additions & 0 deletions
@@ -136,7 +136,7 @@ categories:
         score: 0.9
         use_reasoning: true
     jailbreak_enabled: true
-    pii_detection_enabled: true
+    pii_enabled: true
 
   - name: product_inquiry
     system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative."
@@ -145,7 +145,7 @@ categories:
         score: 0.85
         use_reasoning: false
     jailbreak_enabled: true
-    pii_detection_enabled: false
+    pii_enabled: false
 
   - name: account_management
     system_prompt: "You are an account management assistant. Help users with account-related tasks such as password resets, profile updates, and subscription management. Prioritize security and privacy."
@@ -154,7 +154,7 @@ categories:
         score: 0.88
         use_reasoning: false
     jailbreak_enabled: true
-    pii_detection_enabled: true
+    pii_enabled: true
 
   - name: general_inquiry
     system_prompt: "You are a helpful general assistant. Answer questions clearly and concisely. If you need more information, ask clarifying questions."
@@ -163,7 +163,7 @@ categories:
         score: 0.75
         use_reasoning: false
     jailbreak_enabled: true
-    pii_detection_enabled: false
+    pii_enabled: false
 
 # Embedding Models Configuration
 # These models provide intelligent embedding generation with automatic routing:
 
@@ -0,0 +1,147 @@
+{{- if .Values.crds.install }}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition  # registers the namespaced IntelligentPool kind in API group vllm.ai
+metadata:
+  name: intelligentpools.vllm.ai
+  annotations:
+    "helm.sh/resource-policy": keep  # Helm skips deleting this CRD when an operation would remove it
+spec:
+  group: vllm.ai
+  names:
+    kind: IntelligentPool
+    listKind: IntelligentPoolList
+    plural: intelligentpools
+    singular: intelligentpool
+    shortNames:
+      - ipool  # short alias for the resource, e.g. kubectl get ipool
+  scope: Namespaced
+  versions:
+    - name: v1alpha1  # only version defined here; it is both served and the storage version
+      served: true
+      storage: true
+      schema:
+        openAPIV3Schema:
+          type: object
+          properties:
+            spec:
+              type: object
+              required:  # a pool spec must set both defaultModel and models
+                - defaultModel
+                - models
+              properties:
+                defaultModel:
+                  type: string
+                models:
+                  type: array
+                  items:
+                    type: object
+                    required:  # name is the only required field of a model entry
+                      - name
+                    properties:
+                      name:
+                        type: string
+                      reasoningFamily:
+                        type: string
+                      piiPolicy:
+                        type: object
+                        properties:
+                          allowByDefault:
+                            type: boolean
+                      pricing:
+                        type: object
+                        properties:
+                          inputTokenPrice:
+                            type: number
+                            format: double
+                          outputTokenPrice:
+                            type: number
+                            format: double
+                      loras:
+                        type: array
+                        items:
+                          type: object
+                          required:  # name is the only required field of a LoRA entry
+                            - name
+                          properties:
+                            name:
+                              type: string
+                            description:
+                              type: string
+            status:
+              type: object
+              properties:
+                conditions:
+                  type: array
+                  items:
+                    type: object
+                    required:  # each condition must carry type and status
+                      - type
+                      - status
+                    properties:
+                      type:
+                        type: string
+                      status:
+                        type: string
+                      reason:
+                        type: string
+                      message:
+                        type: string
+                      lastTransitionTime:
+                        type: string
+                        format: date-time
+                      observedGeneration:
+                        type: integer
+                        format: int64
+                observedGeneration:
+                  type: integer
+                  format: int64
+                modelCount:
+                  type: integer
+                  format: int32
+      subresources:
+        status: {}  # enables the /status subresource for this version
+      additionalPrinterColumns:  # extra columns for tabular output such as kubectl get
+        - name: Default Model
+          type: string
+          jsonPath: .spec.defaultModel
+        - name: Models
+          type: integer
+          jsonPath: .status.modelCount
+        - name: Status  # status field of the condition with type Ready
+          type: string
+          jsonPath: .status.conditions[?(@.type=="Ready")].status
+        - name: Age
+          type: date
+          jsonPath: .metadata.creationTimestamp
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition  # registers the namespaced IntelligentRoute kind in API group vllm.ai
+metadata:
+  name: intelligentroutes.vllm.ai
+  annotations:
+    "helm.sh/resource-policy": keep  # Helm skips deleting this CRD when an operation would remove it
+spec:
+  group: vllm.ai
+  names:
+    kind: IntelligentRoute
+    listKind: IntelligentRouteList
+    plural: intelligentroutes
+    singular: intelligentroute
+    shortNames:
+      - iroute  # short alias for the resource, e.g. kubectl get iroute
+  scope: Namespaced
+  versions:
+    - name: v1alpha1  # only version defined here; it is both served and the storage version
+      served: true
+      storage: true
+      schema:
+        openAPIV3Schema:
+          type: object
+          # Note: the full schema is too large to inline in this template, so unknown fields are preserved as-is.
+          # See deploy/kubernetes/crds/intelligentroute-crd.yaml for the complete definition.
+          x-kubernetes-preserve-unknown-fields: true
+      subresources:
+        status: {}  # enables the /status subresource for this version
+{{- end }}
+
@@ -7,6 +7,11 @@ global:
   # -- Namespace for all resources (if not specified, uses Release.Namespace)
   namespace: ""
 
+# CRD configuration
+crds:
+  # -- Install CRDs (IntelligentPool and IntelligentRoute)
+  install: true
+
 # -- Number of replicas for the deployment
 replicaCount: 1
 
 
@@ -20,6 +20,8 @@ model_config:
       - name: "general-expert"
         description: "General-purpose adapter for diverse topics"
 
+default_model: general-expert
+
 # Categories with LoRA routing
 # Each category uses the base-model model with a specific LoRA adapter
 categories:
@@ -129,14 +131,85 @@ categories:
         lora_name: science-expert
         score: 0.7
         use_reasoning: false
-  - name: thinking
+  - name: urgent request
     system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
     model_scores:
       - model: general-expert
         score: 0.7
         use_reasoning: true
+  # Embedding-based categories
+  - name: technical_support
+    system_prompt: "You are a technical support specialist. Provide detailed, step-by-step guidance for technical issues. Use clear explanations and include relevant troubleshooting steps."
+    model_scores:
+      - model: general-expert
+        score: 0.9
+        use_reasoning: true
+    jailbreak_enabled: true
+    pii_enabled: true
+  - name: business
+    system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative."
+    model_scores:
+      - model: general-expert
+        score: 0.85
+        use_reasoning: false
+    jailbreak_enabled: true
+    pii_enabled: true
 
-default_model: general-expert
+keyword_rules:
+  # Keyword Rule 1: Emergency/Urgent Requests
+  # Use case: Fast routing for time-sensitive queries that need immediate attention
+  # Examples: "URGENT: server down", "EMERGENCY: data loss", "CRITICAL: security breach"
+  - category: "urgent request"
+    operator: "OR"
+    keywords: ["urgent", "emergency", "critical", "asap", "immediately", "help!", "sos"]
+    case_sensitive: false
+
+  # Keyword Rule 2: Programming Language Detection
+  # Use case: Route code-related queries to appropriate handlers based on language
+  # Examples: "python error", "java exception", "golang panic", "rust compiler error"
+  - category: "computer science"
+    operator: "OR"
+    keywords: ["python", "java", "golang", "rust", "javascript", "typescript", "c++", "ruby", "php"]
+    case_sensitive: false
+
+# Embedding-based classification rules
+# These rules use semantic similarity between query text and keywords
+embedding_rules:
+  # Embedding Rule 1: Customer Complaint/Feedback Detection (routed to the technical_support category)
+  # Use case: Identify negative sentiment and complaints regardless of exact wording
+  # Examples: "I'm disappointed with the service", "This product doesn't work as expected",
+  #           "Not satisfied with my purchase", "The quality is poor"
+  - category: "technical_support"
+    threshold: 0.72
+    keywords:
+      - "I'm not satisfied with the product quality"
+      - "The service didn't meet my expectations"
+      - "I'm experiencing issues and need help"
+      - "Something is broken and not working properly"
+      - "I'm disappointed with the performance"
+      - "This is not what I expected when I ordered"
+    aggregation_method: "max"  # Use max to catch any strong complaint signal
+    model: "auto"
+    dimension: 768
+    quality_priority: 0.8  # High quality needed for sentiment detection
+    latency_priority: 0.2
+
+  # Embedding Rule 2: Account/Billing Related Queries (routed to the business category)
+  # Use case: Route financial and account queries even with varied phrasing
+  # Examples: "How much do I owe?", "Check my balance", "Update payment method",
+  #           "Why was I charged twice?", "Cancel my subscription"
+  - category: "business"
+    threshold: 0.68
+    keywords:
+      - "I need to check my account balance and payment history"
+      - "How can I update my billing information and payment method"
+      - "I was charged incorrectly and need a refund"
+      - "I want to cancel my subscription and stop recurring payments"
+      - "What are the fees and charges on my account"
+      - "I need to review my invoice and transaction details"
+    aggregation_method: "avg"  # Use avg for balanced matching across billing topics
+    model: "qwen3"  # Use high-quality model for financial queries
+    dimension: 1024
 
 bert_model:
   model_id: models/all-MiniLM-L12-v2
@@ -195,12 +268,6 @@ classifier:
     use_cpu: true
     pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
 
-keyword_rules:
-  - category: "thinking"
-    operator: "OR"
-    keywords: ["urgent", "immediate", "asap", "think", "careful"]
-    case_sensitive: false
-
 
 # Router Configuration for Dual-Path Selection
 router:
 
@@ -0,0 +1,39 @@
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool  # example pool declaring three models and a default model
+metadata:
+  name: default-pool
+  namespace: default
+spec:
+  defaultModel: "general-expert"  # same name as the second entry under models
+  models:
+    - name: "base-model"
+      reasoningFamily: "qwen3"
+      piiPolicy:
+        allowByDefault: false
+      pricing:
+        inputTokenPrice: 0.000001  # NOTE(review): if priced per token this is $1 per 1M tokens, not $0.001 - confirm the unit
+        outputTokenPrice: 0.000002  # NOTE(review): if priced per token this is $2 per 1M tokens, not $0.002 - confirm the unit
+      loras:
+        - name: "math-expert"
+          description: "Math specialist LoRA adapter"
+        - name: "code-expert"
+          description: "Code specialist LoRA adapter"
+    
+    - name: "general-expert"
+      reasoningFamily: "deepseek"
+      piiPolicy:
+        allowByDefault: true
+      pricing:
+        inputTokenPrice: 0.000002
+        outputTokenPrice: 0.000004
+      loras:
+        - name: "general-assistant"
+          description: "General purpose assistant"
+    
+    - name: "fast-model"  # sets only name, piiPolicy and pricing; no reasoningFamily or loras
+      piiPolicy:
+        allowByDefault: true
+      pricing:
+        inputTokenPrice: 0.0000005
+        outputTokenPrice: 0.000001
+