Skip to content

Commit 51cefd8

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent 6a4ebf4 commit 51cefd8

File tree

100 files changed

+10492
-6261
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

100 files changed

+10492
-6261
lines changed

config/intelligent-routing/in-tree/embedding.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ categories:
136136
score: 0.9
137137
use_reasoning: true
138138
jailbreak_enabled: true
139-
pii_detection_enabled: true
139+
pii_enabled: true
140140

141141
- name: product_inquiry
142142
system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative."
@@ -145,7 +145,7 @@ categories:
145145
score: 0.85
146146
use_reasoning: false
147147
jailbreak_enabled: true
148-
pii_detection_enabled: false
148+
pii_enabled: false
149149

150150
- name: account_management
151151
system_prompt: "You are an account management assistant. Help users with account-related tasks such as password resets, profile updates, and subscription management. Prioritize security and privacy."
@@ -154,7 +154,7 @@ categories:
154154
score: 0.88
155155
use_reasoning: false
156156
jailbreak_enabled: true
157-
pii_detection_enabled: true
157+
pii_enabled: true
158158

159159
- name: general_inquiry
160160
system_prompt: "You are a helpful general assistant. Answer questions clearly and concisely. If you need more information, ask clarifying questions."
@@ -163,7 +163,7 @@ categories:
163163
score: 0.75
164164
use_reasoning: false
165165
jailbreak_enabled: true
166-
pii_detection_enabled: false
166+
pii_enabled: false
167167

168168
# Embedding Models Configuration
169169
# These models provide intelligent embedding generation with automatic routing:
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
{{- if .Values.crds.install }}
2+
---
3+
apiVersion: apiextensions.k8s.io/v1
4+
kind: CustomResourceDefinition
5+
metadata:
6+
name: intelligentpools.vllm.ai
7+
annotations:
8+
"helm.sh/resource-policy": keep
9+
spec:
10+
group: vllm.ai
11+
names:
12+
kind: IntelligentPool
13+
listKind: IntelligentPoolList
14+
plural: intelligentpools
15+
singular: intelligentpool
16+
shortNames:
17+
- ipool
18+
scope: Namespaced
19+
versions:
20+
- name: v1alpha1
21+
served: true
22+
storage: true
23+
schema:
24+
openAPIV3Schema:
25+
type: object
26+
properties:
27+
spec:
28+
type: object
29+
required:
30+
- defaultModel
31+
- models
32+
properties:
33+
defaultModel:
34+
type: string
35+
models:
36+
type: array
37+
items:
38+
type: object
39+
required:
40+
- name
41+
properties:
42+
name:
43+
type: string
44+
reasoningFamily:
45+
type: string
46+
piiPolicy:
47+
type: object
48+
properties:
49+
allowByDefault:
50+
type: boolean
51+
pricing:
52+
type: object
53+
properties:
54+
inputTokenPrice:
55+
type: number
56+
format: double
57+
outputTokenPrice:
58+
type: number
59+
format: double
60+
loras:
61+
type: array
62+
items:
63+
type: object
64+
required:
65+
- name
66+
properties:
67+
name:
68+
type: string
69+
description:
70+
type: string
71+
status:
72+
type: object
73+
properties:
74+
conditions:
75+
type: array
76+
items:
77+
type: object
78+
required:
79+
- type
80+
- status
81+
properties:
82+
type:
83+
type: string
84+
status:
85+
type: string
86+
reason:
87+
type: string
88+
message:
89+
type: string
90+
lastTransitionTime:
91+
type: string
92+
format: date-time
93+
observedGeneration:
94+
type: integer
95+
format: int64
96+
observedGeneration:
97+
type: integer
98+
format: int64
99+
modelCount:
100+
type: integer
101+
format: int32
102+
subresources:
103+
status: {}
104+
additionalPrinterColumns:
105+
- name: Default Model
106+
type: string
107+
jsonPath: .spec.defaultModel
108+
- name: Models
109+
type: integer
110+
jsonPath: .status.modelCount
111+
- name: Status
112+
type: string
113+
jsonPath: .status.conditions[?(@.type=="Ready")].status
114+
- name: Age
115+
type: date
116+
jsonPath: .metadata.creationTimestamp
117+
---
118+
apiVersion: apiextensions.k8s.io/v1
119+
kind: CustomResourceDefinition
120+
metadata:
121+
name: intelligentroutes.vllm.ai
122+
annotations:
123+
"helm.sh/resource-policy": keep
124+
spec:
125+
group: vllm.ai
126+
names:
127+
kind: IntelligentRoute
128+
listKind: IntelligentRouteList
129+
plural: intelligentroutes
130+
singular: intelligentroute
131+
shortNames:
132+
- iroute
133+
scope: Namespaced
134+
versions:
135+
- name: v1alpha1
136+
served: true
137+
storage: true
138+
schema:
139+
openAPIV3Schema:
140+
type: object
141+
# Note: The full schema is too large to embed in this template, so unknown fields are preserved instead of being described here.
142+
# See deploy/kubernetes/crds/intelligentroute-crd.yaml for the complete definition.
143+
x-kubernetes-preserve-unknown-fields: true
144+
subresources:
145+
status: {}
146+
{{- end }}
147+

deploy/helm/semantic-router/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ global:
77
# -- Namespace for all resources (if not specified, uses Release.Namespace)
88
namespace: ""
99

10+
# CRD configuration
11+
crds:
12+
# -- Install CRDs (IntelligentPool and IntelligentRoute)
13+
install: true
14+
1015
# -- Number of replicas for the deployment
1116
replicaCount: 1
1217

0 commit comments

Comments
 (0)