Skip to content

Commit 25f435e

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent ef0178c commit 25f435e

25 files changed

+744
-618
lines changed
Lines changed: 202 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,202 @@
---
# CustomResourceDefinition for the IntelligentPool resource
# (group vllm.ai, version v1alpha1, namespaced, short name "ipool").
#
# An IntelligentPool names a default model and lists the models available in
# the pool, each with optional LoRA adapters, per-token pricing, and a
# reasoning family. The status subresource is enabled.
#
# NOTE(review): the controller-gen annotation below indicates this manifest is
# generated; presumably hand edits here must be mirrored as kubebuilder markers
# on the Go types or they will be lost on regeneration - confirm.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.19.0
  name: intelligentpools.vllm.ai
spec:
  group: vllm.ai
  names:
    kind: IntelligentPool
    listKind: IntelligentPoolList
    plural: intelligentpools
    shortNames:
    - ipool
    singular: intelligentpool
  scope: Namespaced
  versions:
  # Extra columns shown by `kubectl get`; each jsonPath is evaluated against
  # the stored object.
  - additionalPrinterColumns:
    - description: Default model name
      jsonPath: .spec.defaultModel
      name: Default Model
      type: string
    - description: Number of models
      jsonPath: .status.modelCount
      name: Models
      type: integer
    - description: Ready status
      jsonPath: .status.conditions[?(@.type=='Ready')].status
      name: Status
      type: string
    - jsonPath: .metadata.creationTimestamp
      name: Age
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        description: IntelligentPool defines a pool of models with their configurations
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: IntelligentPoolSpec defines the desired state of IntelligentPool
            properties:
              defaultModel:
                description: DefaultModel specifies the default model to use when
                  no specific model is selected
                maxLength: 100
                minLength: 1
                type: string
              # Between 1 and 100 models per pool; only `name` is required on
              # each entry.
              models:
                description: Models defines the list of available models in this pool
                items:
                  description: ModelConfig defines the configuration for a single
                    model
                  properties:
                    loras:
                      description: LoRAs defines the list of LoRA adapters available
                        for this model
                      items:
                        description: LoRAConfig defines a LoRA adapter configuration
                        properties:
                          description:
                            description: Description provides a human-readable description
                              of this LoRA adapter
                            maxLength: 500
                            type: string
                          name:
                            description: Name is the unique identifier for this LoRA
                              adapter
                            maxLength: 100
                            minLength: 1
                            type: string
                        required:
                        - name
                        type: object
                      maxItems: 50
                      type: array
                    name:
                      description: Name is the unique identifier for this model
                      maxLength: 100
                      minLength: 1
                      type: string
                    pricing:
                      description: Pricing defines the cost structure for this model
                      properties:
                        inputTokenPrice:
                          description: InputTokenPrice is the cost per input token
                          minimum: 0
                          type: number
                        outputTokenPrice:
                          description: OutputTokenPrice is the cost per output token
                          minimum: 0
                          type: number
                      type: object
                    reasoningFamily:
                      description: |-
                        ReasoningFamily specifies the reasoning syntax family (e.g., "qwen3", "deepseek")
                        Must be defined in the global static configuration's ReasoningFamilies
                      maxLength: 50
                      type: string
                  required:
                  - name
                  type: object
                maxItems: 100
                minItems: 1
                type: array
            required:
            - defaultModel
            - models
            type: object
          status:
            description: IntelligentPoolStatus defines the observed state of IntelligentPool
            properties:
              conditions:
                description: Conditions represent the latest available observations
                  of the IntelligentPool's state
                items:
                  description: Condition contains details for one aspect of the current
                    state of this API Resource.
                  properties:
                    lastTransitionTime:
                      description: |-
                        lastTransitionTime is the last time the condition transitioned from one status to another.
                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
                      format: date-time
                      type: string
                    message:
                      description: |-
                        message is a human readable message indicating details about the transition.
                        This may be an empty string.
                      maxLength: 32768
                      type: string
                    observedGeneration:
                      description: |-
                        observedGeneration represents the .metadata.generation that the condition was set based upon.
                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
                        with respect to the current state of the instance.
                      format: int64
                      minimum: 0
                      type: integer
                    reason:
                      description: |-
                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
                        Producers of specific condition types may define expected values and meanings for this field,
                        and whether the values are considered a guaranteed API.
                        The value should be a CamelCase string.
                        This field may not be empty.
                      maxLength: 1024
                      minLength: 1
                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
                      type: string
                    status:
                      description: status of the condition, one of True, False, Unknown.
                      # "True"/"False" stay quoted so they are strings, not
                      # YAML booleans.
                      enum:
                      - "True"
                      - "False"
                      - Unknown
                      type: string
                    type:
                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
                      maxLength: 316
                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
                      type: string
                  required:
                  - lastTransitionTime
                  - message
                  - reason
                  - status
                  - type
                  type: object
                type: array
                # Treat conditions as a map keyed by `type`: entries are merged
                # per condition type under server-side apply, and duplicate
                # types are rejected. `type` is required above, as map keys
                # must be.
                x-kubernetes-list-map-keys:
                - type
                x-kubernetes-list-type: map
              modelCount:
                description: ModelCount indicates the number of models in the pool
                format: int32
                type: integer
              observedGeneration:
                description: ObservedGeneration reflects the generation of the most
                  recently observed IntelligentPool
                format: int64
                type: integer
            type: object
        type: object
    served: true
    storage: true
    # Enables the /status subresource.
    subresources:
      status: {}

0 commit comments

Comments
 (0)