@@ -39,101 +39,32 @@ prompt_guard:
39
39
40
40
# vLLM Endpoints Configuration - supports multiple endpoints, each of which can serve multiple models
41
41
vllm_endpoints :
42
- - name : " endpoint1"
43
- address : " 127.0.0.1"
44
- port : 11434
45
- models :
46
- - " phi4"
47
- - " gemma3:27b"
48
- weight : 1 # Load balancing weight
49
- health_check_path : " /health" # Optional health check endpoint
50
- - name : " endpoint2"
51
- address : " 127.0.0.1"
52
- port : 11434
53
- models :
54
- - " mistral-small3.1"
55
- weight : 1
56
- health_check_path : " /health"
57
- - name : " endpoint3"
58
- address : " 127.0.0.1"
59
- port : 11434
60
- models :
61
- - " phi4" # Same model can be served by multiple endpoints for redundancy
62
- - " mistral-small3.1"
63
- weight : 2 # Higher weight for more powerful endpoint
64
42
- name : " qwen-endpoint"
65
43
address : " 127.0.0.1"
66
44
port : 8000
67
45
models :
68
- - " Qwen/Qwen2-0.5B-Instruct "
46
+ - " Model-A "
69
47
weight : 1
70
48
health_check_path : " /health"
71
49
- name : " tinyllama-endpoint"
72
50
address : " 127.0.0.1"
73
51
port : 8001
74
52
models :
75
- - " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
53
+ - " Model-B "
76
54
weight : 1
77
55
health_check_path : " /health"
78
56
79
57
model_config :
80
- phi4 :
81
- pricing :
82
- currency : USD
83
- prompt_per_1m : 0.07
84
- completion_per_1m : 0.35
85
- pii_policy :
86
- allow_by_default : false # Deny all PII by default
87
- pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
88
- # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
89
- preferred_endpoints : ["endpoint1", "endpoint3"]
90
- # Reasoning family - phi4 doesn't support reasoning, so omit this field
91
58
92
- # Example: DeepSeek model with custom name
93
- " ds-v31-custom " :
94
- reasoning_family : " deepseek" # This model uses DeepSeek reasoning syntax
95
- preferred_endpoints : ["endpoint1"]
96
- pii_policy :
97
- allow_by_default : true
98
-
99
- # Example: Qwen3 model with custom name
100
- " my-qwen3-model " :
101
- reasoning_family : " qwen3" # This model uses Qwen3 reasoning syntax
102
- preferred_endpoints : ["endpoint2"]
103
- pii_policy :
104
- allow_by_default : true
105
-
106
- # Example: GPT-OSS model with custom name
107
- " custom-gpt-oss " :
108
- reasoning_family : " gpt-oss" # This model uses GPT-OSS reasoning syntax
109
- preferred_endpoints : ["endpoint1"]
110
- pii_policy :
111
- allow_by_default : true
112
- gemma3:27b :
113
- pricing :
114
- currency : USD
115
- prompt_per_1m : 0.067
116
- completion_per_1m : 0.267
117
- pii_policy :
118
- allow_by_default : false # Deny all PII by default
119
- pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
120
- preferred_endpoints : ["endpoint1"]
121
- " mistral-small3.1 " :
122
- pricing :
123
- currency : USD
124
- prompt_per_1m : 0.1
125
- completion_per_1m : 0.3
126
- pii_policy :
127
- allow_by_default : false # Deny all PII by default
128
- pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
129
- preferred_endpoints : ["endpoint2", "endpoint3"]
130
- " Qwen/Qwen2-0.5B-Instruct " :
59
+ " Model-A " :
60
+ use_reasoning : false
131
61
reasoning_family : " qwen3" # This model uses Qwen3 reasoning syntax
132
62
preferred_endpoints : ["qwen-endpoint"]
133
63
pii_policy :
134
64
allow_by_default : true
135
65
pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]
136
- " TinyLlama/TinyLlama-1.1B-Chat-v1.0 " :
66
+ " Model-B " :
67
+ use_reasoning : false
137
68
preferred_endpoints : ["tinyllama-endpoint"]
138
69
pii_policy :
139
70
allow_by_default : true
@@ -159,148 +90,191 @@ categories:
159
90
reasoning_description : " Business content is typically conversational"
160
91
reasoning_effort : low # Business conversations need low reasoning effort
161
92
model_scores :
162
- - model : phi4
93
+ - model : " Model-A "
163
94
score : 0.8
164
- - model : gemma3:27b
95
+ use_reasoning : false
96
+ - model : " Model-B"
165
97
score : 0.4
166
- - model : mistral-small3.1
98
+ use_reasoning : false
99
+ - model : " Model-A"
167
100
score : 0.2
101
+ use_reasoning : false
168
102
- name : law
169
103
use_reasoning : false
170
104
reasoning_description : " Legal content is typically explanatory"
171
105
model_scores :
172
- - model : gemma3:27b
106
+ - model : " Model-B "
173
107
score : 0.8
174
- - model : phi4
108
+ use_reasoning : false
109
+ - model : " Model-A"
175
110
score : 0.6
176
- - model : mistral-small3.1
111
+ use_reasoning : false
112
+ - model : " Model-A"
177
113
score : 0.4
114
+ use_reasoning : false
178
115
- name : psychology
179
116
use_reasoning : false
180
117
reasoning_description : " Psychology content is usually explanatory"
181
118
model_scores :
182
- - model : mistral-small3.1
119
+ - model : " Model-A "
183
120
score : 0.6
184
- - model : gemma3:27b
121
+ use_reasoning : false
122
+ - model : " Model-B"
185
123
score : 0.4
186
- - model : phi4
124
+ use_reasoning : false
125
+ - model : " Model-A"
187
126
score : 0.4
127
+ use_reasoning : false
188
128
- name : biology
189
129
use_reasoning : true
190
130
reasoning_description : " Biological processes benefit from structured analysis"
191
131
model_scores :
192
- - model : mistral-small3.1
132
+ - model : " Model-A "
193
133
score : 0.8
194
- - model : gemma3:27b
134
+ use_reasoning : false
135
+ - model : " Model-B"
195
136
score : 0.6
196
- - model : phi4
137
+ use_reasoning : false
138
+ - model : " Model-A"
197
139
score : 0.2
140
+ use_reasoning : false
198
141
- name : chemistry
199
142
use_reasoning : true
200
143
reasoning_description : " Chemical reactions and formulas require systematic thinking"
201
144
reasoning_effort : high # Chemistry requires high reasoning effort
202
145
model_scores :
203
- - model : mistral-small3.1
146
+ - model : " Model-A "
204
147
score : 0.8
205
- - model : gemma3:27b
148
+ use_reasoning : true
149
+ - model : " Model-B"
206
150
score : 0.6
207
- - model : phi4
151
+ use_reasoning : false
152
+ - model : " Model-A"
208
153
score : 0.6
154
+ use_reasoning : false
209
155
- name : history
210
156
use_reasoning : false
211
157
reasoning_description : " Historical content is narrative-based"
212
158
model_scores :
213
- - model : mistral-small3.1
159
+ - model : " Model-A "
214
160
score : 0.8
215
- - model : phi4
161
+ use_reasoning : false
162
+ - model : " Model-A"
216
163
score : 0.6
217
- - model : gemma3:27b
164
+ use_reasoning : false
165
+ - model : " Model-B"
218
166
score : 0.4
167
+ use_reasoning : false
219
168
- name : other
220
169
use_reasoning : false
221
170
reasoning_description : " General content doesn't require reasoning"
222
171
model_scores :
223
- - model : gemma3:27b
172
+ - model : " Model-B "
224
173
score : 0.8
225
- - model : phi4
174
+ use_reasoning : false
175
+ - model : " Model-A"
226
176
score : 0.6
227
- - model : mistral-small3.1
177
+ use_reasoning : false
178
+ - model : " Model-A"
228
179
score : 0.6
180
+ use_reasoning : false
229
181
- name : health
230
182
use_reasoning : false
231
183
reasoning_description : " Health information is typically informational"
232
184
model_scores :
233
- - model : gemma3:27b
185
+ - model : " Model-B "
234
186
score : 0.8
235
- - model : phi4
187
+ use_reasoning : false
188
+ - model : " Model-A"
236
189
score : 0.8
237
- - model : mistral-small3.1
190
+ use_reasoning : false
191
+ - model : " Model-A"
238
192
score : 0.6
193
+ use_reasoning : false
239
194
- name : economics
240
195
use_reasoning : false
241
196
reasoning_description : " Economic discussions are usually explanatory"
242
197
model_scores :
243
- - model : gemma3:27b
198
+ - model : " Model-B "
244
199
score : 0.8
245
- - model : mistral-small3.1
200
+ use_reasoning : false
201
+ - model : " Model-A"
246
202
score : 0.8
247
- - model : phi4
248
- score : 0.0
203
+ use_reasoning : false
204
+ - model : " Model-A"
205
+ score : 0.1
206
+ use_reasoning : false
249
207
- name : math
250
208
use_reasoning : true
251
209
reasoning_description : " Mathematical problems require step-by-step reasoning"
252
210
reasoning_effort : high # Math problems need high reasoning effort
253
211
model_scores :
254
- - model : TinyLlama/TinyLlama-1.1B-Chat-v1.0
212
+ - model : " Model-B "
255
213
score : 1.0
256
- - model : phi4
214
+ use_reasoning : true
215
+ - model : " Model-A"
257
216
score : 0.9
258
- - model : mistral-small3.1
217
+ use_reasoning : true
218
+ - model : " Model-A"
259
219
score : 0.8
260
- - model : gemma3:27b
220
+ use_reasoning : false
221
+ - model : " Model-B"
261
222
score : 0.6
223
+ use_reasoning : false
262
224
- name : physics
263
225
use_reasoning : true
264
226
reasoning_description : " Physics concepts need logical analysis"
265
227
model_scores :
266
- - model : gemma3:27b
228
+ - model : " Model-B "
267
229
score : 0.4
268
- - model : phi4
230
+ use_reasoning : true
231
+ - model : " Model-A"
269
232
score : 0.4
270
- - model : mistral-small3.1
233
+ use_reasoning : false
234
+ - model : " Model-A"
271
235
score : 0.4
236
+ use_reasoning : false
272
237
- name : computer science
273
238
use_reasoning : true
274
239
reasoning_description : " Programming and algorithms need logical reasoning"
275
240
model_scores :
276
- - model : gemma3:27b
241
+ - model : " Model-B "
277
242
score : 0.6
278
- - model : mistral-small3.1
243
+ use_reasoning : false
244
+ - model : " Model-A"
279
245
score : 0.6
280
- - model : phi4
281
- score : 0.0
246
+ use_reasoning : false
247
+ - model : " Model-A"
248
+ score : 0.1
249
+ use_reasoning : false
282
250
- name : philosophy
283
251
use_reasoning : false
284
252
reasoning_description : " Philosophical discussions are conversational"
285
253
model_scores :
286
- - model : phi4
254
+ - model : " Model-A "
287
255
score : 0.6
288
- - model : gemma3:27b
256
+ use_reasoning : false
257
+ - model : " Model-B"
289
258
score : 0.2
290
- - model : mistral-small3.1
259
+ use_reasoning : false
260
+ - model : " Model-A"
291
261
score : 0.2
262
+ use_reasoning : false
292
263
- name : engineering
293
264
use_reasoning : true
294
265
reasoning_description : " Engineering problems require systematic problem-solving"
295
266
model_scores :
296
- - model : gemma3:27b
267
+ - model : " Model-B "
297
268
score : 0.6
298
- - model : mistral-small3.1
269
+ use_reasoning : false
270
+ - model : " Model-A"
299
271
score : 0.6
300
- - model : phi4
272
+ use_reasoning : false
273
+ - model : " Model-A"
301
274
score : 0.2
275
+ use_reasoning : false
302
276
303
- default_model : mistral-small3.1
277
+ default_model : " Model-A "
304
278
305
279
# API Configuration
306
280
api :
0 commit comments