@@ -39,101 +39,32 @@ prompt_guard:
3939
4040# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
4141vllm_endpoints :
42- - name : " endpoint1"
43- address : " 127.0.0.1"
44- port : 11434
45- models :
46- - " phi4"
47- - " gemma3:27b"
48- weight : 1 # Load balancing weight
49- health_check_path : " /health" # Optional health check endpoint
50- - name : " endpoint2"
51- address : " 127.0.0.1"
52- port : 11434
53- models :
54- - " mistral-small3.1"
55- weight : 1
56- health_check_path : " /health"
57- - name : " endpoint3"
58- address : " 127.0.0.1"
59- port : 11434
60- models :
61- - " phi4" # Same model can be served by multiple endpoints for redundancy
62- - " mistral-small3.1"
63- weight : 2 # Higher weight for more powerful endpoint
6442 - name : " qwen-endpoint"
6543 address : " 127.0.0.1"
6644 port : 8000
6745 models :
68- - " Qwen/Qwen2-0.5B-Instruct "
46+ - " Model-A "
6947 weight : 1
7048 health_check_path : " /health"
7149 - name : " tinyllama-endpoint"
7250 address : " 127.0.0.1"
7351 port : 8001
7452 models :
75- - " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
53+ - " Model-B "
7654 weight : 1
7755 health_check_path : " /health"
7856
7957model_config :
80- phi4 :
81- pricing :
82- currency : USD
83- prompt_per_1m : 0.07
84- completion_per_1m : 0.35
85- pii_policy :
86- allow_by_default : false # Deny all PII by default
87- pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
88- # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
89- preferred_endpoints : ["endpoint1", "endpoint3"]
90- # Reasoning family - phi4 doesn't support reasoning, so omit this field
9158
92- # Example: DeepSeek model with custom name
93- " ds-v31-custom " :
94- reasoning_family : " deepseek" # This model uses DeepSeek reasoning syntax
95- preferred_endpoints : ["endpoint1"]
96- pii_policy :
97- allow_by_default : true
98-
99- # Example: Qwen3 model with custom name
100- " my-qwen3-model " :
101- reasoning_family : " qwen3" # This model uses Qwen3 reasoning syntax
102- preferred_endpoints : ["endpoint2"]
103- pii_policy :
104- allow_by_default : true
105-
106- # Example: GPT-OSS model with custom name
107- " custom-gpt-oss " :
108- reasoning_family : " gpt-oss" # This model uses GPT-OSS reasoning syntax
109- preferred_endpoints : ["endpoint1"]
110- pii_policy :
111- allow_by_default : true
112- gemma3:27b :
113- pricing :
114- currency : USD
115- prompt_per_1m : 0.067
116- completion_per_1m : 0.267
117- pii_policy :
118- allow_by_default : false # Deny all PII by default
119- pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
120- preferred_endpoints : ["endpoint1"]
121- " mistral-small3.1 " :
122- pricing :
123- currency : USD
124- prompt_per_1m : 0.1
125- completion_per_1m : 0.3
126- pii_policy :
127- allow_by_default : false # Deny all PII by default
128- pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
129- preferred_endpoints : ["endpoint2", "endpoint3"]
130- " Qwen/Qwen2-0.5B-Instruct " :
59+ " Model-A " :
60+ use_reasoning : false
13161 reasoning_family : " qwen3" # This model uses Qwen reasoning syntax
13262 preferred_endpoints : ["qwen-endpoint"]
13363 pii_policy :
13464 allow_by_default : true
13565 pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]
136- " TinyLlama/TinyLlama-1.1B-Chat-v1.0 " :
66+ " Model-B " :
67+ use_reasoning : false
13768 preferred_endpoints : ["tinyllama-endpoint"]
13869 pii_policy :
13970 allow_by_default : true
@@ -159,148 +90,191 @@ categories:
15990 reasoning_description : " Business content is typically conversational"
16091 reasoning_effort : low # Business conversations need low reasoning effort
16192 model_scores :
162- - model : phi4
93+ - model : " Model-A "
16394 score : 0.8
164- - model : gemma3:27b
95+ use_reasoning : false
96+ - model : " Model-B"
16597 score : 0.4
166- - model : mistral-small3.1
98+ use_reasoning : false
99+ - model : " Model-A"
167100 score : 0.2
101+ use_reasoning : false
168102 - name : law
169103 use_reasoning : false
170104 reasoning_description : " Legal content is typically explanatory"
171105 model_scores :
172- - model : gemma3:27b
106+ - model : " Model-B "
173107 score : 0.8
174- - model : phi4
108+ use_reasoning : false
109+ - model : " Model-A"
175110 score : 0.6
176- - model : mistral-small3.1
111+ use_reasoning : false
112+ - model : " Model-A"
177113 score : 0.4
114+ use_reasoning : false
178115 - name : psychology
179116 use_reasoning : false
180117 reasoning_description : " Psychology content is usually explanatory"
181118 model_scores :
182- - model : mistral-small3.1
119+ - model : " Model-A "
183120 score : 0.6
184- - model : gemma3:27b
121+ use_reasoning : false
122+ - model : " Model-B"
185123 score : 0.4
186- - model : phi4
124+ use_reasoning : false
125+ - model : " Model-A"
187126 score : 0.4
127+ use_reasoning : false
188128 - name : biology
189129 use_reasoning : true
190130 reasoning_description : " Biological processes benefit from structured analysis"
191131 model_scores :
192- - model : mistral-small3.1
132+ - model : " Model-A "
193133 score : 0.8
194- - model : gemma3:27b
134+ use_reasoning : false
135+ - model : " Model-B"
195136 score : 0.6
196- - model : phi4
137+ use_reasoning : false
138+ - model : " Model-A"
197139 score : 0.2
140+ use_reasoning : false
198141 - name : chemistry
199142 use_reasoning : true
200143 reasoning_description : " Chemical reactions and formulas require systematic thinking"
201144 reasoning_effort : high # Chemistry requires high reasoning effort
202145 model_scores :
203- - model : mistral-small3.1
146+ - model : " Model-A "
204147 score : 0.8
205- - model : gemma3:27b
148+ use_reasoning : true
149+ - model : " Model-B"
206150 score : 0.6
207- - model : phi4
151+ use_reasoning : false
152+ - model : " Model-A"
208153 score : 0.6
154+ use_reasoning : false
209155 - name : history
210156 use_reasoning : false
211157 reasoning_description : " Historical content is narrative-based"
212158 model_scores :
213- - model : mistral-small3.1
159+ - model : " Model-A "
214160 score : 0.8
215- - model : phi4
161+ use_reasoning : false
162+ - model : " Model-A"
216163 score : 0.6
217- - model : gemma3:27b
164+ use_reasoning : false
165+ - model : " Model-B"
218166 score : 0.4
167+ use_reasoning : false
219168 - name : other
220169 use_reasoning : false
221170 reasoning_description : " General content doesn't require reasoning"
222171 model_scores :
223- - model : gemma3:27b
172+ - model : " Model-B "
224173 score : 0.8
225- - model : phi4
174+ use_reasoning : false
175+ - model : " Model-A"
226176 score : 0.6
227- - model : mistral-small3.1
177+ use_reasoning : false
178+ - model : " Model-A"
228179 score : 0.6
180+ use_reasoning : false
229181 - name : health
230182 use_reasoning : false
231183 reasoning_description : " Health information is typically informational"
232184 model_scores :
233- - model : gemma3:27b
185+ - model : " Model-B "
234186 score : 0.8
235- - model : phi4
187+ use_reasoning : false
188+ - model : " Model-A"
236189 score : 0.8
237- - model : mistral-small3.1
190+ use_reasoning : false
191+ - model : " Model-A"
238192 score : 0.6
193+ use_reasoning : false
239194 - name : economics
240195 use_reasoning : false
241196 reasoning_description : " Economic discussions are usually explanatory"
242197 model_scores :
243- - model : gemma3:27b
198+ - model : " Model-B "
244199 score : 0.8
245- - model : mistral-small3.1
200+ use_reasoning : false
201+ - model : " Model-A"
246202 score : 0.8
247- - model : phi4
248- score : 0.0
203+ use_reasoning : false
204+ - model : " Model-A"
205+ score : 0.1
206+ use_reasoning : false
249207 - name : math
250208 use_reasoning : true
251209 reasoning_description : " Mathematical problems require step-by-step reasoning"
252210 reasoning_effort : high # Math problems need high reasoning effort
253211 model_scores :
254- - model : TinyLlama/TinyLlama-1.1B-Chat-v1.0
212+ - model : " Model-B "
255213 score : 1.0
256- - model : phi4
214+ use_reasoning : true
215+ - model : " Model-A"
257216 score : 0.9
258- - model : mistral-small3.1
217+ use_reasoning : true
218+ - model : " Model-A"
259219 score : 0.8
260- - model : gemma3:27b
220+ use_reasoning : false
221+ - model : " Model-B"
261222 score : 0.6
223+ use_reasoning : false
262224 - name : physics
263225 use_reasoning : true
264226 reasoning_description : " Physics concepts need logical analysis"
265227 model_scores :
266- - model : gemma3:27b
228+ - model : " Model-B "
267229 score : 0.4
268- - model : phi4
230+ use_reasoning : true
231+ - model : " Model-A"
269232 score : 0.4
270- - model : mistral-small3.1
233+ use_reasoning : false
234+ - model : " Model-A"
271235 score : 0.4
236+ use_reasoning : false
272237 - name : computer science
273238 use_reasoning : true
274239 reasoning_description : " Programming and algorithms need logical reasoning"
275240 model_scores :
276- - model : gemma3:27b
241+ - model : " Model-B "
277242 score : 0.6
278- - model : mistral-small3.1
243+ use_reasoning : false
244+ - model : " Model-A"
279245 score : 0.6
280- - model : phi4
281- score : 0.0
246+ use_reasoning : false
247+ - model : " Model-A"
248+ score : 0.1
249+ use_reasoning : false
282250 - name : philosophy
283251 use_reasoning : false
284252 reasoning_description : " Philosophical discussions are conversational"
285253 model_scores :
286- - model : phi4
254+ - model : " Model-A "
287255 score : 0.6
288- - model : gemma3:27b
256+ use_reasoning : false
257+ - model : " Model-B"
289258 score : 0.2
290- - model : mistral-small3.1
259+ use_reasoning : false
260+ - model : " Model-A"
291261 score : 0.2
262+ use_reasoning : false
292263 - name : engineering
293264 use_reasoning : true
294265 reasoning_description : " Engineering problems require systematic problem-solving"
295266 model_scores :
296- - model : gemma3:27b
267+ - model : " Model-B "
297268 score : 0.6
298- - model : mistral-small3.1
269+ use_reasoning : false
270+ - model : " Model-A"
299271 score : 0.6
300- - model : phi4
272+ use_reasoning : false
273+ - model : " Model-A"
301274 score : 0.2
275+ use_reasoning : false
302276
303- default_model : mistral-small3.1
277+ default_model : " Model-A "
304278
305279# API Configuration
306280api :
0 commit comments