@@ -43,27 +43,27 @@ vllm_endpoints:
4343 address : " 127.0.0.1"
4444 port : 8000
4545 models :
46- - " Qwen/Qwen2-0.5B-Instruct "
46+ - " Model-A "
4747 weight : 1
4848 health_check_path : " /health"
4949 - name : " tinyllama-endpoint"
5050 address : " 127.0.0.1"
5151 port : 8001
5252 models :
53- - " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
53+ - " Model-B "
5454 weight : 1
5555 health_check_path : " /health"
5656
5757model_config :
5858
59- " Qwen/Qwen2-0.5B-Instruct " :
59+ " Model-A " :
6060 use_reasoning : false
6161 reasoning_family : " qwen3" # This model uses Qwen reasoning syntax
6262 preferred_endpoints : ["qwen-endpoint"]
6363 pii_policy :
6464 allow_by_default : true
6565 pii_types_allowed : ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]
66- " TinyLlama/TinyLlama-1.1B-Chat-v1.0 " :
66+ " Model-B " :
6767 use_reasoning : false
6868 preferred_endpoints : ["tinyllama-endpoint"]
6969 pii_policy :
@@ -90,191 +90,191 @@ categories:
9090 reasoning_description : " Business content is typically conversational"
9191 reasoning_effort : low # Business conversations need low reasoning effort
9292 model_scores :
93- - model : " Qwen/Qwen2-0.5B-Instruct "
93+ - model : " Model-A "
9494 score : 0.8
9595 use_reasoning : false
96- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
96+ - model : " Model-B "
9797 score : 0.4
9898 use_reasoning : false
99- - model : " Qwen/Qwen2-0.5B-Instruct "
99+ - model : " Model-A "
100100 score : 0.2
101101 use_reasoning : false
102102 - name : law
103103 use_reasoning : false
104104 reasoning_description : " Legal content is typically explanatory"
105105 model_scores :
106- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
106+ - model : " Model-B "
107107 score : 0.8
108108 use_reasoning : false
109- - model : " Qwen/Qwen2-0.5B-Instruct "
109+ - model : " Model-A "
110110 score : 0.6
111111 use_reasoning : false
112- - model : " Qwen/Qwen2-0.5B-Instruct "
112+ - model : " Model-A "
113113 score : 0.4
114114 use_reasoning : false
115115 - name : psychology
116116 use_reasoning : false
117117 reasoning_description : " Psychology content is usually explanatory"
118118 model_scores :
119- - model : " Qwen/Qwen2-0.5B-Instruct "
119+ - model : " Model-A "
120120 score : 0.6
121121 use_reasoning : false
122- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
122+ - model : " Model-B "
123123 score : 0.4
124124 use_reasoning : false
125- - model : " Qwen/Qwen2-0.5B-Instruct "
125+ - model : " Model-A "
126126 score : 0.4
127127 use_reasoning : false
128128 - name : biology
129129 use_reasoning : true
130130 reasoning_description : " Biological processes benefit from structured analysis"
131131 model_scores :
132- - model : " Qwen/Qwen2-0.5B-Instruct "
132+ - model : " Model-A "
133133 score : 0.8
134134 use_reasoning : false
135- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
135+ - model : " Model-B "
136136 score : 0.6
137137 use_reasoning : false
138- - model : " Qwen/Qwen2-0.5B-Instruct "
138+ - model : " Model-A "
139139 score : 0.2
140140 use_reasoning : false
141141 - name : chemistry
142142 use_reasoning : true
143143 reasoning_description : " Chemical reactions and formulas require systematic thinking"
144144 reasoning_effort : high # Chemistry requires high reasoning effort
145145 model_scores :
146- - model : " Qwen/Qwen2-0.5B-Instruct "
146+ - model : " Model-A "
147147 score : 0.8
148148 use_reasoning : true
149- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
149+ - model : " Model-B "
150150 score : 0.6
151151 use_reasoning : false
152- - model : " Qwen/Qwen2-0.5B-Instruct "
152+ - model : " Model-A "
153153 score : 0.6
154154 use_reasoning : false
155155 - name : history
156156 use_reasoning : false
157157 reasoning_description : " Historical content is narrative-based"
158158 model_scores :
159- - model : " Qwen/Qwen2-0.5B-Instruct "
159+ - model : " Model-A "
160160 score : 0.8
161161 use_reasoning : false
162- - model : " Qwen/Qwen2-0.5B-Instruct "
162+ - model : " Model-A "
163163 score : 0.6
164164 use_reasoning : false
165- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
165+ - model : " Model-B "
166166 score : 0.4
167167 use_reasoning : false
168168 - name : other
169169 use_reasoning : false
170170 reasoning_description : " General content doesn't require reasoning"
171171 model_scores :
172- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
172+ - model : " Model-B "
173173 score : 0.8
174174 use_reasoning : false
175- - model : " Qwen/Qwen2-0.5B-Instruct "
175+ - model : " Model-A "
176176 score : 0.6
177177 use_reasoning : false
178- - model : " Qwen/Qwen2-0.5B-Instruct "
178+ - model : " Model-A "
179179 score : 0.6
180180 use_reasoning : false
181181 - name : health
182182 use_reasoning : false
183183 reasoning_description : " Health information is typically informational"
184184 model_scores :
185- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
185+ - model : " Model-B "
186186 score : 0.8
187187 use_reasoning : false
188- - model : " Qwen/Qwen2-0.5B-Instruct "
188+ - model : " Model-A "
189189 score : 0.8
190190 use_reasoning : false
191- - model : " Qwen/Qwen2-0.5B-Instruct "
191+ - model : " Model-A "
192192 score : 0.6
193193 use_reasoning : false
194194 - name : economics
195195 use_reasoning : false
196196 reasoning_description : " Economic discussions are usually explanatory"
197197 model_scores :
198- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
198+ - model : " Model-B "
199199 score : 0.8
200200 use_reasoning : false
201- - model : " Qwen/Qwen2-0.5B-Instruct "
201+ - model : " Model-A "
202202 score : 0.8
203203 use_reasoning : false
204- - model : " Qwen/Qwen2-0.5B-Instruct "
204+ - model : " Model-A "
205205 score : 0.1
206206 use_reasoning : false
207207 - name : math
208208 use_reasoning : true
209209 reasoning_description : " Mathematical problems require step-by-step reasoning"
210210 reasoning_effort : high # Math problems need high reasoning effort
211211 model_scores :
212- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
212+ - model : " Model-B "
213213 score : 1.0
214214 use_reasoning : true
215- - model : " Qwen/Qwen2-0.5B-Instruct "
215+ - model : " Model-A "
216216 score : 0.9
217217 use_reasoning : true
218- - model : " Qwen/Qwen2-0.5B-Instruct "
218+ - model : " Model-A "
219219 score : 0.8
220220 use_reasoning : false
221- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
221+ - model : " Model-B "
222222 score : 0.6
223223 use_reasoning : false
224224 - name : physics
225225 use_reasoning : true
226226 reasoning_description : " Physics concepts need logical analysis"
227227 model_scores :
228- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
228+ - model : " Model-B "
229229 score : 0.4
230230 use_reasoning : true
231- - model : " Qwen/Qwen2-0.5B-Instruct "
231+ - model : " Model-A "
232232 score : 0.4
233233 use_reasoning : false
234- - model : " Qwen/Qwen2-0.5B-Instruct "
234+ - model : " Model-A "
235235 score : 0.4
236236 use_reasoning : false
237237 - name : computer science
238238 use_reasoning : true
239239 reasoning_description : " Programming and algorithms need logical reasoning"
240240 model_scores :
241- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
241+ - model : " Model-B "
242242 score : 0.6
243243 use_reasoning : false
244- - model : " Qwen/Qwen2-0.5B-Instruct "
244+ - model : " Model-A "
245245 score : 0.6
246246 use_reasoning : false
247- - model : " Qwen/Qwen2-0.5B-Instruct "
247+ - model : " Model-A "
248248 score : 0.1
249249 use_reasoning : false
250250 - name : philosophy
251251 use_reasoning : false
252252 reasoning_description : " Philosophical discussions are conversational"
253253 model_scores :
254- - model : " Qwen/Qwen2-0.5B-Instruct "
254+ - model : " Model-A "
255255 score : 0.6
256256 use_reasoning : false
257- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
257+ - model : " Model-B "
258258 score : 0.2
259259 use_reasoning : false
260- - model : " Qwen/Qwen2-0.5B-Instruct "
260+ - model : " Model-A "
261261 score : 0.2
262262 use_reasoning : false
263263 - name : engineering
264264 use_reasoning : true
265265 reasoning_description : " Engineering problems require systematic problem-solving"
266266 model_scores :
267- - model : " TinyLlama/TinyLlama-1.1B-Chat-v1.0 "
267+ - model : " Model-B "
268268 score : 0.6
269269 use_reasoning : false
270- - model : " Qwen/Qwen2-0.5B-Instruct "
270+ - model : " Model-A "
271271 score : 0.6
272272 use_reasoning : false
273- - model : " Qwen/Qwen2-0.5B-Instruct "
273+ - model : " Model-A "
274274 score : 0.2
275275 use_reasoning : false
276276
277- default_model : " Qwen/Qwen2-0.5B-Instruct "
277+ default_model : " Model-A "
278278
279279# API Configuration
280280api :
0 commit comments