@@ -24,10 +24,22 @@ def evaluation(ctx: click.Context) -> None:
2424 help = "Type of evaluation to create." ,
2525)
2626@click .option (
27- "--judge-model-name " ,
27+ "--judge-model" ,
2828 type = str ,
2929 required = True ,
30- help = "Name of the judge model to use for evaluation." ,
30+ help = "Name or URL of the judge model to use for evaluation." ,
31+ )
32+ @click .option (
33+ "--judge-model-source" ,
34+ type = click .Choice (["serverless" , "dedicated" , "external" ]),
35+ required = True ,
36+ help = "Source of the judge model." ,
37+ )
38+ @click .option (
39+ "--judge-external-api-token" ,
40+ type = str ,
41+ required = False ,
42+ help = "Optional external API token for the judge model." ,
3143)
3244@click .option (
3345 "--judge-system-template" ,
@@ -48,10 +60,20 @@ def evaluation(ctx: click.Context) -> None:
4860 "Can not be used when model-a-name and other model config parameters are specified" ,
4961)
5062@click .option (
51- "--model-to-evaluate-name " ,
63+ "--model-to-evaluate" ,
5264 type = str ,
5365 help = "Model name when using the detailed config" ,
5466)
67+ @click .option (
68+ "--model-to-evaluate-source" ,
69+ type = click .Choice (["serverless" , "dedicated" , "external" ]),
70+ help = "Source of the model to evaluate." ,
71+ )
72+ @click .option (
73+ "--model-to-evaluate-external-api-token" ,
74+ type = str ,
75+ help = "Optional external API token for the model to evaluate." ,
76+ )
5577@click .option (
5678 "--model-to-evaluate-max-tokens" ,
5779 type = int ,
@@ -104,9 +126,19 @@ def evaluation(ctx: click.Context) -> None:
104126 Can not be used when model-a-name and other model config parameters are specified" ,
105127)
106128@click .option (
107- "--model-a-name " ,
129+ "--model-a" ,
108130 type = str ,
109- help = "Model name for model A when using detailed config." ,
131+ help = "Model name or URL for model A when using detailed config." ,
132+ )
133+ @click .option (
134+ "--model-a-source" ,
135+ type = click .Choice (["serverless" , "dedicated" , "external" ]),
136+ help = "Source of model A." ,
137+ )
138+ @click .option (
139+ "--model-a-external-api-token" ,
140+ type = str ,
141+ help = "Optional external API token for model A." ,
110142)
111143@click .option (
112144 "--model-a-max-tokens" ,
@@ -135,9 +167,19 @@ def evaluation(ctx: click.Context) -> None:
135167 Can not be used when model-b-name and other model config parameters are specified" ,
136168)
137169@click .option (
138- "--model-b-name " ,
170+ "--model-b" ,
139171 type = str ,
140- help = "Model name for model B when using detailed config." ,
172+ help = "Model name or URL for model B when using detailed config." ,
173+ )
174+ @click .option (
175+ "--model-b-source" ,
176+ type = click .Choice (["serverless" , "dedicated" , "external" ]),
177+ help = "Source of model B." ,
178+ )
179+ @click .option (
180+ "--model-b-external-api-token" ,
181+ type = str ,
182+ help = "Optional external API token for model B." ,
141183)
142184@click .option (
143185 "--model-b-max-tokens" ,
@@ -162,11 +204,15 @@ def evaluation(ctx: click.Context) -> None:
162204def create (
163205 ctx : click .Context ,
164206 type : str ,
165- judge_model_name : str ,
207+ judge_model : str ,
208+ judge_model_source : str ,
166209 judge_system_template : str ,
210+ judge_external_api_token : Optional [str ],
167211 input_data_file_path : str ,
168212 model_field : Optional [str ],
169- model_to_evaluate_name : Optional [str ],
213+ model_to_evaluate : Optional [str ],
214+ model_to_evaluate_source : Optional [str ],
215+ model_to_evaluate_external_api_token : Optional [str ],
170216 model_to_evaluate_max_tokens : Optional [int ],
171217 model_to_evaluate_temperature : Optional [float ],
172218 model_to_evaluate_system_template : Optional [str ],
@@ -177,13 +223,17 @@ def create(
177223 max_score : Optional [float ],
178224 pass_threshold : Optional [float ],
179225 model_a_field : Optional [str ],
180- model_a_name : Optional [str ],
226+ model_a : Optional [str ],
227+ model_a_source : Optional [str ],
228+ model_a_external_api_token : Optional [str ],
181229 model_a_max_tokens : Optional [int ],
182230 model_a_temperature : Optional [float ],
183231 model_a_system_template : Optional [str ],
184232 model_a_input_template : Optional [str ],
185233 model_b_field : Optional [str ],
186- model_b_name : Optional [str ],
234+ model_b : Optional [str ],
235+ model_b_source : Optional [str ],
236+ model_b_external_api_token : Optional [str ],
187237 model_b_max_tokens : Optional [int ],
188238 model_b_temperature : Optional [float ],
189239 model_b_system_template : Optional [str ],
@@ -203,7 +253,8 @@ def create(
203253 # Check if any config parameters are provided
204254 config_params_provided = any (
205255 [
206- model_to_evaluate_name ,
256+ model_to_evaluate ,
257+ model_to_evaluate_source ,
207258 model_to_evaluate_max_tokens ,
208259 model_to_evaluate_temperature ,
209260 model_to_evaluate_system_template ,
@@ -223,17 +274,23 @@ def create(
223274 elif config_params_provided :
224275 # Config mode: config parameters are provided
225276 model_to_evaluate_final = {
226- "model_name" : model_to_evaluate_name ,
277+ "model" : model_to_evaluate ,
278+ "model_source" : model_to_evaluate_source ,
227279 "max_tokens" : model_to_evaluate_max_tokens ,
228280 "temperature" : model_to_evaluate_temperature ,
229281 "system_template" : model_to_evaluate_system_template ,
230282 "input_template" : model_to_evaluate_input_template ,
231283 }
284+ if model_to_evaluate_external_api_token :
285+ model_to_evaluate_final ["external_api_token" ] = (
286+ model_to_evaluate_external_api_token
287+ )
232288
233289 # Build model-a configuration
234290 model_a_final : Union [Dict [str , Any ], None , str ] = None
235291 model_a_config_params = [
236- model_a_name ,
292+ model_a ,
293+ model_a_source ,
237294 model_a_max_tokens ,
238295 model_a_temperature ,
239296 model_a_system_template ,
@@ -252,17 +309,21 @@ def create(
252309 elif any (model_a_config_params ):
253310 # Config mode: config parameters are provided
254311 model_a_final = {
255- "model_name" : model_a_name ,
312+ "model" : model_a ,
313+ "model_source" : model_a_source ,
256314 "max_tokens" : model_a_max_tokens ,
257315 "temperature" : model_a_temperature ,
258316 "system_template" : model_a_system_template ,
259317 "input_template" : model_a_input_template ,
260318 }
319+ if model_a_external_api_token :
320+ model_a_final ["external_api_token" ] = model_a_external_api_token
261321
262322 # Build model-b configuration
263323 model_b_final : Union [Dict [str , Any ], None , str ] = None
264324 model_b_config_params = [
265- model_b_name ,
325+ model_b ,
326+ model_b_source ,
266327 model_b_max_tokens ,
267328 model_b_temperature ,
268329 model_b_system_template ,
@@ -281,18 +342,23 @@ def create(
281342 elif any (model_b_config_params ):
282343 # Config mode: config parameters are provided
283344 model_b_final = {
284- "model_name" : model_b_name ,
345+ "model" : model_b ,
346+ "model_source" : model_b_source ,
285347 "max_tokens" : model_b_max_tokens ,
286348 "temperature" : model_b_temperature ,
287349 "system_template" : model_b_system_template ,
288350 "input_template" : model_b_input_template ,
289351 }
352+ if model_b_external_api_token :
353+ model_b_final ["external_api_token" ] = model_b_external_api_token
290354
291355 try :
292356 response = client .evaluation .create (
293357 type = type ,
294- judge_model_name = judge_model_name ,
358+ judge_model = judge_model ,
359+ judge_model_source = judge_model_source ,
295360 judge_system_template = judge_system_template ,
361+ judge_external_api_token = judge_external_api_token ,
296362 input_data_file_path = input_data_file_path ,
297363 model_to_evaluate = model_to_evaluate_final ,
298364 labels = labels_list ,
0 commit comments