Skip to content

Commit 8745e78

Browse files
VProv and kirahsapong authored
Update evaluation API for a wider model support (#360)
* Update evaluation API for a wider model support
* Add more tests, fix required params in compare
* Update src/together/cli/api/evaluation.py
  Co-authored-by: kirahsapong <[email protected]>
* Update src/together/cli/api/evaluation.py
  Co-authored-by: kirahsapong <[email protected]>
* Update src/together/resources/evaluation.py
  Co-authored-by: kirahsapong <[email protected]>
* Update src/together/resources/evaluation.py
  Co-authored-by: kirahsapong <[email protected]>
* Update src/together/cli/api/evaluation.py
  Co-authored-by: kirahsapong <[email protected]>
* Small changes
* Version update to 1.5.26
---------
Co-authored-by: kirahsapong <[email protected]>
1 parent f537b08 commit 8745e78

File tree

5 files changed

+861
-49
lines changed

5 files changed

+861
-49
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
1212

1313
[tool.poetry]
1414
name = "together"
15-
version = "1.5.25"
15+
version = "1.5.26"
1616
authors = ["Together AI <[email protected]>"]
1717
description = "Python client for Together's Cloud Platform!"
1818
readme = "README.md"

src/together/cli/api/evaluation.py

Lines changed: 84 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,22 @@ def evaluation(ctx: click.Context) -> None:
2424
help="Type of evaluation to create.",
2525
)
2626
@click.option(
27-
"--judge-model-name",
27+
"--judge-model",
2828
type=str,
2929
required=True,
30-
help="Name of the judge model to use for evaluation.",
30+
help="Name or URL of the judge model to use for evaluation.",
31+
)
32+
@click.option(
33+
"--judge-model-source",
34+
type=click.Choice(["serverless", "dedicated", "external"]),
35+
required=True,
36+
help="Source of the judge model.",
37+
)
38+
@click.option(
39+
"--judge-external-api-token",
40+
type=str,
41+
required=False,
42+
help="Optional external API token for the judge model.",
3143
)
3244
@click.option(
3345
"--judge-system-template",
@@ -48,10 +60,20 @@ def evaluation(ctx: click.Context) -> None:
4860
"Can not be used when model-a-name and other model config parameters are specified",
4961
)
5062
@click.option(
51-
"--model-to-evaluate-name",
63+
"--model-to-evaluate",
5264
type=str,
5365
help="Model name when using the detailed config",
5466
)
67+
@click.option(
68+
"--model-to-evaluate-source",
69+
type=click.Choice(["serverless", "dedicated", "external"]),
70+
help="Source of the model to evaluate.",
71+
)
72+
@click.option(
73+
"--model-to-evaluate-external-api-token",
74+
type=str,
75+
help="Optional external API token for the model to evaluate.",
76+
)
5577
@click.option(
5678
"--model-to-evaluate-max-tokens",
5779
type=int,
@@ -104,9 +126,19 @@ def evaluation(ctx: click.Context) -> None:
104126
Can not be used when model-a-name and other model config parameters are specified",
105127
)
106128
@click.option(
107-
"--model-a-name",
129+
"--model-a",
108130
type=str,
109-
help="Model name for model A when using detailed config.",
131+
help="Model name or URL for model A when using detailed config.",
132+
)
133+
@click.option(
134+
"--model-a-source",
135+
type=click.Choice(["serverless", "dedicated", "external"]),
136+
help="Source of model A.",
137+
)
138+
@click.option(
139+
"--model-a-external-api-token",
140+
type=str,
141+
help="Optional external API token for model A.",
110142
)
111143
@click.option(
112144
"--model-a-max-tokens",
@@ -135,9 +167,19 @@ def evaluation(ctx: click.Context) -> None:
135167
Can not be used when model-b-name and other model config parameters are specified",
136168
)
137169
@click.option(
138-
"--model-b-name",
170+
"--model-b",
139171
type=str,
140-
help="Model name for model B when using detailed config.",
172+
help="Model name or URL for model B when using detailed config.",
173+
)
174+
@click.option(
175+
"--model-b-source",
176+
type=click.Choice(["serverless", "dedicated", "external"]),
177+
help="Source of model B.",
178+
)
179+
@click.option(
180+
"--model-b-external-api-token",
181+
type=str,
182+
help="Optional external API token for model B.",
141183
)
142184
@click.option(
143185
"--model-b-max-tokens",
@@ -162,11 +204,15 @@ def evaluation(ctx: click.Context) -> None:
162204
def create(
163205
ctx: click.Context,
164206
type: str,
165-
judge_model_name: str,
207+
judge_model: str,
208+
judge_model_source: str,
166209
judge_system_template: str,
210+
judge_external_api_token: Optional[str],
167211
input_data_file_path: str,
168212
model_field: Optional[str],
169-
model_to_evaluate_name: Optional[str],
213+
model_to_evaluate: Optional[str],
214+
model_to_evaluate_source: Optional[str],
215+
model_to_evaluate_external_api_token: Optional[str],
170216
model_to_evaluate_max_tokens: Optional[int],
171217
model_to_evaluate_temperature: Optional[float],
172218
model_to_evaluate_system_template: Optional[str],
@@ -177,13 +223,17 @@ def create(
177223
max_score: Optional[float],
178224
pass_threshold: Optional[float],
179225
model_a_field: Optional[str],
180-
model_a_name: Optional[str],
226+
model_a: Optional[str],
227+
model_a_source: Optional[str],
228+
model_a_external_api_token: Optional[str],
181229
model_a_max_tokens: Optional[int],
182230
model_a_temperature: Optional[float],
183231
model_a_system_template: Optional[str],
184232
model_a_input_template: Optional[str],
185233
model_b_field: Optional[str],
186-
model_b_name: Optional[str],
234+
model_b: Optional[str],
235+
model_b_source: Optional[str],
236+
model_b_external_api_token: Optional[str],
187237
model_b_max_tokens: Optional[int],
188238
model_b_temperature: Optional[float],
189239
model_b_system_template: Optional[str],
@@ -203,7 +253,8 @@ def create(
203253
# Check if any config parameters are provided
204254
config_params_provided = any(
205255
[
206-
model_to_evaluate_name,
256+
model_to_evaluate,
257+
model_to_evaluate_source,
207258
model_to_evaluate_max_tokens,
208259
model_to_evaluate_temperature,
209260
model_to_evaluate_system_template,
@@ -223,17 +274,23 @@ def create(
223274
elif config_params_provided:
224275
# Config mode: config parameters are provided
225276
model_to_evaluate_final = {
226-
"model_name": model_to_evaluate_name,
277+
"model": model_to_evaluate,
278+
"model_source": model_to_evaluate_source,
227279
"max_tokens": model_to_evaluate_max_tokens,
228280
"temperature": model_to_evaluate_temperature,
229281
"system_template": model_to_evaluate_system_template,
230282
"input_template": model_to_evaluate_input_template,
231283
}
284+
if model_to_evaluate_external_api_token:
285+
model_to_evaluate_final["external_api_token"] = (
286+
model_to_evaluate_external_api_token
287+
)
232288

233289
# Build model-a configuration
234290
model_a_final: Union[Dict[str, Any], None, str] = None
235291
model_a_config_params = [
236-
model_a_name,
292+
model_a,
293+
model_a_source,
237294
model_a_max_tokens,
238295
model_a_temperature,
239296
model_a_system_template,
@@ -252,17 +309,21 @@ def create(
252309
elif any(model_a_config_params):
253310
# Config mode: config parameters are provided
254311
model_a_final = {
255-
"model_name": model_a_name,
312+
"model": model_a,
313+
"model_source": model_a_source,
256314
"max_tokens": model_a_max_tokens,
257315
"temperature": model_a_temperature,
258316
"system_template": model_a_system_template,
259317
"input_template": model_a_input_template,
260318
}
319+
if model_a_external_api_token:
320+
model_a_final["external_api_token"] = model_a_external_api_token
261321

262322
# Build model-b configuration
263323
model_b_final: Union[Dict[str, Any], None, str] = None
264324
model_b_config_params = [
265-
model_b_name,
325+
model_b,
326+
model_b_source,
266327
model_b_max_tokens,
267328
model_b_temperature,
268329
model_b_system_template,
@@ -281,18 +342,23 @@ def create(
281342
elif any(model_b_config_params):
282343
# Config mode: config parameters are provided
283344
model_b_final = {
284-
"model_name": model_b_name,
345+
"model": model_b,
346+
"model_source": model_b_source,
285347
"max_tokens": model_b_max_tokens,
286348
"temperature": model_b_temperature,
287349
"system_template": model_b_system_template,
288350
"input_template": model_b_input_template,
289351
}
352+
if model_b_external_api_token:
353+
model_b_final["external_api_token"] = model_b_external_api_token
290354

291355
try:
292356
response = client.evaluation.create(
293357
type=type,
294-
judge_model_name=judge_model_name,
358+
judge_model=judge_model,
359+
judge_model_source=judge_model_source,
295360
judge_system_template=judge_system_template,
361+
judge_external_api_token=judge_external_api_token,
296362
input_data_file_path=input_data_file_path,
297363
model_to_evaluate=model_to_evaluate_final,
298364
labels=labels_list,

0 commit comments

Comments
 (0)