Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ optional = true
pytest = ">=7.4.2,<9.0.0"
pytest-watch = "^4.2.0"
pytest-mock = "^3.14.0"
pytest-asyncio = "^0.24.0"
tox = "^4.14.1"

[tool.poetry.group.examples]
Expand Down
34 changes: 34 additions & 0 deletions src/together/cli/api/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ def evaluation(ctx: click.Context) -> None:
required=False,
help="Optional external API token for the judge model.",
)
@click.option(
"--judge-external-base-url",
type=str,
required=False,
help="Optional external base URLs for the judge model.",
)
@click.option(
"--judge-system-template",
type=str,
Expand Down Expand Up @@ -74,6 +80,11 @@ def evaluation(ctx: click.Context) -> None:
type=str,
help="Optional external API token for the model to evaluate.",
)
@click.option(
"--model-to-evaluate-external-base-url",
type=str,
help="Optional external base URL for the model to evaluate.",
)
@click.option(
"--model-to-evaluate-max-tokens",
type=int,
Expand Down Expand Up @@ -140,6 +151,11 @@ def evaluation(ctx: click.Context) -> None:
type=str,
help="Optional external API token for model A.",
)
@click.option(
"--model-a-external-base-url",
type=str,
help="Optional external base URL for model A.",
)
@click.option(
"--model-a-max-tokens",
type=int,
Expand Down Expand Up @@ -181,6 +197,11 @@ def evaluation(ctx: click.Context) -> None:
type=str,
help="Optional external API token for model B.",
)
@click.option(
"--model-b-external-base-url",
type=str,
help="Optional external base URL for model B.",
)
@click.option(
"--model-b-max-tokens",
type=int,
Expand Down Expand Up @@ -208,11 +229,13 @@ def create(
judge_model_source: str,
judge_system_template: str,
judge_external_api_token: Optional[str],
judge_external_base_url: Optional[str],
input_data_file_path: str,
model_field: Optional[str],
model_to_evaluate: Optional[str],
model_to_evaluate_source: Optional[str],
model_to_evaluate_external_api_token: Optional[str],
model_to_evaluate_external_base_url: Optional[str],
model_to_evaluate_max_tokens: Optional[int],
model_to_evaluate_temperature: Optional[float],
model_to_evaluate_system_template: Optional[str],
Expand All @@ -226,6 +249,7 @@ def create(
model_a: Optional[str],
model_a_source: Optional[str],
model_a_external_api_token: Optional[str],
model_a_external_base_url: Optional[str],
model_a_max_tokens: Optional[int],
model_a_temperature: Optional[float],
model_a_system_template: Optional[str],
Expand All @@ -234,6 +258,7 @@ def create(
model_b: Optional[str],
model_b_source: Optional[str],
model_b_external_api_token: Optional[str],
model_b_external_base_url: Optional[str],
model_b_max_tokens: Optional[int],
model_b_temperature: Optional[float],
model_b_system_template: Optional[str],
Expand Down Expand Up @@ -285,6 +310,10 @@ def create(
model_to_evaluate_final["external_api_token"] = (
model_to_evaluate_external_api_token
)
if model_to_evaluate_external_base_url:
model_to_evaluate_final["external_base_url"] = (
model_to_evaluate_external_base_url
)

# Build model-a configuration
model_a_final: Union[Dict[str, Any], None, str] = None
Expand Down Expand Up @@ -318,6 +347,8 @@ def create(
}
if model_a_external_api_token:
model_a_final["external_api_token"] = model_a_external_api_token
if model_a_external_base_url:
model_a_final["external_base_url"] = model_a_external_base_url

# Build model-b configuration
model_b_final: Union[Dict[str, Any], None, str] = None
Expand Down Expand Up @@ -351,6 +382,8 @@ def create(
}
if model_b_external_api_token:
model_b_final["external_api_token"] = model_b_external_api_token
if model_b_external_base_url:
model_b_final["external_base_url"] = model_b_external_base_url

try:
response = client.evaluation.create(
Expand All @@ -359,6 +392,7 @@ def create(
judge_model_source=judge_model_source,
judge_system_template=judge_system_template,
judge_external_api_token=judge_external_api_token,
judge_external_base_url=judge_external_base_url,
input_data_file_path=input_data_file_path,
model_to_evaluate=model_to_evaluate_final,
labels=labels_list,
Expand Down
6 changes: 6 additions & 0 deletions src/together/resources/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def create(
judge_system_template: str,
input_data_file_path: str,
judge_external_api_token: Optional[str] = None,
judge_external_base_url: Optional[str] = None,
# Classify-specific parameters
labels: Optional[List[str]] = None,
pass_labels: Optional[List[str]] = None,
Expand All @@ -55,6 +56,7 @@ def create(
judge_system_template: System template for the judge
input_data_file_path: Path to input data file
judge_external_api_token: Optional external API token for the judge model
judge_external_base_url: Optional external base URL for the judge model
labels: List of classification labels (required for classify)
pass_labels: List of labels considered as passing (required for classify)
min_score: Minimum score value (required for score)
Expand Down Expand Up @@ -82,6 +84,7 @@ def create(
model_source=judge_model_source,
system_template=judge_system_template,
external_api_token=judge_external_api_token,
external_base_url=judge_external_base_url,
)
parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters]
# Build parameters based on type
Expand Down Expand Up @@ -423,6 +426,7 @@ async def create(
judge_system_template: str,
input_data_file_path: str,
judge_external_api_token: Optional[str] = None,
judge_external_base_url: Optional[str] = None,
# Classify-specific parameters
labels: Optional[List[str]] = None,
pass_labels: Optional[List[str]] = None,
Expand All @@ -446,6 +450,7 @@ async def create(
judge_system_template: System template for the judge
input_data_file_path: Path to input data file
judge_external_api_token: Optional external API token for the judge model
judge_external_base_url: Optional external base URL for the judge model
labels: List of classification labels (required for classify)
pass_labels: List of labels considered as passing (required for classify)
min_score: Minimum score value (required for score)
Expand Down Expand Up @@ -473,6 +478,7 @@ async def create(
model_source=judge_model_source,
system_template=judge_system_template,
external_api_token=judge_external_api_token,
external_base_url=judge_external_base_url,
)
parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters]
# Build parameters based on type
Expand Down
2 changes: 2 additions & 0 deletions src/together/types/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class JudgeModelConfig(BaseModel):
model_source: Literal["serverless", "dedicated", "external"]
system_template: str
external_api_token: Optional[str] = None
external_base_url: Optional[str] = None


class ModelRequest(BaseModel):
Expand All @@ -37,6 +38,7 @@ class ModelRequest(BaseModel):
system_template: str
input_template: str
external_api_token: Optional[str] = None
external_base_url: Optional[str] = None


class ClassifyParameters(BaseModel):
Expand Down
Loading