pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

 [tool.poetry]
 name = "together"
-version = "1.5.16"
+version = "1.5.17"
 authors = ["Together AI <[email protected]>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
src/together/cli/api/finetune.py (12 changes: 6 additions & 6 deletions)
@@ -139,7 +139,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--dpo-beta",
     type=float,
-    default=0.1,
+    default=None,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
@@ -154,7 +154,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--rpo-alpha",
     type=float,
-    default=0.0,
+    default=None,
     help=(
         "RPO alpha parameter of DPO training to include NLL in the loss "
         "(only used when '--training-method' is 'dpo')"
@@ -163,7 +163,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--simpo-gamma",
     type=float,
-    default=0.0,
+    default=None,
     help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
 )
 @click.option(
@@ -229,10 +229,10 @@ def create(
     confirm: bool,
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
-    dpo_beta: float,
+    dpo_beta: float | None,
     dpo_normalize_logratios_by_length: bool,
-    rpo_alpha: float,
-    simpo_gamma: float,
+    rpo_alpha: float | None,
+    simpo_gamma: float | None,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
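Net effect of the CLI changes above: the DPO-specific flags now default to None, so the client can distinguish "flag not passed" from an explicit value and leave defaulting to the API. A minimal sketch of that pattern (the helper name collect_dpo_kwargs is illustrative, not part of the library):

    # Sketch: forward only the DPO knobs the user explicitly set;
    # anything left at None is omitted so server-side defaults apply.
    def collect_dpo_kwargs(
        dpo_beta: float | None,
        rpo_alpha: float | None,
        simpo_gamma: float | None,
    ) -> dict[str, float]:
        candidates = {
            "dpo_beta": dpo_beta,
            "rpo_alpha": rpo_alpha,
            "simpo_gamma": simpo_gamma,
        }
        return {name: value for name, value in candidates.items() if value is not None}

    collect_dpo_kwargs(0.1, None, None)  # {'dpo_beta': 0.1}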
src/together/resources/finetune.py (18 changes: 18 additions & 0 deletions)
@@ -183,6 +183,24 @@ def create_finetune_request(
         )
         train_on_inputs = "auto"
 
+    if dpo_beta is not None and training_method != "dpo":
+        raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
+
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
         if scheduler_num_cycles <= 0.0:
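Taken together, the new guards make create_finetune_request fail fast when a DPO-only parameter is combined with a non-DPO training method or given a negative value. A self-contained sketch of the same checks, runnable outside the library (the function name validate_dpo_params is an assumption for illustration, not part of the package):

    # Assumed standalone illustration of the validation added in this PR.
    def validate_dpo_params(
        training_method: str,
        dpo_beta: float | None = None,
        rpo_alpha: float | None = None,
        simpo_gamma: float | None = None,
    ) -> None:
        if dpo_beta is not None and training_method != "dpo":
            raise ValueError("dpo_beta is only supported for DPO training")
        for name, value in (("rpo_alpha", rpo_alpha), ("simpo_gamma", simpo_gamma)):
            if value is None:
                continue
            if training_method != "dpo":
                raise ValueError(f"{name} is only supported for DPO training")
            if value < 0.0:
                raise ValueError(f"{name} should be non-negative (got {value})")

    validate_dpo_params("dpo", dpo_beta=0.1)    # passes
    # validate_dpo_params("sft", dpo_beta=0.1)  # would raise ValueError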