diff --git a/pyproject.toml b/pyproject.toml
index 145650c5..17373042 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.16"
+version = "1.5.17"
 authors = ["Together AI <support@together.ai>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index b77df655..172acdd3 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -139,7 +139,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--dpo-beta",
     type=float,
-    default=0.1,
+    default=None,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
@@ -154,7 +154,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--rpo-alpha",
     type=float,
-    default=0.0,
+    default=None,
     help=(
         "RPO alpha parameter of DPO training to include NLL in the loss "
         "(only used when '--training-method' is 'dpo')"
     ),
@@ -163,7 +163,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--simpo-gamma",
     type=float,
-    default=0.0,
+    default=None,
     help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
 )
 @click.option(
@@ -188,7 +188,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--train-on-inputs",
     type=BOOL_WITH_AUTO,
-    default="auto",
+    default=None,
     help="Whether to mask the user messages in conversational data or prompts in instruction data. "
     "`auto` will automatically determine whether to mask the inputs based on the data format.",
 )
@@ -229,10 +229,10 @@ def create(
     confirm: bool,
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
-    dpo_beta: float,
+    dpo_beta: float | None,
     dpo_normalize_logratios_by_length: bool,
-    rpo_alpha: float,
-    simpo_gamma: float,
+    rpo_alpha: float | None,
+    simpo_gamma: float | None,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 67c74c40..27baf2d2 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -183,6 +183,24 @@ def create_finetune_request(
         )
         train_on_inputs = "auto"
 
+    if dpo_beta is not None and training_method != "dpo":
+        raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
+
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
         if scheduler_num_cycles <= 0.0:
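
Reviewer note: the sketch below mirrors the guards this patch adds to create_finetune_request, so the accepted and rejected argument combinations are easy to see at a glance. The helper name validate_dpo_args is hypothetical (in the patch the checks are inlined in src/together/resources/finetune.py), and the real function takes many more parameters than shown here.

# Minimal standalone sketch of the validation introduced in this patch.
# `validate_dpo_args` is a hypothetical helper name; the actual checks
# live inline in create_finetune_request.
def validate_dpo_args(
    training_method: str,
    dpo_beta: float | None = None,
    dpo_normalize_logratios_by_length: bool = False,
    rpo_alpha: float | None = None,
    simpo_gamma: float | None = None,
) -> None:
    # Each DPO-specific argument is rejected unless training_method == "dpo".
    if dpo_beta is not None and training_method != "dpo":
        raise ValueError("dpo_beta is only supported for DPO training")
    if dpo_normalize_logratios_by_length and training_method != "dpo":
        raise ValueError(
            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
        )
    if rpo_alpha is not None:
        if training_method != "dpo":
            raise ValueError("rpo_alpha is only supported for DPO training")
        # rpo_alpha and simpo_gamma must also be non-negative when given.
        if not rpo_alpha >= 0.0:
            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
    if simpo_gamma is not None:
        if training_method != "dpo":
            raise ValueError("simpo_gamma is only supported for DPO training")
        if not simpo_gamma >= 0.0:
            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")

validate_dpo_args("dpo", dpo_beta=0.1, rpo_alpha=0.5)  # accepted
validate_dpo_args("sft")                               # accepted: DPO args left unset
# validate_dpo_args("sft", dpo_beta=0.1)               # would raise ValueError
# validate_dpo_args("dpo", simpo_gamma=-1.0)           # would raise ValueError

Switching the CLI defaults from concrete values (0.1, 0.0, "auto") to None is what makes these guards workable: a flag the user never passed now stays unset instead of silently carrying a DPO default into non-DPO jobs.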