diff --git a/pyproject.toml b/pyproject.toml
index 98b8964e..154cf1cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.11"
+version = "1.5.12"
 authors = ["Together AI "]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index 4d57b732..9aa581a8 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -82,7 +82,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--lr-scheduler-type",
     type=click.Choice(["linear", "cosine"]),
-    default="linear",
+    default="cosine",
     help="Learning rate scheduler type",
 )
 @click.option(
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 4e05a772..1c55412a 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -53,7 +53,7 @@ def create_finetune_request(
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
-    lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+    lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
     min_lr_ratio: float = 0.0,
     scheduler_num_cycles: float = 0.5,
     warmup_ratio: float | None = None,
@@ -281,7 +281,7 @@ def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+        lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
         min_lr_ratio: float = 0.0,
         scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
@@ -318,7 +318,7 @@ def create(
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
-            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
@@ -693,7 +693,7 @@ async def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+        lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
         min_lr_ratio: float = 0.0,
         scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
@@ -730,7 +730,7 @@ async def create(
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
-            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
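
Note on behavior: with this change, fine-tuning jobs that do not set a scheduler explicitly will use the cosine schedule instead of linear. A minimal sketch of how a caller could keep the previous behavior, assuming the Python client's fine_tuning.create entry point shown in this diff; the file ID and model name below are placeholders, not values from this patch:

    from together import Together

    client = Together()  # reads TOGETHER_API_KEY from the environment

    job = client.fine_tuning.create(
        training_file="file-xxxxxxxx",       # placeholder: ID of an uploaded training file
        model="example-org/example-base-model",  # placeholder: base model name
        lr_scheduler_type="linear",          # opt back into the old default explicitly
    )

The corresponding CLI invocation would pass --lr-scheduler-type linear explicitly; omitting the flag now selects the cosine scheduler.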