From 938c6cd6ce99b30b15682b273ce93a2ccf5cca9b Mon Sep 17 00:00:00 2001 From: Soroush Bassam Date: Mon, 9 Jun 2025 11:13:35 -0400 Subject: [PATCH 1/2] Update default LR scheduler to cosine with 0.5 cycles - Change default lr_scheduler_type from 'linear' to 'cosine' in finetune.py - Update CLI default to use cosine scheduler - Maintains backward compatibility for explicit scheduler specification --- src/together/cli/api/finetune.py | 2 +- src/together/resources/finetune.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py index 4d57b732..9aa581a8 100644 --- a/src/together/cli/api/finetune.py +++ b/src/together/cli/api/finetune.py @@ -82,7 +82,7 @@ def fine_tuning(ctx: click.Context) -> None: @click.option( "--lr-scheduler-type", type=click.Choice(["linear", "cosine"]), - default="linear", + default="cosine", help="Learning rate scheduler type", ) @click.option( diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index 4e05a772..1c55412a 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -53,7 +53,7 @@ def create_finetune_request( n_checkpoints: int | None = 1, batch_size: int | Literal["max"] = "max", learning_rate: float | None = 0.00001, - lr_scheduler_type: Literal["linear", "cosine"] = "linear", + lr_scheduler_type: Literal["linear", "cosine"] = "cosine", min_lr_ratio: float = 0.0, scheduler_num_cycles: float = 0.5, warmup_ratio: float | None = None, @@ -281,7 +281,7 @@ def create( n_checkpoints: int | None = 1, batch_size: int | Literal["max"] = "max", learning_rate: float | None = 0.00001, - lr_scheduler_type: Literal["linear", "cosine"] = "linear", + lr_scheduler_type: Literal["linear", "cosine"] = "cosine", min_lr_ratio: float = 0.0, scheduler_num_cycles: float = 0.5, warmup_ratio: float = 0.0, @@ -318,7 +318,7 @@ def create( batch_size (int or "max"): Batch size for fine-tuning. Defaults to max. learning_rate (float, optional): Learning rate multiplier to use for training Defaults to 0.00001. - lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear". + lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine". min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for the learning rate scheduler. Defaults to 0.0. scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5. @@ -693,7 +693,7 @@ async def create( n_checkpoints: int | None = 1, batch_size: int | Literal["max"] = "max", learning_rate: float | None = 0.00001, - lr_scheduler_type: Literal["linear", "cosine"] = "linear", + lr_scheduler_type: Literal["linear", "cosine"] = "cosine", min_lr_ratio: float = 0.0, scheduler_num_cycles: float = 0.5, warmup_ratio: float = 0.0, @@ -730,7 +730,7 @@ async def create( batch_size (int, optional): Batch size for fine-tuning. Defaults to max. learning_rate (float, optional): Learning rate multiplier to use for training Defaults to 0.00001. - lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear". + lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine". min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for the learning rate scheduler. Defaults to 0.0. scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5. From 5194fd44fa2df4c28bded2b72121c648e20ba88a Mon Sep 17 00:00:00 2001 From: Soroush Bassam Date: Mon, 9 Jun 2025 13:33:07 -0400 Subject: [PATCH 2/2] Bump version to 1.5.12 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 98b8964e..154cf1cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api" [tool.poetry] name = "together" -version = "1.5.11" +version = "1.5.12" authors = ["Together AI "] description = "Python client for Together's Cloud Platform!" readme = "README.md"