pyproject.toml (2 changes: 1 addition & 1 deletion)

@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.11"
+version = "1.5.12"
 authors = ["Together AI <[email protected]>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
src/together/cli/api/finetune.py (2 changes: 1 addition & 1 deletion)

@@ -82,7 +82,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--lr-scheduler-type",
     type=click.Choice(["linear", "cosine"]),
-    default="linear",
+    default="cosine",
     help="Learning rate scheduler type",
 )
 @click.option(
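
With this change, `together fine-tuning create` launches jobs with the cosine scheduler unless `--lr-scheduler-type linear` is passed explicitly. A minimal sketch of pinning the previous default from a Python script, assuming the `together` CLI is installed and authenticated; the training-file ID and model name are placeholders, not values from this PR:

```python
import subprocess

# Hypothetical values: "file-xxxxxxxx" and the model name are placeholders.
# Passing --lr-scheduler-type explicitly restores the pre-1.5.12 default.
subprocess.run(
    [
        "together", "fine-tuning", "create",
        "--training-file", "file-xxxxxxxx",
        "--model", "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
        "--lr-scheduler-type", "linear",
    ],
    check=True,
)
```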
src/together/resources/finetune.py (10 changes: 5 additions & 5 deletions)

@@ -53,7 +53,7 @@ def create_finetune_request(
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
-    lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+    lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
     min_lr_ratio: float = 0.0,
     scheduler_num_cycles: float = 0.5,
     warmup_ratio: float | None = None,

@@ -281,7 +281,7 @@ def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+        lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
         min_lr_ratio: float = 0.0,
         scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,

@@ -318,7 +318,7 @@ def create(
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
-            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.

@@ -693,7 +693,7 @@ async def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+        lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
         min_lr_ratio: float = 0.0,
         scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,

@@ -730,7 +730,7 @@ async def create(
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
-            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
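
The same default flows through both the sync and async `create` methods, so SDK callers that previously relied on the implicit linear schedule now get cosine. A minimal sketch of setting the scheduler explicitly, assuming `TOGETHER_API_KEY` is set in the environment; the training-file ID and model name are placeholders:

```python
from together import Together

client = Together()  # picks up TOGETHER_API_KEY from the environment

# Placeholders: substitute a real uploaded-file ID and a supported model.
job = client.fine_tuning.create(
    training_file="file-xxxxxxxx",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    lr_scheduler_type="linear",   # omit to get the new "cosine" default
    min_lr_ratio=0.0,             # LR floor as a ratio of the initial LR
    scheduler_num_cycles=0.5,     # used by the cosine scheduler only
)
print(job.id)
```

Omitting `lr_scheduler_type` now yields a half-cycle cosine decay (`scheduler_num_cycles=0.5`) down to `min_lr_ratio` times the initial learning rate.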