
Commit f8a3fa5

Update default LR scheduler to cosine with 0.5 cycles
- Change default lr_scheduler_type from 'linear' to 'cosine' in finetune.py
- Update CLI default to use cosine scheduler
- Maintains backward compatibility for explicit scheduler specification
1 parent 554d3e8 commit f8a3fa5

2 files changed: +6 -6 lines changed
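The change only touches the default; as the commit message notes, any job that passes the scheduler explicitly keeps its previous behavior. A minimal sketch of pinning the old linear schedule through the Python SDK (assuming the usual Together client entry point; the model name and file ID below are placeholders, not values from this commit):

from together import Together

client = Together()  # reads TOGETHER_API_KEY from the environment

# Passing lr_scheduler_type explicitly opts out of the new "cosine" default,
# so this job still trains with the linear schedule.
job = client.fine_tuning.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # placeholder
    training_file="file-...",  # ID of a previously uploaded training file
    lr_scheduler_type="linear",
)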

src/together/cli/api/finetune.py

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--lr-scheduler-type",
     type=click.Choice(["linear", "cosine"]),
-    default="linear",
+    default="cosine",
     help="Learning rate scheduler type",
 )
 @click.option(
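CLI users get the same opt-out: because the option keeps an explicit default rather than becoming required, passing --lr-scheduler-type linear on the command line restores the previous behavior (the exact subcommand, e.g. "together fine-tuning create", is assumed here and not shown in this diff).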

src/together/resources/finetune.py

Lines changed: 5 additions & 5 deletions
@@ -53,7 +53,7 @@ def create_finetune_request(
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
-    lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+    lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
     min_lr_ratio: float = 0.0,
     scheduler_num_cycles: float = 0.5,
     warmup_ratio: float | None = None,
@@ -276,7 +276,7 @@ def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+        lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
         min_lr_ratio: float = 0.0,
         scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
@@ -313,7 +313,7 @@ def create(
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
-            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
@@ -688,7 +688,7 @@ async def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+        lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
         min_lr_ratio: float = 0.0,
         scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
@@ -725,7 +725,7 @@ async def create(
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
-            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
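For context on the "0.5 cycles" in the commit title: with scheduler_num_cycles = 0.5, a cosine schedule traces half a cosine period, i.e. a single smooth decay from the peak learning rate down to min_lr_ratio times the peak. A minimal sketch of the multiplier, assuming the common Hugging Face-style formulation (the service's actual schedule may differ in detail):

import math

def cosine_lr_multiplier(step: int, total_steps: int,
                         num_cycles: float = 0.5,
                         min_lr_ratio: float = 0.0) -> float:
    # Fraction of training completed, in [0, 1].
    progress = step / max(1, total_steps)
    # num_cycles = 1.0 is one full cosine period; the default 0.5
    # gives a monotonic half-cosine decay.
    cosine = 0.5 * (1.0 + math.cos(math.pi * num_cycles * 2.0 * progress))
    # Interpolate so the learning rate bottoms out at min_lr_ratio * peak.
    return min_lr_ratio + (1.0 - min_lr_ratio) * max(0.0, cosine)

# step 0     -> multiplier 1.0 (peak learning rate)
# final step -> multiplier min_lr_ratio (0.0 by default)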
