diff --git a/pyproject.toml b/pyproject.toml
index b0d42ad6..91d7b417 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.4"
+version = "1.5.5"
 authors = ["Together AI "]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 6dce5db7..e8f75b5c 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -22,10 +22,10 @@
     TogetherRequest,
     TrainingType,
     FinetuneLRScheduler,
-    FinetuneLinearLRScheduler,
-    FinetuneCosineLRScheduler,
-    FinetuneLinearLRSchedulerArgs,
-    FinetuneCosineLRSchedulerArgs,
+    LinearLRScheduler,
+    CosineLRScheduler,
+    LinearLRSchedulerArgs,
+    CosineLRSchedulerArgs,
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
@@ -50,7 +50,7 @@
 }
 
 
-def createFinetuneRequest(
+def create_finetune_request(
     model_limits: FinetuneTrainingLimits,
     training_file: str,
     model: str | None = None,
@@ -152,21 +152,19 @@ def createFinetuneRequest(
             f"training_method must be one of {', '.join(AVAILABLE_TRAINING_METHODS)}"
         )
 
-    # Default to generic lr scheduler
-    lrScheduler: FinetuneLRScheduler = FinetuneLRScheduler(lr_scheduler_type="linear")
-
+    lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
         if scheduler_num_cycles <= 0.0:
             raise ValueError("Number of cycles should be greater than 0")
 
-        lrScheduler = FinetuneCosineLRScheduler(
-            lr_scheduler_args=FinetuneCosineLRSchedulerArgs(
+        lr_scheduler = CosineLRScheduler(
+            lr_scheduler_args=CosineLRSchedulerArgs(
                 min_lr_ratio=min_lr_ratio, num_cycles=scheduler_num_cycles
             ),
         )
     else:
-        lrScheduler = FinetuneLinearLRScheduler(
-            lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
+        lr_scheduler = LinearLRScheduler(
+            lr_scheduler_args=LinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
         )
 
     training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
@@ -182,7 +180,7 @@ def createFinetuneRequest(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
-        lr_scheduler=lrScheduler,
+        lr_scheduler=lr_scheduler,
         warmup_ratio=warmup_ratio,
         max_grad_norm=max_grad_norm,
         weight_decay=weight_decay,
@@ -374,7 +372,7 @@ def create(
             pass
 
         model_limits = self.get_model_limits(model=model_name)
-        finetune_request = createFinetuneRequest(
+        finetune_request = create_finetune_request(
             model_limits=model_limits,
             training_file=training_file,
             model=model,
@@ -762,7 +760,7 @@ async def create(
             pass
 
         model_limits = await self.get_model_limits(model=model_name)
-        finetune_request = createFinetuneRequest(
+        finetune_request = create_finetune_request(
             model_limits=model_limits,
             training_file=training_file,
             model=model,
diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py
index 53e1858e..fddb3636 100644
--- a/src/together/types/__init__.py
+++ b/src/together/types/__init__.py
@@ -34,11 +34,11 @@
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
-    FinetuneCosineLRScheduler,
-    FinetuneCosineLRSchedulerArgs,
+    CosineLRScheduler,
+    CosineLRSchedulerArgs,
     FinetuneDownloadResult,
-    FinetuneLinearLRScheduler,
-    FinetuneLinearLRSchedulerArgs,
+    LinearLRScheduler,
+    LinearLRSchedulerArgs,
     FinetuneLRScheduler,
     FinetuneList,
     FinetuneListEvents,
@@ -72,10 +72,10 @@
     "FinetuneListEvents",
     "FinetuneDownloadResult",
     "FinetuneLRScheduler",
-    "FinetuneLinearLRScheduler",
-    "FinetuneLinearLRSchedulerArgs",
-    "FinetuneCosineLRScheduler",
-    "FinetuneCosineLRSchedulerArgs",
+    "LinearLRScheduler",
+    "LinearLRSchedulerArgs",
+    "CosineLRScheduler",
+    "CosineLRSchedulerArgs",
     "FileRequest",
     "FileResponse",
     "FileList",
diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py
index a315203c..6325ce59 100644
--- a/src/together/types/finetune.py
+++ b/src/together/types/finetune.py
@@ -1,9 +1,9 @@
 from __future__ import annotations
 
 from enum import Enum
-from typing import List, Literal, Union
+from typing import List, Literal
 
-from pydantic import StrictBool, Field, validator, field_validator, ValidationInfo
+from pydantic import StrictBool, Field, field_validator
 
 from together.types.abstract import BaseModel
 from together.types.common import (
@@ -176,7 +176,7 @@ class FinetuneRequest(BaseModel):
     # training learning rate
     learning_rate: float
     # learning rate scheduler type and args
-    lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
+    lr_scheduler: LinearLRScheduler | CosineLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float
     # max gradient norm
@@ -239,7 +239,7 @@ class FinetuneResponse(BaseModel):
     # training learning rate
     learning_rate: float | None = None
     # learning rate scheduler type and args
-    lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
+    lr_scheduler: LinearLRScheduler | CosineLRScheduler | EmptyLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float | None = None
     # max gradient norm
@@ -345,11 +345,11 @@ class FinetuneTrainingLimits(BaseModel):
     lora_training: FinetuneLoraTrainingLimits | None = None
 
 
-class FinetuneLinearLRSchedulerArgs(BaseModel):
+class LinearLRSchedulerArgs(BaseModel):
     min_lr_ratio: float | None = 0.0
 
 
-class FinetuneCosineLRSchedulerArgs(BaseModel):
+class CosineLRSchedulerArgs(BaseModel):
     min_lr_ratio: float | None = 0.0
     num_cycles: float | None = 0.5
 
@@ -358,14 +358,20 @@ class FinetuneLRScheduler(BaseModel):
     lr_scheduler_type: str
 
 
-class FinetuneLinearLRScheduler(FinetuneLRScheduler):
+class LinearLRScheduler(FinetuneLRScheduler):
     lr_scheduler_type: Literal["linear"] = "linear"
-    lr_scheduler: FinetuneLinearLRSchedulerArgs | None = None
+    lr_scheduler_args: LinearLRSchedulerArgs | None = None
 
 
-class FinetuneCosineLRScheduler(FinetuneLRScheduler):
+class CosineLRScheduler(FinetuneLRScheduler):
     lr_scheduler_type: Literal["cosine"] = "cosine"
-    lr_scheduler: FinetuneCosineLRSchedulerArgs | None = None
+    lr_scheduler_args: CosineLRSchedulerArgs | None = None
+
+
+# placeholder for old fine-tuning jobs with no lr_scheduler_type specified
+class EmptyLRScheduler(FinetuneLRScheduler):
+    lr_scheduler_type: Literal[""]
+    lr_scheduler_args: None = None
 
 
 class FinetuneCheckpoint(BaseModel):
diff --git a/tests/unit/test_finetune_resources.py b/tests/unit/test_finetune_resources.py
index b5c0bb4a..8db880df 100644
--- a/tests/unit/test_finetune_resources.py
+++ b/tests/unit/test_finetune_resources.py
@@ -1,6 +1,6 @@
 import pytest
 
-from together.resources.finetune import createFinetuneRequest
+from together.resources.finetune import create_finetune_request
 from together.types.finetune import (
     FinetuneTrainingLimits,
     FinetuneFullTrainingLimits,
@@ -30,7 +30,7 @@
 
 
 def test_simple_request():
-    request = createFinetuneRequest(
+    request = create_finetune_request(
         model_limits=_MODEL_LIMITS,
         model=_MODEL_NAME,
         training_file=_TRAINING_FILE,
@@ -46,7 +46,7 @@
 
 
 def test_validation_file():
-    request = createFinetuneRequest(
+    request = create_finetune_request(
         model_limits=_MODEL_LIMITS,
         model=_MODEL_NAME,
         training_file=_TRAINING_FILE,
@@ -61,14 +61,14 @@ def test_no_training_file():
     with pytest.raises(
         TypeError, match="missing 1 required positional argument: 'training_file'"
     ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
         )
 
 
 def test_lora_request():
-    request = createFinetuneRequest(
+    request = create_finetune_request(
         model_limits=_MODEL_LIMITS,
         model=_MODEL_NAME,
         training_file=_TRAINING_FILE,
@@ -84,7 +84,7 @@
 
 
 def test_from_checkpoint_request():
-    request = createFinetuneRequest(
+    request = create_finetune_request(
         model_limits=_MODEL_LIMITS,
         training_file=_TRAINING_FILE,
         from_checkpoint=_FROM_CHECKPOINT,
@@ -99,7 +99,7 @@ def test_both_from_checkpoint_model_name():
         ValueError,
         match="You must specify either a model or a checkpoint to start a job from, not both",
     ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -111,7 +111,7 @@ def test_no_from_checkpoint_no_model_name():
     with pytest.raises(
         ValueError, match="You must specify either a model or a checkpoint"
     ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             training_file=_TRAINING_FILE,
         )
@@ -122,7 +122,7 @@ def test_batch_size_limit():
         ValueError,
         match="Requested batch size is higher that the maximum allowed value",
     ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -132,7 +132,7 @@ def test_batch_size_limit():
     with pytest.raises(
         ValueError, match="Requested batch size is lower that the minimum allowed value"
    ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -143,7 +143,7 @@ def test_batch_size_limit():
         ValueError,
         match="Requested batch size is higher that the maximum allowed value",
     ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -154,7 +154,7 @@ def test_batch_size_limit():
     with pytest.raises(
         ValueError, match="Requested batch size is lower that the minimum allowed value"
    ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -167,7 +167,7 @@ def test_non_lora_model():
     with pytest.raises(
         ValueError, match="LoRA adapters are not supported for the selected model."
     ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=FinetuneTrainingLimits(
                 max_num_epochs=20,
                 max_learning_rate=1.0,
@@ -188,7 +188,7 @@ def test_non_full_model():
     with pytest.raises(
         ValueError, match="Full training is not supported for the selected model."
     ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=FinetuneTrainingLimits(
                 max_num_epochs=20,
                 max_learning_rate=1.0,
@@ -210,7 +210,7 @@ def test_non_full_model():
 @pytest.mark.parametrize("warmup_ratio", [-1.0, 2.0])
 def test_bad_warmup(warmup_ratio):
     with pytest.raises(ValueError, match="Warmup ratio should be between 0 and 1"):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -223,7 +223,7 @@ def test_bad_min_lr_ratio(min_lr_ratio):
     with pytest.raises(
         ValueError, match="Min learning rate ratio should be between 0 and 1"
     ):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -234,7 +234,7 @@ def test_bad_min_lr_ratio(min_lr_ratio):
 @pytest.mark.parametrize("max_grad_norm", [-1.0, -0.01])
 def test_bad_max_grad_norm(max_grad_norm):
     with pytest.raises(ValueError, match="Max gradient norm should be non-negative"):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -245,7 +245,7 @@ def test_bad_max_grad_norm(max_grad_norm):
 @pytest.mark.parametrize("weight_decay", [-1.0, -0.01])
 def test_bad_weight_decay(weight_decay):
     with pytest.raises(ValueError, match="Weight decay should be non-negative"):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
@@ -255,7 +255,7 @@ def test_bad_weight_decay(weight_decay):
 
 def test_bad_training_method():
     with pytest.raises(ValueError, match="training_method must be one of .*"):
-        _ = createFinetuneRequest(
+        _ = create_finetune_request(
             model_limits=_MODEL_LIMITS,
             model=_MODEL_NAME,
             training_file=_TRAINING_FILE,
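A minimal usage sketch (not part of the patch) of the renamed scheduler types from src/together/types/finetune.py; it assumes a build with this change installed, and the min_lr_ratio and num_cycles values below are illustrative, not defaults taken from the patch.

    # Sketch only: numeric values are illustrative, not library defaults.
    from together.types.finetune import (
        CosineLRScheduler,
        CosineLRSchedulerArgs,
        LinearLRScheduler,
        LinearLRSchedulerArgs,
    )

    # lr_scheduler_type is fixed per subclass by its Literal default
    # ("cosine" / "linear"), so only the args model needs to be passed.
    cosine = CosineLRScheduler(
        lr_scheduler_args=CosineLRSchedulerArgs(min_lr_ratio=0.1, num_cycles=0.5),
    )
    linear = LinearLRScheduler(
        lr_scheduler_args=LinearLRSchedulerArgs(min_lr_ratio=0.0),
    )

    print(cosine.lr_scheduler_type, linear.lr_scheduler_type)  # cosine linear

In normal use these objects are built internally by create_finetune_request (renamed from createFinetuneRequest) from the lr_scheduler_type, min_lr_ratio, and scheduler_num_cycles arguments, so callers rarely construct them directly.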