diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index e8f75b5c..e7a189c6 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -2,7 +2,7 @@
 
 import re
 from pathlib import Path
-from typing import Literal, List
+from typing import List, Literal
 
 from rich import print as rprint
 
@@ -10,37 +10,38 @@
 from together.filemanager import DownloadManager
 from together.together_response import TogetherResponse
 from together.types import (
+    CosineLRScheduler,
+    CosineLRSchedulerArgs,
+    FinetuneCheckpoint,
     FinetuneDownloadResult,
     FinetuneList,
     FinetuneListEvents,
+    FinetuneLRScheduler,
     FinetuneRequest,
     FinetuneResponse,
     FinetuneTrainingLimits,
     FullTrainingType,
+    LinearLRScheduler,
+    LinearLRSchedulerArgs,
     LoRATrainingType,
     TogetherClient,
     TogetherRequest,
-    TrainingType,
-    FinetuneLRScheduler,
-    LinearLRScheduler,
-    CosineLRScheduler,
-    LinearLRSchedulerArgs,
-    CosineLRSchedulerArgs,
     TrainingMethodDPO,
     TrainingMethodSFT,
-    FinetuneCheckpoint,
+    TrainingType,
 )
 from together.types.finetune import (
     DownloadCheckpointType,
-    FinetuneEventType,
     FinetuneEvent,
+    FinetuneEventType,
 )
 from together.utils import (
+    get_event_step,
     log_warn_once,
     normalize_key,
-    get_event_step,
 )
 
+
 _FT_JOB_WITH_STEP_REGEX = r"^ft-[\dabcdef-]+:\d+$"
 
@@ -63,7 +64,7 @@ def create_finetune_request(
     lr_scheduler_type: Literal["linear", "cosine"] = "linear",
     min_lr_ratio: float = 0.0,
     scheduler_num_cycles: float = 0.5,
-    warmup_ratio: float = 0.0,
+    warmup_ratio: float | None = None,
     max_grad_norm: float = 1.0,
     weight_decay: float = 0.0,
     lora: bool = False,
@@ -81,7 +82,6 @@ def create_finetune_request(
     dpo_beta: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
-
     if model is not None and from_checkpoint is not None:
         raise ValueError(
             "You must specify either a model or a checkpoint to start a job from, not both"
         )
@@ -90,6 +90,8 @@ def create_finetune_request(
     if model is None and from_checkpoint is None:
         raise ValueError("You must specify either a model or a checkpoint")
 
+    model_or_checkpoint = model or from_checkpoint
+
     if batch_size == "max":
         log_warn_once(
             "Starting from together>=1.3.0, "
@@ -103,7 +105,9 @@ def create_finetune_request(
     min_batch_size: int = 0
     if lora:
         if model_limits.lora_training is None:
-            raise ValueError("LoRA adapters are not supported for the selected model.")
+            raise ValueError(
+                f"LoRA adapters are not supported for the selected model ({model_or_checkpoint})."
+            )
         lora_r = lora_r if lora_r is not None else model_limits.lora_training.max_rank
         lora_alpha = lora_alpha if lora_alpha is not None else lora_r * 2
         training_type = LoRATrainingType(
@@ -118,7 +122,9 @@ def create_finetune_request(
 
     else:
         if model_limits.full_training is None:
-            raise ValueError("Full training is not supported for the selected model.")
+            raise ValueError(
+                f"Full training is not supported for the selected model ({model_or_checkpoint})."
+            )
 
         max_batch_size = model_limits.full_training.max_batch_size
         min_batch_size = model_limits.full_training.min_batch_size
@@ -127,25 +133,29 @@ def create_finetune_request(
 
     if batch_size > max_batch_size:
         raise ValueError(
-            "Requested batch size is higher that the maximum allowed value."
+            f"Requested batch size of {batch_size} is higher than the maximum allowed value of {max_batch_size}."
         )
 
     if batch_size < min_batch_size:
         raise ValueError(
-            "Requested batch size is lower that the minimum allowed value."
+            f"Requested batch size of {batch_size} is lower than the minimum allowed value of {min_batch_size}."
        )
 
     if warmup_ratio > 1 or warmup_ratio < 0:
-        raise ValueError("Warmup ratio should be between 0 and 1")
+        raise ValueError(f"Warmup ratio should be between 0 and 1 (got {warmup_ratio})")
 
     if min_lr_ratio is not None and (min_lr_ratio > 1 or min_lr_ratio < 0):
-        raise ValueError("Min learning rate ratio should be between 0 and 1")
+        raise ValueError(
+            f"Min learning rate ratio should be between 0 and 1 (got {min_lr_ratio})"
+        )
 
     if max_grad_norm < 0:
-        raise ValueError("Max gradient norm should be non-negative")
+        raise ValueError(
+            f"Max gradient norm should be non-negative (got {max_grad_norm})"
+        )
 
     if weight_decay is not None and (weight_decay < 0):
-        raise ValueError("Weight decay should be non-negative")
+        raise ValueError(f"Weight decay should be non-negative (got {weight_decay})")
 
     if training_method not in AVAILABLE_TRAINING_METHODS:
         raise ValueError(
@@ -155,7 +165,9 @@ def create_finetune_request(
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
         if scheduler_num_cycles <= 0.0:
-            raise ValueError("Number of cycles should be greater than 0")
+            raise ValueError(
+                f"Number of cycles should be greater than 0 (got {scheduler_num_cycles})"
+            )
 
         lr_scheduler = CosineLRScheduler(
             lr_scheduler_args=CosineLRSchedulerArgs(
diff --git a/tests/unit/test_finetune_resources.py b/tests/unit/test_finetune_resources.py
index 8db880df..f7acdbca 100644
--- a/tests/unit/test_finetune_resources.py
+++ b/tests/unit/test_finetune_resources.py
@@ -2,9 +2,9 @@
 
 from together.resources.finetune import create_finetune_request
 from together.types.finetune import (
-    FinetuneTrainingLimits,
     FinetuneFullTrainingLimits,
     FinetuneLoraTrainingLimits,
+    FinetuneTrainingLimits,
 )
 
 
@@ -117,50 +117,36 @@ def test_no_from_checkpoint_no_model_name():
         )
 
 
-def test_batch_size_limit():
-    with pytest.raises(
-        ValueError,
-        match="Requested batch size is higher that the maximum allowed value",
-    ):
-        _ = create_finetune_request(
-            model_limits=_MODEL_LIMITS,
-            model=_MODEL_NAME,
-            training_file=_TRAINING_FILE,
-            batch_size=128,
-        )
-
-    with pytest.raises(
-        ValueError, match="Requested batch size is lower that the minimum allowed value"
-    ):
-        _ = create_finetune_request(
-            model_limits=_MODEL_LIMITS,
-            model=_MODEL_NAME,
-            training_file=_TRAINING_FILE,
-            batch_size=1,
-        )
-
-    with pytest.raises(
-        ValueError,
-        match="Requested batch size is higher that the maximum allowed value",
-    ):
-        _ = create_finetune_request(
-            model_limits=_MODEL_LIMITS,
-            model=_MODEL_NAME,
-            training_file=_TRAINING_FILE,
-            batch_size=256,
-            lora=True,
-        )
-
-    with pytest.raises(
-        ValueError, match="Requested batch size is lower that the minimum allowed value"
-    ):
-        _ = create_finetune_request(
-            model_limits=_MODEL_LIMITS,
-            model=_MODEL_NAME,
-            training_file=_TRAINING_FILE,
-            batch_size=1,
-            lora=True,
-        )
+@pytest.mark.parametrize("batch_size", [256, 1])
+@pytest.mark.parametrize("use_lora", [False, True])
+def test_batch_size_limit(batch_size, use_lora):
+    model_limits = (
+        _MODEL_LIMITS.full_training if not use_lora else _MODEL_LIMITS.lora_training
+    )
+    max_batch_size = model_limits.max_batch_size
+    min_batch_size = model_limits.min_batch_size
+
+    if batch_size > max_batch_size:
+        error_message = f"Requested batch size of {batch_size} is higher than the maximum allowed value of {max_batch_size}"
+        with pytest.raises(ValueError, match=error_message):
+            _ = create_finetune_request(
+                model_limits=_MODEL_LIMITS,
+                model=_MODEL_NAME,
+                training_file=_TRAINING_FILE,
+                batch_size=batch_size,
+                lora=use_lora,
+            )
+
+    if batch_size < min_batch_size:
+        error_message = f"Requested batch size of {batch_size} is lower than the minimum allowed value of {min_batch_size}"
+        with pytest.raises(ValueError, match=error_message):
+            _ = create_finetune_request(
+                model_limits=_MODEL_LIMITS,
+                model=_MODEL_NAME,
+                training_file=_TRAINING_FILE,
+                batch_size=batch_size,
+                lora=use_lora,
+            )
 
 
 def test_non_lora_model():