pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.2"
+version = "1.5.3"
 authors = [
     "Together AI <[email protected]>"
 ]
src/together/resources/finetune.py (37 changes: 22 additions & 15 deletions)
@@ -87,6 +87,9 @@ def createFinetuneRequest(
             "You must specify either a model or a checkpoint to start a job from, not both"
         )
 
+    if model is None and from_checkpoint is None:
+        raise ValueError("You must specify either a model or a checkpoint")
+
     if batch_size == "max":
         log_warn_once(
             "Starting from together>=1.3.0, "
@@ -96,6 +99,8 @@
         warmup_ratio = 0.0
 
     training_type: TrainingType = FullTrainingType()
+    max_batch_size: int = 0
+    min_batch_size: int = 0
     if lora:
         if model_limits.lora_training is None:
             raise ValueError("LoRA adapters are not supported for the selected model.")
@@ -108,18 +113,26 @@
             lora_trainable_modules=lora_trainable_modules,
         )
 
-        batch_size = (
-            batch_size
-            if batch_size != "max"
-            else model_limits.lora_training.max_batch_size
-        )
+        max_batch_size = model_limits.lora_training.max_batch_size
+        min_batch_size = model_limits.lora_training.min_batch_size
+
     else:
         if model_limits.full_training is None:
             raise ValueError("Full training is not supported for the selected model.")
-        batch_size = (
-            batch_size
-            if batch_size != "max"
-            else model_limits.full_training.max_batch_size
-        )
+
+        max_batch_size = model_limits.full_training.max_batch_size
+        min_batch_size = model_limits.full_training.min_batch_size
+
+    batch_size = batch_size if batch_size != "max" else max_batch_size
+
+    if batch_size > max_batch_size:
+        raise ValueError(
+            "Requested batch size is higher than the maximum allowed value."
+        )
+
+    if batch_size < min_batch_size:
+        raise ValueError(
+            "Requested batch size is lower than the minimum allowed value."
+        )
 
     if warmup_ratio > 1 or warmup_ratio < 0:
@@ -346,9 +359,6 @@ def create(
             FinetuneResponse: Object containing information about fine-tuning job.
         """
 
-        if model is None and from_checkpoint is None:
-            raise ValueError("You must specify either a model or a checkpoint")
-
         requestor = api_requestor.APIRequestor(
             client=self._client,
         )
@@ -737,9 +747,6 @@ async def create(
             FinetuneResponse: Object containing information about fine-tuning job.
         """
 
-        if model is None and from_checkpoint is None:
-            raise ValueError("You must specify either a model or a checkpoint")
-
         requestor = api_requestor.APIRequestor(
             client=self._client,
         )
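With this change, the model/checkpoint requirement moves out of the two create() methods into createFinetuneRequest, and minimum/maximum batch-size checks are enforced there as well, so the validation can be exercised without touching the API. A minimal sketch of the new behaviour follows; the limit values mirror the test fixtures below, and the model name and file ID are the same placeholder constants the tests use, not real resources.

from together.resources.finetune import createFinetuneRequest
from together.types.finetune import FinetuneFullTrainingLimits, FinetuneTrainingLimits

# Illustrative limits only (taken from the test fixture values); real limits are model-specific.
limits = FinetuneTrainingLimits(
    max_num_epochs=20,
    max_learning_rate=1.0,
    min_learning_rate=1e-6,
    full_training=FinetuneFullTrainingLimits(max_batch_size=96, min_batch_size=8),
    lora_training=None,
)

# The default batch_size of "max" resolves to the per-model maximum (96 here).
request = createFinetuneRequest(
    model_limits=limits,
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    training_file="file-7dbce5e9-7993-4520-9f3e-a7ece6c39d84",
)
assert request.batch_size == 96

# Out-of-range values now fail fast with a ValueError before any request is built.
try:
    createFinetuneRequest(
        model_limits=limits,
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
        training_file="file-7dbce5e9-7993-4520-9f3e-a7ece6c39d84",
        batch_size=4,  # below min_batch_size=8
    )
except ValueError as err:
    print(err)  # Requested batch size is lower than the minimum allowed value.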
src/together/types/finetune.py (2 changes: 1 addition & 1 deletion)
@@ -170,7 +170,7 @@ class FinetuneRequest(BaseModel):
     # validation file id
     validation_file: str | None = None
     # base model string
-    model: str
+    model: str | None = None
     # number of epochs to train for
     n_epochs: int
     # training learning rate
tests/unit/test_finetune_resources.py (263 changes: 263 additions & 0 deletions)
@@ -0,0 +1,263 @@
import pytest

from together.resources.finetune import createFinetuneRequest
from together.types.finetune import (
    FinetuneTrainingLimits,
    FinetuneFullTrainingLimits,
    FinetuneLoraTrainingLimits,
)


_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference"
_TRAINING_FILE = "file-7dbce5e9-7993-4520-9f3e-a7ece6c39d84"
_VALIDATION_FILE = "file-7dbce5e9-7553-4520-9f3e-a7ece6c39d84"
_FROM_CHECKPOINT = "ft-12345678-1234-1234-1234-1234567890ab"
_MODEL_LIMITS = FinetuneTrainingLimits(
    max_num_epochs=20,
    max_learning_rate=1.0,
    min_learning_rate=1e-6,
    full_training=FinetuneFullTrainingLimits(
        max_batch_size=96,
        min_batch_size=8,
    ),
    lora_training=FinetuneLoraTrainingLimits(
        max_batch_size=128,
        min_batch_size=8,
        max_rank=64,
        target_modules=["q", "k", "v", "o", "mlp"],
    ),
)


def test_simple_request():
    request = createFinetuneRequest(
        model_limits=_MODEL_LIMITS,
        model=_MODEL_NAME,
        training_file=_TRAINING_FILE,
    )

    assert request.model == _MODEL_NAME
    assert request.training_file == _TRAINING_FILE
    assert request.learning_rate > 0
    assert request.n_epochs > 0
    assert request.warmup_ratio == 0.0
    assert request.training_type.type == "Full"
    assert request.batch_size == _MODEL_LIMITS.full_training.max_batch_size


def test_validation_file():
    request = createFinetuneRequest(
        model_limits=_MODEL_LIMITS,
        model=_MODEL_NAME,
        training_file=_TRAINING_FILE,
        validation_file=_VALIDATION_FILE,
    )

    assert request.training_file == _TRAINING_FILE
    assert request.validation_file == _VALIDATION_FILE


def test_no_training_file():
    with pytest.raises(
        TypeError, match="missing 1 required positional argument: 'training_file'"
    ):
        _ = createFinetuneRequest(
            model_limits=_MODEL_LIMITS,
            model=_MODEL_NAME,
        )


def test_lora_request():
    request = createFinetuneRequest(
        model_limits=_MODEL_LIMITS,
        model=_MODEL_NAME,
        training_file=_TRAINING_FILE,
        lora=True,
    )

    assert request.training_type.type == "Lora"
    assert request.training_type.lora_r == _MODEL_LIMITS.lora_training.max_rank
    assert request.training_type.lora_alpha == _MODEL_LIMITS.lora_training.max_rank * 2
    assert request.training_type.lora_dropout == 0.0
    assert request.training_type.lora_trainable_modules == "all-linear"
    assert request.batch_size == _MODEL_LIMITS.lora_training.max_batch_size


def test_from_checkpoint_request():
    request = createFinetuneRequest(
        model_limits=_MODEL_LIMITS,
        training_file=_TRAINING_FILE,
        from_checkpoint=_FROM_CHECKPOINT,
    )

    assert request.model is None
    assert request.from_checkpoint == _FROM_CHECKPOINT


def test_both_from_checkpoint_model_name():
    with pytest.raises(
        ValueError,
        match="You must specify either a model or a checkpoint to start a job from, not both",
    ):
        _ = createFinetuneRequest(
            model_limits=_MODEL_LIMITS,
            model=_MODEL_NAME,
            training_file=_TRAINING_FILE,
            from_checkpoint=_FROM_CHECKPOINT,
        )


def test_no_from_checkpoint_no_model_name():
    with pytest.raises(
        ValueError, match="You must specify either a model or a checkpoint"
    ):
        _ = createFinetuneRequest(
            model_limits=_MODEL_LIMITS,
            training_file=_TRAINING_FILE,
        )


def test_batch_size_limit():
    with pytest.raises(
        ValueError,
        match="Requested batch size is higher than the maximum allowed value",
    ):
        _ = createFinetuneRequest(
            model_limits=_MODEL_LIMITS,
            model=_MODEL_NAME,
            training_file=_TRAINING_FILE,
            batch_size=128,
        )

    with pytest.raises(
        ValueError, match="Requested batch size is lower than the minimum allowed value"
    ):
        _ = createFinetuneRequest(
            model_limits=_MODEL_LIMITS,
            model=_MODEL_NAME,
            training_file=_TRAINING_FILE,
            batch_size=1,
        )

    with pytest.raises(
        ValueError,
        match="Requested batch size is higher than the maximum allowed value",
    ):
        _ = createFinetuneRequest(
            model_limits=_MODEL_LIMITS,
            model=_MODEL_NAME,
            training_file=_TRAINING_FILE,
            batch_size=256,
            lora=True,
        )

    with pytest.raises(
        ValueError, match="Requested batch size is lower than the minimum allowed value"
    ):
        _ = createFinetuneRequest(
            model_limits=_MODEL_LIMITS,
            model=_MODEL_NAME,
            training_file=_TRAINING_FILE,
            batch_size=1,
            lora=True,
        )


def test_non_lora_model():
    with pytest.raises(
        ValueError, match="LoRA adapters are not supported for the selected model."
    ):
        _ = createFinetuneRequest(
            model_limits=FinetuneTrainingLimits(
                max_num_epochs=20,
                max_learning_rate=1.0,
                min_learning_rate=1e-6,
                full_training=FinetuneFullTrainingLimits(
                    max_batch_size=96,
                    min_batch_size=8,
                ),
                lora_training=None,
            ),
            model=_MODEL_NAME,
            training_file=_TRAINING_FILE,
            lora=True,
        )


def test_non_full_model():
    with pytest.raises(
        ValueError, match="Full training is not supported for the selected model."
    ):
        _ = createFinetuneRequest(
            model_limits=FinetuneTrainingLimits(
                max_num_epochs=20,
                max_learning_rate=1.0,
                min_learning_rate=1e-6,
                lora_training=FinetuneLoraTrainingLimits(
                    max_batch_size=96,
                    min_batch_size=8,
                    max_rank=64,
                    target_modules=["q", "k", "v", "o", "mlp"],
                ),
                full_training=None,
            ),
            model=_MODEL_NAME,
            training_file=_TRAINING_FILE,
            lora=False,
        )


@pytest.mark.parametrize("warmup_ratio", [-1.0, 2.0])
def test_bad_warmup(warmup_ratio):
with pytest.raises(ValueError, match="Warmup ratio should be between 0 and 1"):
_ = createFinetuneRequest(
model_limits=_MODEL_LIMITS,
model=_MODEL_NAME,
training_file=_TRAINING_FILE,
warmup_ratio=warmup_ratio,
)


@pytest.mark.parametrize("min_lr_ratio", [-1.0, 2.0])
def test_bad_min_lr_ratio(min_lr_ratio):
with pytest.raises(
ValueError, match="Min learning rate ratio should be between 0 and 1"
):
_ = createFinetuneRequest(
model_limits=_MODEL_LIMITS,
model=_MODEL_NAME,
training_file=_TRAINING_FILE,
min_lr_ratio=min_lr_ratio,
)


@pytest.mark.parametrize("max_grad_norm", [-1.0, -0.01])
def test_bad_max_grad_norm(max_grad_norm):
with pytest.raises(ValueError, match="Max gradient norm should be non-negative"):
_ = createFinetuneRequest(
model_limits=_MODEL_LIMITS,
model=_MODEL_NAME,
training_file=_TRAINING_FILE,
max_grad_norm=max_grad_norm,
)


@pytest.mark.parametrize("weight_decay", [-1.0, -0.01])
def test_bad_weight_decay(weight_decay):
with pytest.raises(ValueError, match="Weight decay should be non-negative"):
_ = createFinetuneRequest(
model_limits=_MODEL_LIMITS,
model=_MODEL_NAME,
training_file=_TRAINING_FILE,
weight_decay=weight_decay,
)


def test_bad_training_method():
with pytest.raises(ValueError, match="training_method must be one of .*"):
_ = createFinetuneRequest(
model_limits=_MODEL_LIMITS,
model=_MODEL_NAME,
training_file=_TRAINING_FILE,
training_method="NON_SFT",
)