From a0270e8f42f9b5f37ad9dffc56b7078e7f32e095 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Mon, 9 Jun 2025 17:06:20 +0200
Subject: [PATCH 01/13] Add dpo improvements arguments

---
 src/together/cli/api/finetune.py   | 38 ++++++++++++++++++++++++++
 src/together/resources/finetune.py | 44 +++++++++++++++++++++++++++++-
 src/together/types/finetune.py     |  4 +++
 3 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index 9aa581a8..c5b99ebf 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -142,6 +142,36 @@ def fine_tuning(ctx: click.Context) -> None:
     default=0.1,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
+@click.option(
+    "--dpo-normalize-logratios_by-length",
+    type=bool,
+    default=False,
+    help=(
+        "Whether to normalize logratios by sample length "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--dpo-reference-free",
+    type=bool,
+    default=False,
+    help="Whether to skip reference logits usage (only used when '--training-method' is 'dpo')",
+)
+@click.option(
+    "--rpo-alpha",
+    type=float,
+    default=0.0,
+    help=(
+        "RPO alpha parameter of DPO training to include NLL in the loss "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--simpo-gamma",
+    type=float,
+    default=0.1,
+    help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
+)
 @click.option(
     "--suffix",
     "-s",
@@ -206,6 +236,10 @@ def create(
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
     dpo_beta: float,
+    dpo_normalize_logratios_by_length: bool,
+    dpo_reference_free: bool,
+    rpo_alpha: float,
+    simpo_gamma: float,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
@@ -239,6 +273,10 @@ def create(
         train_on_inputs=train_on_inputs,
         training_method=training_method,
         dpo_beta=dpo_beta,
+        dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+        dpo_reference_free=dpo_reference_free,
+        rpo_alpha=rpo_alpha,
+        simpo_gamma=simpo_gamma,
         from_checkpoint=from_checkpoint,
     )
 
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 1c55412a..b639b057 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -72,6 +72,10 @@ def create_finetune_request(
     train_on_inputs: bool | Literal["auto"] | None = None,
     training_method: str = "sft",
     dpo_beta: float | None = None,
+    dpo_normalize_logratios_by_length: bool = False,
+    dpo_reference_free: bool = False,
+    rpo_alpha: float | None = None,
+    simpo_gamma: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
     if model is not None and from_checkpoint is not None:
@@ -182,6 +186,14 @@ def create_finetune_request(
 
     if dpo_beta is not None and training_method != "dpo":
         raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError("dpo_normalize_logratios_by_length=True is only supported for DPO training")
+    if dpo_reference_free and training_method != "dpo":
+        raise ValueError("dpo_reference_free=True is only supported for DPO training")
+    if rpo_alpha is not None and training_method != "dpo":
+        raise ValueError("rpo_alpha is only supported for DPO training")
+    if simpo_gamma is not None and training_method != "dpo":
+        raise ValueError("simpo_gamma is only supported for DPO training")
 
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
@@ -204,7 +216,13 @@ def create_finetune_request(
     if training_method == "sft":
         training_method_cls = TrainingMethodSFT(train_on_inputs=train_on_inputs)
     elif training_method == "dpo":
-        training_method_cls = TrainingMethodDPO(dpo_beta=dpo_beta)
+        training_method_cls = TrainingMethodDPO(
+            dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
+        )
 
     finetune_request = FinetuneRequest(
         model=model,
@@ -302,6 +320,10 @@ def create(
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        dpo_reference_free: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -353,6 +375,10 @@ def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
+            dpo_reference_free (bool): Whether to skip reference logits usage. Defaults to False.
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -405,6 +431,10 @@ def create(
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
@@ -714,6 +744,10 @@ async def create(
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        dpo_reference_free: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -765,6 +799,10 @@ async def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
+            dpo_reference_free (bool): Whether to skip reference logits usage. Defaults to False.
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -817,6 +855,10 @@ async def create(
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py
index 07ee65ec..e8c388f9 100644
--- a/src/together/types/finetune.py
+++ b/src/together/types/finetune.py
@@ -159,6 +159,10 @@ class TrainingMethodDPO(TrainingMethod):
 
     method: Literal["dpo"] = "dpo"
     dpo_beta: float | None = None
+    dpo_normalize_logratios_by_length: bool = False
+    dpo_reference_free: bool = False
+    rpo_alpha: float | None = None
+    simpo_gamma: float | None = None
 
 
 class FinetuneRequest(BaseModel):

From b92bc17d701b6fa342be5393a5531345e0ebdb65 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Tue, 10 Jun 2025 13:16:07 +0200
Subject: [PATCH 02/13] Version bump (tmp, dev)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c8afe9b2..0cc5508b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.13"
+version = "1.5.14.dev"
 authors = ["Together AI "]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"

From 8e1ee627746c29c14932c8ed5c11e35e41f637f6 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:09:08 +0200
Subject: [PATCH 03/13] Implicit setting of `reference_free` in case if simpo_gamma is set

---
 src/together/cli/api/finetune.py   |  2 --
 src/together/resources/finetune.py | 15 ++++++---------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index c5b99ebf..e90a3c09 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -237,7 +237,6 @@ def create(
     training_method: str,
     dpo_beta: float,
     dpo_normalize_logratios_by_length: bool,
-    dpo_reference_free: bool,
     rpo_alpha: float,
     simpo_gamma: float,
     from_checkpoint: str,
@@ -274,7 +273,6 @@ def create(
         training_method=training_method,
         dpo_beta=dpo_beta,
         dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
-        dpo_reference_free=dpo_reference_free,
         rpo_alpha=rpo_alpha,
         simpo_gamma=simpo_gamma,
         from_checkpoint=from_checkpoint,
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index b639b057..6414a728 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -73,7 +73,6 @@ def create_finetune_request(
     training_method: str = "sft",
     dpo_beta: float | None = None,
     dpo_normalize_logratios_by_length: bool = False,
-    dpo_reference_free: bool = False,
     rpo_alpha: float | None = None,
     simpo_gamma: float | None = None,
     from_checkpoint: str | None = None,
@@ -188,8 +187,6 @@ def create_finetune_request(
         raise ValueError("dpo_beta is only supported for DPO training")
     if dpo_normalize_logratios_by_length and training_method != "dpo":
         raise ValueError("dpo_normalize_logratios_by_length=True is only supported for DPO training")
-    if dpo_reference_free and training_method != "dpo":
-        raise ValueError("dpo_reference_free=True is only supported for DPO training")
     if rpo_alpha is not None and training_method != "dpo":
         raise ValueError("rpo_alpha is only supported for DPO training")
     if simpo_gamma is not None and training_method != "dpo":
         raise ValueError("simpo_gamma is only supported for DPO training")
@@ -216,6 +213,12 @@ def create_finetune_request(
     if training_method == "sft":
         training_method_cls = TrainingMethodSFT(train_on_inputs=train_on_inputs)
     elif training_method == "dpo":
+        if simpo_gamma is not None and simpo_gamma > 0:
+            dpo_reference_free = True
+            rprint(
+                f"Parameter simpo_gamma was set to {simpo_gamma}. "
+                "SimPO training detected. Reference logits will not be used."
+            )
         training_method_cls = TrainingMethodDPO(
             dpo_beta=dpo_beta,
             dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
@@ -321,7 +324,6 @@ def create(
         training_method: str = "sft",
         dpo_beta: float | None = None,
         dpo_normalize_logratios_by_length: bool = False,
-        dpo_reference_free: bool = False,
         rpo_alpha: float | None = None,
         simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
@@ -376,7 +378,6 @@ def create(
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
             dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
-            dpo_reference_free (bool): Whether to skip reference logits usage. Defaults to False.
             rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
             simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
@@ -432,7 +433,6 @@ def create(
             training_method=training_method,
             dpo_beta=dpo_beta,
             dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
-            dpo_reference_free=dpo_reference_free,
             rpo_alpha=rpo_alpha,
             simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
@@ -745,7 +745,6 @@ async def create(
         training_method: str = "sft",
         dpo_beta: float | None = None,
         dpo_normalize_logratios_by_length: bool = False,
-        dpo_reference_free: bool = False,
         rpo_alpha: float | None = None,
         simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
@@ -800,7 +799,6 @@ async def create(
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
             dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
-            dpo_reference_free (bool): Whether to skip reference logits usage. Defaults to False.
             rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
             simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
@@ -856,7 +854,6 @@ async def create(
             training_method=training_method,
             dpo_beta=dpo_beta,
             dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
-            dpo_reference_free=dpo_reference_free,
             rpo_alpha=rpo_alpha,
             simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,

From 7cd0109848130b8a75b4bb9acc2d62309289e104 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:14:45 +0200
Subject: [PATCH 04/13] Fix unbound variable

---
 src/together/resources/finetune.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 6414a728..9c2e1669 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -219,6 +219,9 @@ def create_finetune_request(
                 f"Parameter simpo_gamma was set to {simpo_gamma}. "
                 "SimPO training detected. Reference logits will not be used."
             )
+        else:
+            dpo_reference_free=None
+
         training_method_cls = TrainingMethodDPO(
             dpo_beta=dpo_beta,
             dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,

From 0719212fc3e893302dd398c86036170f55df2284 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:16:09 +0200
Subject: [PATCH 05/13] Fix

---
 src/together/resources/finetune.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 9c2e1669..e7fdee91 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -220,7 +220,7 @@ def create_finetune_request(
                 "SimPO training detected. Reference logits will not be used."
             )
         else:
-            dpo_reference_free=None
+            dpo_reference_free = False
 
         training_method_cls = TrainingMethodDPO(
             dpo_beta=dpo_beta,

From 5f8b188f3973fd75d2c4fa5b9932c8663be0fe93 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:18:47 +0200
Subject: [PATCH 06/13] Force normalization for simpo

---
 src/together/resources/finetune.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index e7fdee91..42d015b5 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -215,9 +215,11 @@ def create_finetune_request(
     elif training_method == "dpo":
         if simpo_gamma is not None and simpo_gamma > 0:
             dpo_reference_free = True
+            dpo_normalize_logratios_by_length = True
             rprint(
                 f"Parameter simpo_gamma was set to {simpo_gamma}. "
-                "SimPO training detected. Reference logits will not be used."
+                "SimPO training detected. Reference logits will not be used "
+                "and length normalization of logps will be enabled."
             )
         else:
             dpo_reference_free = False

From 57c0e16f11520a68dda816c6096b961ab58af729 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:43:25 +0200
Subject: [PATCH 07/13] Version bump

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0cc5508b..b8bdfd62 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.14.dev"
+version = "1.5.15"
 authors = ["Together AI "]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"

From d0a993249ce4f965517620ecadff4d54d00e47f5 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:52:53 +0200
Subject: [PATCH 08/13] Formatting

---
 src/together/resources/finetune.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 42d015b5..2762307d 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -186,7 +186,9 @@ def create_finetune_request(
     if dpo_beta is not None and training_method != "dpo":
         raise ValueError("dpo_beta is only supported for DPO training")
     if dpo_normalize_logratios_by_length and training_method != "dpo":
-        raise ValueError("dpo_normalize_logratios_by_length=True is only supported for DPO training")
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
     if rpo_alpha is not None and training_method != "dpo":
         raise ValueError("rpo_alpha is only supported for DPO training")
     if simpo_gamma is not None and training_method != "dpo":

From 4702194bce19630f415f35f351893f141b51c3e9 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:58:46 +0200
Subject: [PATCH 09/13] Version fix

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b8bdfd62..0f22984d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.15"
+version = "1.5.14"
 authors = ["Together AI "]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"

From a082aac5e0376e9089185efac17c833364f4adaf Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Fri, 13 Jun 2025 10:40:42 +0200
Subject: [PATCH 10/13] Remove reference-free from dpo

---
 src/together/cli/api/finetune.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index e90a3c09..cbde2ddf 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -151,12 +151,6 @@ def fine_tuning(ctx: click.Context) -> None:
         "(only used when '--training-method' is 'dpo')"
     ),
 )
-@click.option(
-    "--dpo-reference-free",
-    type=bool,
-    default=False,
-    help="Whether to skip reference logits usage (only used when '--training-method' is 'dpo')",
-)
 @click.option(
     "--rpo-alpha",
     type=float,

From 92d7e01509300120741e1c49dad656a1eee01707 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Fri, 13 Jun 2025 10:50:55 +0200
Subject: [PATCH 11/13] Review fixes

---
 src/together/resources/finetune.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 2762307d..10b53b52 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -189,10 +189,17 @@ def create_finetune_request(
         raise ValueError(
             "dpo_normalize_logratios_by_length=True is only supported for DPO training"
         )
-    if rpo_alpha is not None and training_method != "dpo":
-        raise ValueError("rpo_alpha is only supported for DPO training")
-    if simpo_gamma is not None and training_method != "dpo":
-        raise ValueError("simpo_gamma is only supported for DPO training")
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
 
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
@@ -221,7 +228,7 @@ def create_finetune_request(
             rprint(
                 f"Parameter simpo_gamma was set to {simpo_gamma}. "
                 "SimPO training detected. Reference logits will not be used "
-                "and length normalization of logps will be enabled."
+                "and length normalization of log-probabilities will be enabled."
             )
         else:
             dpo_reference_free = False

From 51b96fce2baee1c10cc8244e9c9391fdc7d038ad Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Fri, 13 Jun 2025 10:52:21 +0200
Subject: [PATCH 12/13] Formatting

---
 src/together/resources/finetune.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 10b53b52..c1cfad35 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -189,7 +189,7 @@ def create_finetune_request(
         raise ValueError(
             "dpo_normalize_logratios_by_length=True is only supported for DPO training"
        )
-    if rpo_alpha is not None: 
+    if rpo_alpha is not None:
         if training_method != "dpo":
             raise ValueError("rpo_alpha is only supported for DPO training")
         if not rpo_alpha >= 0.0:
             raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")

From 47248241219c3fe8afd6d92d8d840656b54d2302 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Fri, 13 Jun 2025 16:47:24 +0200
Subject: [PATCH 13/13] Fixes

---
 src/together/cli/api/finetune.py   | 2 +-
 src/together/resources/finetune.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index cbde2ddf..f95235cf 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -143,7 +143,7 @@ def fine_tuning(ctx: click.Context) -> None:
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
-    "--dpo-normalize-logratios_by-length",
+    "--dpo-normalize-logratios-by-length",
     type=bool,
     default=False,
     help=(
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index c1cfad35..27baf2d2 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -391,7 +391,7 @@ def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
-            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample length. Defaults to False,
             rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
             simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
@@ -812,7 +812,7 @@ async def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
-            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample length. Defaults to False,
             rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
             simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
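
Usage note (illustrative sketch, not part of the patch series): one way the arguments added above could be passed through the Python client, assuming the existing client.fine_tuning.create(...) entry point of the together SDK. The model name and training-file ID are placeholders, and training_file/model are pre-existing parameters rather than part of this change.

    from together import Together

    client = Together()  # assumes TOGETHER_API_KEY is set in the environment

    job = client.fine_tuning.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # placeholder model name
        training_file="file-xxxxxxxxxxxx",  # placeholder ID of an uploaded preference dataset
        training_method="dpo",
        dpo_beta=0.1,
        dpo_normalize_logratios_by_length=True,  # new: normalize logratios by sample length
        rpo_alpha=0.5,  # new: weight of the additional NLL term in the DPO loss
    )
    print(job.id)

Passing simpo_gamma > 0 instead would switch the job to reference-free SimPO, which per PATCH 03 and PATCH 06 also forces length normalization on. The rough CLI equivalent of the call above is `together fine-tuning create ... --training-method dpo --dpo-normalize-logratios-by-length true --rpo-alpha 0.5`.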