From a0270e8f42f9b5f37ad9dffc56b7078e7f32e095 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Mon, 9 Jun 2025 17:06:20 +0200
Subject: [PATCH 01/13] Add dpo improvements arguments

---
 src/together/cli/api/finetune.py   | 38 ++++++++++++++++++++++++++
 src/together/resources/finetune.py | 44 +++++++++++++++++++++++++++++-
 src/together/types/finetune.py     |  4 +++
 3 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index 9aa581a8..c5b99ebf 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -142,6 +142,36 @@ def fine_tuning(ctx: click.Context) -> None:
     default=0.1,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
+@click.option(
+    "--dpo-normalize-logratios_by-length",
+    type=bool,
+    default=False,
+    help=(
+        "Whether to normalize logratios by sample length "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--dpo-reference-free",
+    type=bool,
+    default=False,
+    help="Whether to skip reference logits usage (only used when '--training-method' is 'dpo')",
+)
+@click.option(
+    "--rpo-alpha",
+    type=float,
+    default=0.0,
+    help=(
+        "RPO alpha parameter of DPO training to include NLL in the loss "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--simpo-gamma",
+    type=float,
+    default=0.1,
+    help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
+)
 @click.option(
     "--suffix",
     "-s",
@@ -206,6 +236,10 @@ def create(
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
     dpo_beta: float,
+    dpo_normalize_logratios_by_length: bool,
+    dpo_reference_free: bool,
+    rpo_alpha: float,
+    simpo_gamma: float,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
@@ -239,6 +273,10 @@ def create(
         train_on_inputs=train_on_inputs,
         training_method=training_method,
         dpo_beta=dpo_beta,
+        dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+        dpo_reference_free=dpo_reference_free,
+        rpo_alpha=rpo_alpha,
+        simpo_gamma=simpo_gamma,
         from_checkpoint=from_checkpoint,
     )
 
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 1c55412a..b639b057 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -72,6 +72,10 @@ def create_finetune_request(
     train_on_inputs: bool | Literal["auto"] | None = None,
     training_method: str = "sft",
     dpo_beta: float | None = None,
+    dpo_normalize_logratios_by_length: bool = False,
+    dpo_reference_free: bool = False,
+    rpo_alpha: float | None = None,
+    simpo_gamma: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
     if model is not None and from_checkpoint is not None:
@@ -182,6 +186,14 @@ def create_finetune_request(
 
     if dpo_beta is not None and training_method != "dpo":
         raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError("dpo_normalize_logratios_by_length=True is only supported for DPO training")
+    if dpo_reference_free and training_method != "dpo":
+        raise ValueError("dpo_reference_free=True is only supported for DPO training")
+    if rpo_alpha is not None and training_method != "dpo":
+        raise ValueError("rpo_alpha is only supported for DPO training")
+    if simpo_gamma is not None and training_method != "dpo":
+        raise ValueError("simpo_gamma is only supported for DPO training")
 
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
@@ -204,7 +216,13 @@ def create_finetune_request(
     if training_method == "sft":
         training_method_cls = TrainingMethodSFT(train_on_inputs=train_on_inputs)
     elif training_method == "dpo":
-        training_method_cls = TrainingMethodDPO(dpo_beta=dpo_beta)
+        training_method_cls = TrainingMethodDPO(
+            dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
+        )
 
     finetune_request = FinetuneRequest(
         model=model,
@@ -302,6 +320,10 @@ def create(
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        dpo_reference_free: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -353,6 +375,10 @@ def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
+            dpo_reference_free (bool): Whether to skip reference logits usage. Defaults to False.
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -405,6 +431,10 @@ def create(
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
@@ -714,6 +744,10 @@ async def create(
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        dpo_reference_free: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -765,6 +799,10 @@ async def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
+            dpo_reference_free (bool): Whether to skip reference logits usage. Defaults to False.
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -817,6 +855,10 @@ async def create(
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py
index 07ee65ec..e8c388f9 100644
--- a/src/together/types/finetune.py
+++ b/src/together/types/finetune.py
@@ -159,6 +159,10 @@ class TrainingMethodDPO(TrainingMethod):
 
     method: Literal["dpo"] = "dpo"
     dpo_beta: float | None = None
+    dpo_normalize_logratios_by_length: bool = False
+    dpo_reference_free: bool = False
+    rpo_alpha: float | None = None
+    simpo_gamma: float | None = None
 
 
 class FinetuneRequest(BaseModel):

From b92bc17d701b6fa342be5393a5531345e0ebdb65 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Tue, 10 Jun 2025 13:16:07 +0200
Subject: [PATCH 02/13] Version bump (tmp, dev)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c8afe9b2..0cc5508b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.13"
+version = "1.5.14.dev"
 authors = ["Together AI "]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"

From 8e1ee627746c29c14932c8ed5c11e35e41f637f6 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:09:08 +0200
Subject: [PATCH 03/13] Implicit setting of `reference_free` in case if simpo_gamma is set

---
 src/together/cli/api/finetune.py   |  2 --
 src/together/resources/finetune.py | 15 ++++++---------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index c5b99ebf..e90a3c09 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -237,7 +237,6 @@ def create(
     training_method: str,
     dpo_beta: float,
     dpo_normalize_logratios_by_length: bool,
-    dpo_reference_free: bool,
     rpo_alpha: float,
     simpo_gamma: float,
     from_checkpoint: str,
@@ -274,7 +273,6 @@ def create(
         training_method=training_method,
         dpo_beta=dpo_beta,
         dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
-        dpo_reference_free=dpo_reference_free,
         rpo_alpha=rpo_alpha,
         simpo_gamma=simpo_gamma,
         from_checkpoint=from_checkpoint,
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index b639b057..6414a728 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -73,7 +73,6 @@ def create_finetune_request(
     training_method: str = "sft",
     dpo_beta: float | None = None,
     dpo_normalize_logratios_by_length: bool = False,
-    dpo_reference_free: bool = False,
     rpo_alpha: float | None = None,
     simpo_gamma: float | None = None,
     from_checkpoint: str | None = None,
@@ -188,8 +187,6 @@ def create_finetune_request(
         raise ValueError("dpo_beta is only supported for DPO training")
     if dpo_normalize_logratios_by_length and training_method != "dpo":
         raise ValueError("dpo_normalize_logratios_by_length=True is only supported for DPO training")
-    if dpo_reference_free and training_method != "dpo":
-        raise ValueError("dpo_reference_free=True is only supported for DPO training")
     if rpo_alpha is not None and training_method != "dpo":
         raise ValueError("rpo_alpha is only supported for DPO training")
     if simpo_gamma is not None and training_method != "dpo":
         raise ValueError("simpo_gamma is only supported for DPO training")
@@ -216,6 +213,12 @@ def create_finetune_request(
     if training_method == "sft":
         training_method_cls = TrainingMethodSFT(train_on_inputs=train_on_inputs)
     elif training_method == "dpo":
+        if simpo_gamma is not None and simpo_gamma > 0:
+            dpo_reference_free = True
+            rprint(
+                f"Parameter simpo_gamma was set to {simpo_gamma}. "
+                "SimPO training detected. Reference logits will not be used."
+            )
         training_method_cls = TrainingMethodDPO(
             dpo_beta=dpo_beta,
             dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
@@ -321,7 +324,6 @@ def create(
         training_method: str = "sft",
         dpo_beta: float | None = None,
         dpo_normalize_logratios_by_length: bool = False,
-        dpo_reference_free: bool = False,
         rpo_alpha: float | None = None,
         simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
@@ -376,7 +378,6 @@ def create(
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
             dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
-            dpo_reference_free (bool): Whether to skip reference logits usage. Defaults to False.
             rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
             simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
@@ -432,7 +433,6 @@ def create(
             training_method=training_method,
             dpo_beta=dpo_beta,
             dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
-            dpo_reference_free=dpo_reference_free,
             rpo_alpha=rpo_alpha,
             simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
@@ -745,7 +745,6 @@ async def create(
         training_method: str = "sft",
         dpo_beta: float | None = None,
         dpo_normalize_logratios_by_length: bool = False,
-        dpo_reference_free: bool = False,
         rpo_alpha: float | None = None,
         simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
@@ -800,7 +799,6 @@ async def create(
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
             dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
-            dpo_reference_free (bool): Whether to skip reference logits usage. Defaults to False.
             rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
             simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
@@ -856,7 +854,6 @@ async def create(
             training_method=training_method,
             dpo_beta=dpo_beta,
             dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
-            dpo_reference_free=dpo_reference_free,
             rpo_alpha=rpo_alpha,
             simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,

From 7cd0109848130b8a75b4bb9acc2d62309289e104 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:14:45 +0200
Subject: [PATCH 04/13] Fix unbound variable

---
 src/together/resources/finetune.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 6414a728..9c2e1669 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -219,6 +219,9 @@ def create_finetune_request(
                 f"Parameter simpo_gamma was set to {simpo_gamma}. "
                 "SimPO training detected. Reference logits will not be used."
             )
+        else:
+            dpo_reference_free=None
+
         training_method_cls = TrainingMethodDPO(
             dpo_beta=dpo_beta,
             dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,

From 0719212fc3e893302dd398c86036170f55df2284 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:16:09 +0200
Subject: [PATCH 05/13] Fix

---
 src/together/resources/finetune.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 9c2e1669..e7fdee91 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -220,7 +220,7 @@ def create_finetune_request(
                 "SimPO training detected. Reference logits will not be used."
             )
         else:
-            dpo_reference_free=None
+            dpo_reference_free = False
 
         training_method_cls = TrainingMethodDPO(
             dpo_beta=dpo_beta,

From 5f8b188f3973fd75d2c4fa5b9932c8663be0fe93 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:18:47 +0200
Subject: [PATCH 06/13] Force normalization for simpo

---
 src/together/resources/finetune.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index e7fdee91..42d015b5 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -215,9 +215,11 @@ def create_finetune_request(
     elif training_method == "dpo":
         if simpo_gamma is not None and simpo_gamma > 0:
             dpo_reference_free = True
+            dpo_normalize_logratios_by_length = True
             rprint(
                 f"Parameter simpo_gamma was set to {simpo_gamma}. "
-                "SimPO training detected. Reference logits will not be used."
+                "SimPO training detected. Reference logits will not be used "
+                "and length normalization of logps will be enabled."
             )
         else:
             dpo_reference_free = False

From 57c0e16f11520a68dda816c6096b961ab58af729 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:43:25 +0200
Subject: [PATCH 07/13] Version bump

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0cc5508b..b8bdfd62 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.14.dev"
+version = "1.5.15"
 authors = ["Together AI "]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"

From d0a993249ce4f965517620ecadff4d54d00e47f5 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:52:53 +0200
Subject: [PATCH 08/13] Formatting

---
 src/together/resources/finetune.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 42d015b5..2762307d 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -186,7 +186,9 @@ def create_finetune_request(
     if dpo_beta is not None and training_method != "dpo":
         raise ValueError("dpo_beta is only supported for DPO training")
     if dpo_normalize_logratios_by_length and training_method != "dpo":
-        raise ValueError("dpo_normalize_logratios_by_length=True is only supported for DPO training")
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
     if rpo_alpha is not None and training_method != "dpo":
         raise ValueError("rpo_alpha is only supported for DPO training")
     if simpo_gamma is not None and training_method != "dpo":

From 4702194bce19630f415f35f351893f141b51c3e9 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Thu, 12 Jun 2025 18:58:46 +0200
Subject: [PATCH 09/13] Version fix

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b8bdfd62..0f22984d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.15"
+version = "1.5.14"
 authors = ["Together AI "]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"

From a082aac5e0376e9089185efac17c833364f4adaf Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Fri, 13 Jun 2025 10:40:42 +0200
Subject: [PATCH 10/13] Remove reference-free from dpo

---
 src/together/cli/api/finetune.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index e90a3c09..cbde2ddf 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -151,12 +151,6 @@ def fine_tuning(ctx: click.Context) -> None:
         "(only used when '--training-method' is 'dpo')"
     ),
 )
-@click.option(
-    "--dpo-reference-free",
-    type=bool,
-    default=False,
-    help="Whether to skip reference logits usage (only used when '--training-method' is 'dpo')",
-)
 @click.option(
     "--rpo-alpha",
     type=float,

From 92d7e01509300120741e1c49dad656a1eee01707 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Fri, 13 Jun 2025 10:50:55 +0200
Subject: [PATCH 11/13] Review fixes

---
 src/together/resources/finetune.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 2762307d..10b53b52 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -189,10 +189,17 @@ def create_finetune_request(
         raise ValueError(
             "dpo_normalize_logratios_by_length=True is only supported for DPO training"
         )
-    if rpo_alpha is not None and training_method != "dpo":
-        raise ValueError("rpo_alpha is only supported for DPO training")
-    if simpo_gamma is not None and training_method != "dpo":
-        raise ValueError("simpo_gamma is only supported for DPO training")
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
 
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
@@ -221,7 +228,7 @@ def create_finetune_request(
             rprint(
                 f"Parameter simpo_gamma was set to {simpo_gamma}. "
                 "SimPO training detected. Reference logits will not be used "
-                "and length normalization of logps will be enabled."
+                "and length normalization of log-probabilities will be enabled."
             )
         else:
             dpo_reference_free = False

From 51b96fce2baee1c10cc8244e9c9391fdc7d038ad Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Fri, 13 Jun 2025 10:52:21 +0200
Subject: [PATCH 12/13] Formatting

---
 src/together/resources/finetune.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 10b53b52..c1cfad35 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -189,7 +189,7 @@ def create_finetune_request(
         raise ValueError(
             "dpo_normalize_logratios_by_length=True is only supported for DPO training"
        )
-    if rpo_alpha is not None: 
+    if rpo_alpha is not None:
         if training_method != "dpo":
             raise ValueError("rpo_alpha is only supported for DPO training")
         if not rpo_alpha >= 0.0:
             raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")

From 47248241219c3fe8afd6d92d8d840656b54d2302 Mon Sep 17 00:00:00 2001
From: Egor Timofeev
Date: Fri, 13 Jun 2025 16:47:24 +0200
Subject: [PATCH 13/13] Fixes

---
 src/together/cli/api/finetune.py   | 2 +-
 src/together/resources/finetune.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index cbde2ddf..f95235cf 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -143,7 +143,7 @@ def fine_tuning(ctx: click.Context) -> None:
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
-    "--dpo-normalize-logratios_by-length",
+    "--dpo-normalize-logratios-by-length",
     type=bool,
     default=False,
     help=(
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index c1cfad35..27baf2d2 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -391,7 +391,7 @@ def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
-            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample length. Defaults to False,
             rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
             simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
@@ -812,7 +812,7 @@ async def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
-            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample lenght. Defaults to False,
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample length. Defaults to False,
             rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
             simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
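
Usage note (illustrative sketch, not part of the patch series): one way the arguments added above could be passed through the Python client, assuming the existing client.fine_tuning.create(...) entry point of the together SDK. The model name and training-file ID are placeholders, and training_file/model are pre-existing parameters rather than part of this change.

    from together import Together

    client = Together()  # assumes TOGETHER_API_KEY is set in the environment

    job = client.fine_tuning.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # placeholder model name
        training_file="file-xxxxxxxxxxxx",  # placeholder ID of an uploaded preference dataset
        training_method="dpo",
        dpo_beta=0.1,
        dpo_normalize_logratios_by_length=True,  # new: normalize logratios by sample length
        rpo_alpha=0.5,  # new: weight of the additional NLL term in the DPO loss
    )
    print(job.id)

Passing simpo_gamma > 0 instead would switch the job to reference-free SimPO, which per PATCH 03 and PATCH 06 also forces length normalization on. The rough CLI equivalent of the call above is `together fine-tuning create ... --training-method dpo --dpo-normalize-logratios-by-length true --rpo-alpha 0.5`.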