From faad4bbb6b400de85f7167b0ca297bd18244cd91 Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Tue, 25 Feb 2025 12:11:36 +0100 Subject: [PATCH 01/11] Add from_step --- src/together/cli/api/finetune.py | 10 +++++++++- src/together/cli/api/utils.py | 17 +++++++++++++++++ src/together/resources/finetune.py | 8 ++++++++ src/together/types/finetune.py | 3 +++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py index 7bc02744..9fd92c29 100644 --- a/src/together/cli/api/finetune.py +++ b/src/together/cli/api/finetune.py @@ -11,7 +11,7 @@ from tabulate import tabulate from together import Together -from together.cli.api.utils import BOOL_WITH_AUTO, INT_WITH_MAX +from together.cli.api.utils import BOOL_WITH_AUTO, INT_WITH_MAX, FROM_STEP_TYPE from together.utils import ( finetune_price_to_dollars, log_warn, @@ -126,6 +126,12 @@ def fine_tuning(ctx: click.Context) -> None: help="Whether to mask the user messages in conversational data or prompts in instruction data. " "`auto` will automatically determine whether to mask the inputs based on the data format.", ) +@click.option( + "--from-step", + type=FROM_STEP_TYPE, + default="final", + help="From which checkpoint start a fine-tuning job" +) def create( ctx: click.Context, training_file: str, @@ -152,6 +158,7 @@ def create( wandb_name: str, confirm: bool, train_on_inputs: bool | Literal["auto"], + from_step: int | Literal["final"], ) -> None: """Start fine-tuning""" client: Together = ctx.obj @@ -180,6 +187,7 @@ def create( wandb_project_name=wandb_project_name, wandb_name=wandb_name, train_on_inputs=train_on_inputs, + from_step=from_step, ) model_limits: FinetuneTrainingLimits = client.fine_tuning.get_model_limits( diff --git a/src/together/cli/api/utils.py b/src/together/cli/api/utils.py index 08dfe492..116de08d 100644 --- a/src/together/cli/api/utils.py +++ b/src/together/cli/api/utils.py @@ -47,5 +47,22 @@ def convert( ) +class FromStepParamType(click.ParamType): + name = "from_step" + + def convert(self, value: str, param: click.Parameter | None, ctx: click.Context | None) -> int | Literal["final"] | None: + if value == "final": + return "final" + try: + return int(value) + except ValueError: + self.fail( + _("{value!r} is not a valid {type}.").format( + value=value, type=self.name + ), + ) + + INT_WITH_MAX = AutoIntParamType() BOOL_WITH_AUTO = BooleanWithAutoParamType() +FROM_STEP_TYPE = FromStepParamType() diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index b58cdae2..4f90ac5b 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -52,6 +52,7 @@ def createFinetuneRequest( wandb_project_name: str | None = None, wandb_name: str | None = None, train_on_inputs: bool | Literal["auto"] = "auto", + from_step: int | Literal["final"] = "final" ) -> FinetuneRequest: if batch_size == "max": log_warn_once( @@ -100,6 +101,9 @@ def createFinetuneRequest( if weight_decay is not None and (weight_decay < 0): raise ValueError("Weight decay should be non-negative") + if from_step == "final": + from_step = -1 + lrScheduler = FinetuneLRScheduler( lr_scheduler_type="linear", lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio), @@ -125,6 +129,7 @@ def createFinetuneRequest( wandb_project_name=wandb_project_name, wandb_name=wandb_name, train_on_inputs=train_on_inputs, + from_step=from_step, ) return finetune_request @@ -162,6 +167,7 @@ def create( verbose: bool = False, model_limits: 
FinetuneTrainingLimits | None = None, train_on_inputs: bool | Literal["auto"] = "auto", + from_step: int | Literal["final"] = "final", ) -> FinetuneResponse: """ Method to initiate a fine-tuning job @@ -207,6 +213,7 @@ def create( For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields (Instruction format), inputs will be masked. Defaults to "auto". + from_step (int or "final"): From which checkpoint start a fine-tuning job Returns: FinetuneResponse: Object containing information about fine-tuning job. @@ -244,6 +251,7 @@ def create( wandb_project_name=wandb_project_name, wandb_name=wandb_name, train_on_inputs=train_on_inputs, + from_step=from_step, ) if verbose: diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py index 05bc8c42..8a069194 100644 --- a/src/together/types/finetune.py +++ b/src/together/types/finetune.py @@ -178,6 +178,8 @@ class FinetuneRequest(BaseModel): training_type: FullTrainingType | LoRATrainingType | None = None # train on inputs train_on_inputs: StrictBool | Literal["auto"] = "auto" + # from step + from_step: int | None = -1 class FinetuneResponse(BaseModel): @@ -256,6 +258,7 @@ class FinetuneResponse(BaseModel): training_file_num_lines: int | None = Field(None, alias="TrainingFileNumLines") training_file_size: int | None = Field(None, alias="TrainingFileSize") train_on_inputs: StrictBool | Literal["auto"] | None = "auto" + from_step: int | None = "-1" @field_validator("training_type") @classmethod From 2cac4e96d907b95a6d8f72eca7d5d1e2531192cd Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Wed, 26 Feb 2025 14:00:06 +0100 Subject: [PATCH 02/11] Add from_checkpoint argument --- src/together/cli/api/finetune.py | 15 ++++++++------- src/together/cli/api/utils.py | 18 ------------------ src/together/resources/finetune.py | 13 +++++-------- src/together/types/finetune.py | 4 ++-- 4 files changed, 15 insertions(+), 35 deletions(-) diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py index 9fd92c29..8a6a7638 100644 --- a/src/together/cli/api/finetune.py +++ b/src/together/cli/api/finetune.py @@ -11,7 +11,7 @@ from tabulate import tabulate from together import Together -from together.cli.api.utils import BOOL_WITH_AUTO, INT_WITH_MAX, FROM_STEP_TYPE +from together.cli.api.utils import BOOL_WITH_AUTO, INT_WITH_MAX from together.utils import ( finetune_price_to_dollars, log_warn, @@ -127,10 +127,11 @@ def fine_tuning(ctx: click.Context) -> None: "`auto` will automatically determine whether to mask the inputs based on the data format.", ) @click.option( - "--from-step", - type=FROM_STEP_TYPE, - default="final", - help="From which checkpoint start a fine-tuning job" + "from_checkpoint", + type=str, + default=None, + help="The checkpoint to be used in the fine-tuning. The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. " + "The step value is optional, without it the final checkpoint will be used." 
) def create( ctx: click.Context, @@ -158,7 +159,7 @@ def create( wandb_name: str, confirm: bool, train_on_inputs: bool | Literal["auto"], - from_step: int | Literal["final"], + from_checkpoint: str, ) -> None: """Start fine-tuning""" client: Together = ctx.obj @@ -187,7 +188,7 @@ def create( wandb_project_name=wandb_project_name, wandb_name=wandb_name, train_on_inputs=train_on_inputs, - from_step=from_step, + from_checkpoint=from_checkpoint, ) model_limits: FinetuneTrainingLimits = client.fine_tuning.get_model_limits( diff --git a/src/together/cli/api/utils.py b/src/together/cli/api/utils.py index 116de08d..d3ce1b0e 100644 --- a/src/together/cli/api/utils.py +++ b/src/together/cli/api/utils.py @@ -46,23 +46,5 @@ def convert( ctx, ) - -class FromStepParamType(click.ParamType): - name = "from_step" - - def convert(self, value: str, param: click.Parameter | None, ctx: click.Context | None) -> int | Literal["final"] | None: - if value == "final": - return "final" - try: - return int(value) - except ValueError: - self.fail( - _("{value!r} is not a valid {type}.").format( - value=value, type=self.name - ), - ) - - INT_WITH_MAX = AutoIntParamType() BOOL_WITH_AUTO = BooleanWithAutoParamType() -FROM_STEP_TYPE = FromStepParamType() diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index 4f90ac5b..8fc61bb1 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -52,7 +52,7 @@ def createFinetuneRequest( wandb_project_name: str | None = None, wandb_name: str | None = None, train_on_inputs: bool | Literal["auto"] = "auto", - from_step: int | Literal["final"] = "final" + from_checkpoint: str | None = None, ) -> FinetuneRequest: if batch_size == "max": log_warn_once( @@ -101,9 +101,6 @@ def createFinetuneRequest( if weight_decay is not None and (weight_decay < 0): raise ValueError("Weight decay should be non-negative") - if from_step == "final": - from_step = -1 - lrScheduler = FinetuneLRScheduler( lr_scheduler_type="linear", lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio), @@ -129,7 +126,7 @@ def createFinetuneRequest( wandb_project_name=wandb_project_name, wandb_name=wandb_name, train_on_inputs=train_on_inputs, - from_step=from_step, + from_checkpoint=from_checkpoint, ) return finetune_request @@ -167,7 +164,7 @@ def create( verbose: bool = False, model_limits: FinetuneTrainingLimits | None = None, train_on_inputs: bool | Literal["auto"] = "auto", - from_step: int | Literal["final"] = "final", + from_checkpoint: str | None = None, ) -> FinetuneResponse: """ Method to initiate a fine-tuning job @@ -213,7 +210,7 @@ def create( For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields (Instruction format), inputs will be masked. Defaults to "auto". - from_step (int or "final"): From which checkpoint start a fine-tuning job + from_checkpoint (str, optional): The checkpoint to be used in the fine-tuning. Returns: FinetuneResponse: Object containing information about fine-tuning job. 
@@ -251,7 +248,7 @@ def create( wandb_project_name=wandb_project_name, wandb_name=wandb_name, train_on_inputs=train_on_inputs, - from_step=from_step, + from_checkpoint=from_checkpoint, ) if verbose: diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py index 8a069194..826d8f24 100644 --- a/src/together/types/finetune.py +++ b/src/together/types/finetune.py @@ -179,7 +179,7 @@ class FinetuneRequest(BaseModel): # train on inputs train_on_inputs: StrictBool | Literal["auto"] = "auto" # from step - from_step: int | None = -1 + from_checkpoint: str class FinetuneResponse(BaseModel): @@ -258,7 +258,7 @@ class FinetuneResponse(BaseModel): training_file_num_lines: int | None = Field(None, alias="TrainingFileNumLines") training_file_size: int | None = Field(None, alias="TrainingFileSize") train_on_inputs: StrictBool | Literal["auto"] | None = "auto" - from_step: int | None = "-1" + from_checkpoint: str @field_validator("training_type") @classmethod From 2a92819401a5fd044e2ccffc55fb47d278b36eef Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Wed, 26 Feb 2025 14:43:30 +0100 Subject: [PATCH 03/11] add list checkpoint cmd --- src/together/cli/api/finetune.py | 37 +++++++- src/together/resources/finetune.py | 148 ++++++++++++++++++++++++++--- src/together/types/__init__.py | 4 + src/together/types/finetune.py | 24 ++++- src/together/utils/__init__.py | 4 + src/together/utils/tools.py | 67 +++++++++++-- 6 files changed, 261 insertions(+), 23 deletions(-) diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py index 8a6a7638..e5b2b4bb 100644 --- a/src/together/cli/api/finetune.py +++ b/src/together/cli/api/finetune.py @@ -4,6 +4,7 @@ from datetime import datetime from textwrap import wrap from typing import Any, Literal +import re import click from click.core import ParameterSource # type: ignore[attr-defined] @@ -17,8 +18,10 @@ log_warn, log_warn_once, parse_timestamp, + format_event_timestamp, + get_event_step, ) -from together.types.finetune import DownloadCheckpointType, FinetuneTrainingLimits +from together.types.finetune import DownloadCheckpointType, FinetuneTrainingLimits, FinetuneEventType _CONFIRMATION_MESSAGE = ( @@ -127,7 +130,7 @@ def fine_tuning(ctx: click.Context) -> None: "`auto` will automatically determine whether to mask the inputs based on the data format.", ) @click.option( - "from_checkpoint", + "--from-checkpoint", type=str, default=None, help="The checkpoint to be used in the fine-tuning. The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. 
" @@ -353,6 +356,36 @@ def list_events(ctx: click.Context, fine_tune_id: str) -> None: click.echo(table) +@fine_tuning.command() +@click.pass_context +@click.argument("fine_tune_id", type=str, required=True) +def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None: + """List available checkpoints for a fine-tuning job""" + client: Together = ctx.obj + + response = client.fine_tuning.list_checkpoints(fine_tune_id) + + response.data = response.data or [] + + display_list = [] + for checkpoint in response.data: + display_list.append( + { + "Type": checkpoint.type, + "Timestamp": checkpoint.timestamp, + "Name": checkpoint.name, + } + ) + + if display_list: + click.echo(f"This job contains these checkpoints:") + table = tabulate(display_list, headers="keys", tablefmt="grid") + click.echo(table) + click.echo("\nTo download a checkpoint, use cmd: together fine-tuning download") + else: + click.echo(f"No checkpoints found for job {fine_tune_id}") + + @fine_tuning.command() @click.pass_context @click.argument("fine_tune_id", type=str, required=True) diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index 8fc61bb1..24878df7 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -1,7 +1,8 @@ from __future__ import annotations +import re from pathlib import Path -from typing import Literal +from typing import Literal, List from rich import print as rprint @@ -22,9 +23,16 @@ TrainingType, FinetuneLRScheduler, FinetuneLinearLRSchedulerArgs, + FinetuneCheckpoint, + FinetuneCheckpointList, +) +from together.types.finetune import DownloadCheckpointType, FinetuneEventType +from together.utils import ( + log_warn_once, + normalize_key, + format_event_timestamp, + get_event_step ) -from together.types.finetune import DownloadCheckpointType -from together.utils import log_warn_once, normalize_key def createFinetuneRequest( @@ -371,11 +379,65 @@ def list_events(self, id: str) -> FinetuneListEvents: ), stream=False, ) - assert isinstance(response, TogetherResponse) return FinetuneListEvents(**response.data) + def list_checkpoints(self, id: str) -> FinetuneCheckpointList: + """ + List available checkpoints for a fine-tuning job + + Args: + id (str): Unique identifier of the fine-tune job to list checkpoints for + + Returns: + FinetuneCheckpointList: Object containing list of available checkpoints + """ + events = self.list_events(id).data or [] + + checkpoints: List[FinetuneCheckpoint] = [] + + for event in events: + event_type = event.type + + if event_type == FinetuneEventType.CHECKPOINT_SAVE: + formatted_time = format_event_timestamp(event) + step = get_event_step(event) + checkpoint_name = f"{id}:{step}" if step else id + + checkpoints.append( + FinetuneCheckpoint( + type="Intermediate", + timestamp=formatted_time, + name=checkpoint_name, + ) + ) + elif event_type == FinetuneEventType.JOB_COMPLETE: + formatted_time = format_event_timestamp(event) + is_lora = hasattr(event, "adapter_path") + + checkpoints.append( + FinetuneCheckpoint( + type="Final Merged" if is_lora else "Final", + timestamp=formatted_time, + name=id, + ) + ) + + if is_lora: + checkpoints.append( + FinetuneCheckpoint( + type="Final Adapter", + timestamp=formatted_time, + name=id, + ) + ) + + # Sort by timestamp (newest first) + checkpoints.sort(key=lambda x: x.timestamp, reverse=True) + + return FinetuneCheckpointList(object="list", data=checkpoints) + def download( self, id: str, @@ -401,6 +463,13 @@ def download( Returns: FinetuneDownloadResult: Object 
containing downloaded model metadata """ + + if re.match(r"^ft-[\dabcde-]+\:\d+$", id) is not None: + if checkpoint_step == -1: + checkpoint_step = int(id.split(":")[1]) + id = id.split(":")[0] + else: + raise ValueError("Invalid fine-tune ID. Don't use `checkpoint_step` with a colon in the ID.") url = f"finetune/download?ft_id={id}" @@ -692,30 +761,87 @@ async def cancel(self, id: str) -> FinetuneResponse: async def list_events(self, id: str) -> FinetuneListEvents: """ - Async method to lists events of a fine-tune job + List fine-tuning events Args: - id (str): Fine-tune ID to list events for. A string that starts with `ft-`. + id (str): Unique identifier of the fine-tune job to list events for Returns: - FinetuneListEvents: Object containing list of fine-tune events + FinetuneListEvents: Object containing list of fine-tune job events """ requestor = api_requestor.APIRequestor( client=self._client, ) - response, _, _ = await requestor.arequest( + events_response, _, _ = await requestor.arequest( options=TogetherRequest( method="GET", - url=f"fine-tunes/{id}/events", + url=f"fine-tunes/{normalize_key(id)}/events", ), stream=False, ) - assert isinstance(response, TogetherResponse) + # FIXME: API returns "data" field with no object type (should be "list") + events_list = FinetuneListEvents(object="list", **events_response.data) - return FinetuneListEvents(**response.data) + return events_list + + async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: + """ + List available checkpoints for a fine-tuning job + + Args: + id (str): Unique identifier of the fine-tune job to list checkpoints for + + Returns: + FinetuneCheckpointList: Object containing list of available checkpoints + """ + events_list = await self.list_events(id) + events = events_list.data or [] + + checkpoints: List[FinetuneCheckpoint] = [] + + for event in events: + event_type = event.type + + if event_type == FinetuneEventType.CHECKPOINT_SAVE: + formatted_time = format_event_timestamp(event) + step = get_event_step(event) + checkpoint_name = f"{id}:{step}" if step else id + + checkpoints.append( + FinetuneCheckpoint( + type="Intermediate", + timestamp=formatted_time, + name=checkpoint_name, + ) + ) + elif event_type == FinetuneEventType.JOB_COMPLETE: + formatted_time = format_event_timestamp(event) + is_lora = hasattr(event, "adapter_path") + + checkpoints.append( + FinetuneCheckpoint( + type="Final Merged" if is_lora else "Final", + timestamp=formatted_time, + name=id, + ) + ) + + if is_lora: + checkpoints.append( + FinetuneCheckpoint( + type="Final Adapter", + timestamp=formatted_time, + name=id, + ) + ) + + # Sort by timestamp (newest first) + checkpoints.sort(key=lambda x: x.timestamp, reverse=True) + + return FinetuneCheckpointList(object="list", data=checkpoints) async def download( self, id: str, *, output: str | None = None, checkpoint_step: int = -1 diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py index c3100cd1..33b1ed23 100644 --- a/src/together/types/__init__.py +++ b/src/together/types/__init__.py @@ -31,6 +31,8 @@ FileType, ) from together.types.finetune import ( + FinetuneCheckpoint, + FinetuneCheckpointList, FinetuneDownloadResult, FinetuneLinearLRSchedulerArgs, FinetuneList, @@ -59,6 +61,8 @@ "ChatCompletionResponse", "EmbeddingRequest", "EmbeddingResponse", + "FinetuneCheckpoint", + "FinetuneCheckpointList", "FinetuneRequest", "FinetuneResponse", "FinetuneList", diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py index 826d8f24..ce602c32 100644 
--- a/src/together/types/finetune.py +++ b/src/together/types/finetune.py @@ -258,7 +258,7 @@ class FinetuneResponse(BaseModel): training_file_num_lines: int | None = Field(None, alias="TrainingFileNumLines") training_file_size: int | None = Field(None, alias="TrainingFileSize") train_on_inputs: StrictBool | Literal["auto"] | None = "auto" - from_checkpoint: str + from_checkpoint: str | None = None @field_validator("training_type") @classmethod @@ -323,3 +323,25 @@ class FinetuneLRScheduler(BaseModel): class FinetuneLinearLRSchedulerArgs(BaseModel): min_lr_ratio: float | None = 0.0 + + +class FinetuneCheckpoint(BaseModel): + """ + Fine-tune checkpoint information + """ + # checkpoint type (e.g. "Intermediate", "Final", "Final Merged", "Final Adapter") + type: str + # timestamp when the checkpoint was created + timestamp: str + # checkpoint name/identifier + name: str + + +class FinetuneCheckpointList(BaseModel): + """ + List of fine-tune checkpoints + """ + # object type + object: Literal["list"] | None = None + # list of fine-tune checkpoint objects + data: List[FinetuneCheckpoint] | None = None diff --git a/src/together/utils/__init__.py b/src/together/utils/__init__.py index 0e59966f..f2109569 100644 --- a/src/together/utils/__init__.py +++ b/src/together/utils/__init__.py @@ -8,6 +8,8 @@ finetune_price_to_dollars, normalize_key, parse_timestamp, + format_event_timestamp, + get_event_step, ) @@ -23,6 +25,8 @@ "enforce_trailing_slash", "normalize_key", "parse_timestamp", + "format_event_timestamp", + "get_event_step", "finetune_price_to_dollars", "convert_bytes", "convert_unix_timestamp", diff --git a/src/together/utils/tools.py b/src/together/utils/tools.py index 7ac68000..f7d84a2b 100644 --- a/src/together/utils/tools.py +++ b/src/together/utils/tools.py @@ -3,6 +3,8 @@ import logging import os from datetime import datetime +import re +from typing import Any logger = logging.getLogger("together") @@ -23,19 +25,66 @@ def normalize_key(key: str) -> str: return key.replace("/", "--").replace("_", "-").replace(" ", "-").lower() -def parse_timestamp(timestamp: str) -> datetime: - formats = ["%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"] - for fmt in formats: - try: - return datetime.strptime(timestamp, fmt) - except ValueError: - continue - raise ValueError("Timestamp does not match any expected format") +def parse_timestamp(timestamp: str) -> datetime | None: + """Parse a timestamp string into a datetime object or None if invalid. + + Args: + timestamp (str): Timestamp in ISO 8601 format (e.g. "2021-01-01T00:00:00Z") + + Returns: + datetime | None: Parsed datetime, or None if the string is empty + """ + if timestamp == "": + return None + + return datetime.fromisoformat(timestamp.replace("Z", "+00:00")) + + +def format_event_timestamp(event: Any) -> str: + """Format event timestamp to a readable date string. + + Args: + event: An event object with a created_at attribute + + Returns: + str: Formatted timestamp string (MM/DD/YYYY, HH:MM AM/PM) + """ + timestamp = parse_timestamp(event.created_at or "") + return timestamp.strftime("%m/%d/%Y, %I:%M %p") if timestamp else "" + + +def get_event_step(event: Any) -> str | None: + """Extract the step number from a checkpoint event. 
+ + Args: + event: A checkpoint event object + + Returns: + str | None: The step number as a string, or None if not found + """ + # First try to get step directly from the event object + step = getattr(event, "step", None) + if step is not None: + return str(step) + + # If not available, try to extract from the message + message = getattr(event, "message", "") or "" + step_match = re.search(r"step[:\s]+(\d+)", message.lower()) + return step_match.group(1) if step_match else None # Convert fine-tune nano-dollar price to dollars def finetune_price_to_dollars(price: float) -> float: - return price / NANODOLLAR + """Convert fine-tune price to dollars + + Args: + price (float): Fine-tune price in billing units + + Returns: + float: Price in dollars + """ + # Convert from nanodollars (1e-9 dollars) to dollars + return price / 1e9 def convert_bytes(num: float) -> str | None: From c65c64bf0a54f6fd614d1f1dd2f6d87319eaae62 Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Wed, 26 Feb 2025 14:48:00 +0100 Subject: [PATCH 04/11] style --- src/together/cli/api/finetune.py | 18 +++++++---- src/together/cli/api/utils.py | 1 + src/together/resources/finetune.py | 48 ++++++++++++++++-------------- src/together/types/finetune.py | 2 ++ src/together/utils/tools.py | 10 +++---- 5 files changed, 45 insertions(+), 34 deletions(-) diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py index e5b2b4bb..fb09289d 100644 --- a/src/together/cli/api/finetune.py +++ b/src/together/cli/api/finetune.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from datetime import datetime +from datetime import datetime, timezone from textwrap import wrap from typing import Any, Literal import re @@ -21,7 +21,11 @@ format_event_timestamp, get_event_step, ) -from together.types.finetune import DownloadCheckpointType, FinetuneTrainingLimits, FinetuneEventType +from together.types.finetune import ( + DownloadCheckpointType, + FinetuneTrainingLimits, + FinetuneEventType, +) _CONFIRMATION_MESSAGE = ( @@ -134,7 +138,7 @@ def fine_tuning(ctx: click.Context) -> None: type=str, default=None, help="The checkpoint to be used in the fine-tuning. The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. " - "The step value is optional, without it the final checkpoint will be used." 
+ "The step value is optional, without it the final checkpoint will be used.", ) def create( ctx: click.Context, @@ -273,7 +277,9 @@ def list(ctx: click.Context) -> None: response.data = response.data or [] - response.data.sort(key=lambda x: parse_timestamp(x.created_at or "")) + # Use a default datetime for None values to make sure the key function always returns a comparable value + epoch_start = datetime.fromtimestamp(0, tz=timezone.utc) + response.data.sort(key=lambda x: parse_timestamp(x.created_at or "") or epoch_start) display_list = [] for i in response.data: @@ -366,7 +372,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None: response = client.fine_tuning.list_checkpoints(fine_tune_id) response.data = response.data or [] - + display_list = [] for checkpoint in response.data: display_list.append( @@ -376,7 +382,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None: "Name": checkpoint.name, } ) - + if display_list: click.echo(f"This job contains these checkpoints:") table = tabulate(display_list, headers="keys", tablefmt="grid") diff --git a/src/together/cli/api/utils.py b/src/together/cli/api/utils.py index d3ce1b0e..08dfe492 100644 --- a/src/together/cli/api/utils.py +++ b/src/together/cli/api/utils.py @@ -46,5 +46,6 @@ def convert( ctx, ) + INT_WITH_MAX = AutoIntParamType() BOOL_WITH_AUTO = BooleanWithAutoParamType() diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index 24878df7..8de46d3c 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -28,10 +28,10 @@ ) from together.types.finetune import DownloadCheckpointType, FinetuneEventType from together.utils import ( - log_warn_once, - normalize_key, - format_event_timestamp, - get_event_step + log_warn_once, + normalize_key, + format_event_timestamp, + get_event_step, ) @@ -394,17 +394,17 @@ def list_checkpoints(self, id: str) -> FinetuneCheckpointList: FinetuneCheckpointList: Object containing list of available checkpoints """ events = self.list_events(id).data or [] - + checkpoints: List[FinetuneCheckpoint] = [] - + for event in events: event_type = event.type - + if event_type == FinetuneEventType.CHECKPOINT_SAVE: formatted_time = format_event_timestamp(event) step = get_event_step(event) checkpoint_name = f"{id}:{step}" if step else id - + checkpoints.append( FinetuneCheckpoint( type="Intermediate", @@ -415,7 +415,7 @@ def list_checkpoints(self, id: str) -> FinetuneCheckpointList: elif event_type == FinetuneEventType.JOB_COMPLETE: formatted_time = format_event_timestamp(event) is_lora = hasattr(event, "adapter_path") - + checkpoints.append( FinetuneCheckpoint( type="Final Merged" if is_lora else "Final", @@ -423,7 +423,7 @@ def list_checkpoints(self, id: str) -> FinetuneCheckpointList: name=id, ) ) - + if is_lora: checkpoints.append( FinetuneCheckpoint( @@ -432,10 +432,10 @@ def list_checkpoints(self, id: str) -> FinetuneCheckpointList: name=id, ) ) - + # Sort by timestamp (newest first) checkpoints.sort(key=lambda x: x.timestamp, reverse=True) - + return FinetuneCheckpointList(object="list", data=checkpoints) def download( @@ -463,13 +463,15 @@ def download( Returns: FinetuneDownloadResult: Object containing downloaded model metadata """ - + if re.match(r"^ft-[\dabcde-]+\:\d+$", id) is not None: if checkpoint_step == -1: checkpoint_step = int(id.split(":")[1]) id = id.split(":")[0] else: - raise ValueError("Invalid fine-tune ID. 
Don't use `checkpoint_step` with a colon in the ID.") + raise ValueError( + "Invalid fine-tune ID. Don't use `checkpoint_step` with a colon in the ID." + ) url = f"finetune/download?ft_id={id}" @@ -786,7 +788,7 @@ async def list_events(self, id: str) -> FinetuneListEvents: events_list = FinetuneListEvents(object="list", **events_response.data) return events_list - + async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: """ List available checkpoints for a fine-tuning job @@ -799,17 +801,17 @@ async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: """ events_list = await self.list_events(id) events = events_list.data or [] - + checkpoints: List[FinetuneCheckpoint] = [] - + for event in events: event_type = event.type - + if event_type == FinetuneEventType.CHECKPOINT_SAVE: formatted_time = format_event_timestamp(event) step = get_event_step(event) checkpoint_name = f"{id}:{step}" if step else id - + checkpoints.append( FinetuneCheckpoint( type="Intermediate", @@ -820,7 +822,7 @@ async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: elif event_type == FinetuneEventType.JOB_COMPLETE: formatted_time = format_event_timestamp(event) is_lora = hasattr(event, "adapter_path") - + checkpoints.append( FinetuneCheckpoint( type="Final Merged" if is_lora else "Final", @@ -828,7 +830,7 @@ async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: name=id, ) ) - + if is_lora: checkpoints.append( FinetuneCheckpoint( @@ -837,10 +839,10 @@ async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: name=id, ) ) - + # Sort by timestamp (newest first) checkpoints.sort(key=lambda x: x.timestamp, reverse=True) - + return FinetuneCheckpointList(object="list", data=checkpoints) async def download( diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py index ce602c32..fc5f5257 100644 --- a/src/together/types/finetune.py +++ b/src/together/types/finetune.py @@ -329,6 +329,7 @@ class FinetuneCheckpoint(BaseModel): """ Fine-tune checkpoint information """ + # checkpoint type (e.g. "Intermediate", "Final", "Final Merged", "Final Adapter") type: str # timestamp when the checkpoint was created @@ -341,6 +342,7 @@ class FinetuneCheckpointList(BaseModel): """ List of fine-tune checkpoints """ + # object type object: Literal["list"] | None = None # list of fine-tune checkpoint objects diff --git a/src/together/utils/tools.py b/src/together/utils/tools.py index f7d84a2b..23fabd34 100644 --- a/src/together/utils/tools.py +++ b/src/together/utils/tools.py @@ -42,10 +42,10 @@ def parse_timestamp(timestamp: str) -> datetime | None: def format_event_timestamp(event: Any) -> str: """Format event timestamp to a readable date string. - + Args: event: An event object with a created_at attribute - + Returns: str: Formatted timestamp string (MM/DD/YYYY, HH:MM AM/PM) """ @@ -55,10 +55,10 @@ def format_event_timestamp(event: Any) -> str: def get_event_step(event: Any) -> str | None: """Extract the step number from a checkpoint event. 
- + Args: event: A checkpoint event object - + Returns: str | None: The step number as a string, or None if not found """ @@ -66,7 +66,7 @@ def get_event_step(event: Any) -> str | None: step = getattr(event, "step", None) if step is not None: return str(step) - + # If not available, try to extract from the message message = getattr(event, "message", "") or "" step_match = re.search(r"step[:\s]+(\d+)", message.lower()) From 2eadd13b55784e3aa87b7a5cfe910c9f9e28eb4d Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Wed, 26 Feb 2025 16:55:02 +0100 Subject: [PATCH 05/11] fix tools --- src/together/utils/tools.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/together/utils/tools.py b/src/together/utils/tools.py index 23fabd34..de237860 100644 --- a/src/together/utils/tools.py +++ b/src/together/utils/tools.py @@ -62,12 +62,10 @@ def get_event_step(event: Any) -> str | None: Returns: str | None: The step number as a string, or None if not found """ - # First try to get step directly from the event object step = getattr(event, "step", None) if step is not None: return str(step) - # If not available, try to extract from the message message = getattr(event, "message", "") or "" step_match = re.search(r"step[:\s]+(\d+)", message.lower()) return step_match.group(1) if step_match else None @@ -83,8 +81,7 @@ def finetune_price_to_dollars(price: float) -> float: Returns: float: Price in dollars """ - # Convert from nanodollars (1e-9 dollars) to dollars - return price / 1e9 + return price / NANODOLLAR def convert_bytes(num: float) -> str | None: From f1e55953c69e83f62559e110e4f8fa715c32963b Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Mon, 10 Mar 2025 14:43:57 +0100 Subject: [PATCH 06/11] fixes --- src/together/cli/api/finetune.py | 15 ++++----- src/together/legacy/finetune.py | 2 +- src/together/resources/finetune.py | 53 +++++++++++++++++------------- src/together/types/__init__.py | 2 -- src/together/types/finetune.py | 13 +------- src/together/utils/tools.py | 23 +++++++------ 6 files changed, 53 insertions(+), 55 deletions(-) diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py index fb09289d..8aa98269 100644 --- a/src/together/cli/api/finetune.py +++ b/src/together/cli/api/finetune.py @@ -137,7 +137,8 @@ def fine_tuning(ctx: click.Context) -> None: "--from-checkpoint", type=str, default=None, - help="The checkpoint to be used in the fine-tuning. The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. " + help="The checkpoint identifier to continue training from a previous fine-tuning job. " + "The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. 
" "The step value is optional, without it the final checkpoint will be used.", ) def create( @@ -369,12 +370,10 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None: """List available checkpoints for a fine-tuning job""" client: Together = ctx.obj - response = client.fine_tuning.list_checkpoints(fine_tune_id) - - response.data = response.data or [] + checkpoints = client.fine_tuning.list_checkpoints(fine_tune_id) display_list = [] - for checkpoint in response.data: + for checkpoint in checkpoints: display_list.append( { "Type": checkpoint.type, @@ -384,7 +383,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None: ) if display_list: - click.echo(f"This job contains these checkpoints:") + click.echo(f"Job {fine_tune_id} contains the following checkpoints:") table = tabulate(display_list, headers="keys", tablefmt="grid") click.echo(table) click.echo("\nTo download a checkpoint, use cmd: together fine-tuning download") @@ -406,7 +405,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None: "--checkpoint-step", type=int, required=False, - default=-1, + default=None, help="Download fine-tuning checkpoint. Defaults to latest.", ) @click.option( @@ -420,7 +419,7 @@ def download( ctx: click.Context, fine_tune_id: str, output_dir: str, - checkpoint_step: int, + checkpoint_step: int | None, checkpoint_type: DownloadCheckpointType, ) -> None: """Download fine-tuning checkpoint""" diff --git a/src/together/legacy/finetune.py b/src/together/legacy/finetune.py index fe53be0e..a8a973bb 100644 --- a/src/together/legacy/finetune.py +++ b/src/together/legacy/finetune.py @@ -161,7 +161,7 @@ def download( cls, fine_tune_id: str, output: str | None = None, - step: int = -1, + step: int | None = None, ) -> Dict[str, Any]: """Legacy finetuning download function.""" diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index 8de46d3c..6fb127d3 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -24,7 +24,6 @@ FinetuneLRScheduler, FinetuneLinearLRSchedulerArgs, FinetuneCheckpoint, - FinetuneCheckpointList, ) from together.types.finetune import DownloadCheckpointType, FinetuneEventType from together.utils import ( @@ -34,6 +33,8 @@ get_event_step, ) +_FT_JOB_REGEX = r"^ft-[\dabcdef-]+:\d+$" + def createFinetuneRequest( model_limits: FinetuneTrainingLimits, @@ -383,7 +384,7 @@ def list_events(self, id: str) -> FinetuneListEvents: return FinetuneListEvents(**response.data) - def list_checkpoints(self, id: str) -> FinetuneCheckpointList: + def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: """ List available checkpoints for a fine-tuning job @@ -391,7 +392,7 @@ def list_checkpoints(self, id: str) -> FinetuneCheckpointList: id (str): Unique identifier of the fine-tune job to list checkpoints for Returns: - FinetuneCheckpointList: Object containing list of available checkpoints + List[FinetuneCheckpoint]: List of available checkpoints """ events = self.list_events(id).data or [] @@ -414,20 +415,27 @@ def list_checkpoints(self, id: str) -> FinetuneCheckpointList: ) elif event_type == FinetuneEventType.JOB_COMPLETE: formatted_time = format_event_timestamp(event) - is_lora = hasattr(event, "adapter_path") - - checkpoints.append( - FinetuneCheckpoint( - type="Final Merged" if is_lora else "Final", - timestamp=formatted_time, - name=id, + if hasattr(event, "model_path"): + checkpoints.append( + FinetuneCheckpoint( + type=( + "Final Merged" + if hasattr(event, "adapter_path") + else "Final" 
+ ), + timestamp=formatted_time, + name=id, + ) ) - ) - if is_lora: + if hasattr(event, "adapter_path"): checkpoints.append( FinetuneCheckpoint( - type="Final Adapter", + type=( + "Final Adapter" + if hasattr(event, "model_path") + else "Final" + ), timestamp=formatted_time, name=id, ) @@ -436,14 +444,14 @@ def list_checkpoints(self, id: str) -> FinetuneCheckpointList: # Sort by timestamp (newest first) checkpoints.sort(key=lambda x: x.timestamp, reverse=True) - return FinetuneCheckpointList(object="list", data=checkpoints) + return checkpoints def download( self, id: str, *, output: Path | str | None = None, - checkpoint_step: int = -1, + checkpoint_step: int | None = None, checkpoint_type: DownloadCheckpointType = DownloadCheckpointType.DEFAULT, ) -> FinetuneDownloadResult: """ @@ -464,18 +472,19 @@ def download( FinetuneDownloadResult: Object containing downloaded model metadata """ - if re.match(r"^ft-[\dabcde-]+\:\d+$", id) is not None: - if checkpoint_step == -1: + if re.match(_FT_JOB_REGEX, id) is not None: + if checkpoint_step is None: checkpoint_step = int(id.split(":")[1]) id = id.split(":")[0] else: raise ValueError( - "Invalid fine-tune ID. Don't use `checkpoint_step` with a colon in the ID." + "Fine-tuning job ID {id} contains a colon to specify the step to download, but `checkpoint_step` " + "was also set. Remove one of the step specifiers to proceed." ) url = f"finetune/download?ft_id={id}" - if checkpoint_step > 0: + if checkpoint_step is not None: url += f"&checkpoint_step={checkpoint_step}" ft_job = self.retrieve(id) @@ -789,7 +798,7 @@ async def list_events(self, id: str) -> FinetuneListEvents: return events_list - async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: + async def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: """ List available checkpoints for a fine-tuning job @@ -797,7 +806,7 @@ async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: id (str): Unique identifier of the fine-tune job to list checkpoints for Returns: - FinetuneCheckpointList: Object containing list of available checkpoints + List[FinetuneCheckpoint]: Object containing list of available checkpoints """ events_list = await self.list_events(id) events = events_list.data or [] @@ -843,7 +852,7 @@ async def list_checkpoints(self, id: str) -> FinetuneCheckpointList: # Sort by timestamp (newest first) checkpoints.sort(key=lambda x: x.timestamp, reverse=True) - return FinetuneCheckpointList(object="list", data=checkpoints) + return checkpoints async def download( self, id: str, *, output: str | None = None, checkpoint_step: int = -1 diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py index 33b1ed23..1a7419a5 100644 --- a/src/together/types/__init__.py +++ b/src/together/types/__init__.py @@ -32,7 +32,6 @@ ) from together.types.finetune import ( FinetuneCheckpoint, - FinetuneCheckpointList, FinetuneDownloadResult, FinetuneLinearLRSchedulerArgs, FinetuneList, @@ -62,7 +61,6 @@ "EmbeddingRequest", "EmbeddingResponse", "FinetuneCheckpoint", - "FinetuneCheckpointList", "FinetuneRequest", "FinetuneResponse", "FinetuneList", diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py index fc5f5257..e3811292 100644 --- a/src/together/types/finetune.py +++ b/src/together/types/finetune.py @@ -327,7 +327,7 @@ class FinetuneLinearLRSchedulerArgs(BaseModel): class FinetuneCheckpoint(BaseModel): """ - Fine-tune checkpoint information + Fine-tuning checkpoint information """ # checkpoint type (e.g. 
"Intermediate", "Final", "Final Merged", "Final Adapter") @@ -336,14 +336,3 @@ class FinetuneCheckpoint(BaseModel): timestamp: str # checkpoint name/identifier name: str - - -class FinetuneCheckpointList(BaseModel): - """ - List of fine-tune checkpoints - """ - - # object type - object: Literal["list"] | None = None - # list of fine-tune checkpoint objects - data: List[FinetuneCheckpoint] | None = None diff --git a/src/together/utils/tools.py b/src/together/utils/tools.py index de237860..cf860f1a 100644 --- a/src/together/utils/tools.py +++ b/src/together/utils/tools.py @@ -26,10 +26,10 @@ def normalize_key(key: str) -> str: def parse_timestamp(timestamp: str) -> datetime | None: - """Parse a timestamp string into a datetime object or None if invalid. + """Parse a timestamp string into a datetime object or None if the string is empty. Args: - timestamp (str): Timestamp in ISO 8601 format (e.g. "2021-01-01T00:00:00Z") + timestamp (str): Timestamp Returns: datetime | None: Parsed datetime, or None if the string is empty @@ -37,7 +37,14 @@ def parse_timestamp(timestamp: str) -> datetime | None: if timestamp == "": return None - return datetime.fromisoformat(timestamp.replace("Z", "+00:00")) + formats = ["%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"] + for fmt in formats: + try: + return datetime.strptime(timestamp, fmt) + except ValueError: + continue + + raise ValueError("Timestamp does not match any expected format") def format_event_timestamp(event: Any) -> str: @@ -65,18 +72,14 @@ def get_event_step(event: Any) -> str | None: step = getattr(event, "step", None) if step is not None: return str(step) - - message = getattr(event, "message", "") or "" - step_match = re.search(r"step[:\s]+(\d+)", message.lower()) - return step_match.group(1) if step_match else None + return None -# Convert fine-tune nano-dollar price to dollars def finetune_price_to_dollars(price: float) -> float: - """Convert fine-tune price to dollars + """Convert fine-tuning job price to dollars Args: - price (float): Fine-tune price in billing units + price (float): Fine-tuning job price in billing units Returns: float: Price in dollars From bad1787e2558bf63c8f019af44a47323bf676a61 Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Tue, 11 Mar 2025 13:14:14 +0100 Subject: [PATCH 07/11] pr feedback --- src/together/cli/api/finetune.py | 7 +++--- src/together/resources/finetune.py | 36 +++++++++++++++++------------- src/together/utils/__init__.py | 4 ++-- src/together/utils/tools.py | 12 +++++----- 4 files changed, 33 insertions(+), 26 deletions(-) diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py index 8aa98269..ad81339d 100644 --- a/src/together/cli/api/finetune.py +++ b/src/together/cli/api/finetune.py @@ -18,8 +18,7 @@ log_warn, log_warn_once, parse_timestamp, - format_event_timestamp, - get_event_step, + format_timestamp, ) from together.types.finetune import ( DownloadCheckpointType, @@ -377,7 +376,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None: display_list.append( { "Type": checkpoint.type, - "Timestamp": checkpoint.timestamp, + "Timestamp": format_timestamp(checkpoint.timestamp), "Name": checkpoint.name, } ) @@ -386,7 +385,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None: click.echo(f"Job {fine_tune_id} contains the following checkpoints:") table = tabulate(display_list, headers="keys", tablefmt="grid") click.echo(table) - click.echo("\nTo download a checkpoint, use cmd: together fine-tuning download") + click.echo("\nTo download a 
checkpoint, use `together fine-tuning download`") else: click.echo(f"No checkpoints found for job {fine_tune_id}") diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index 6fb127d3..62a427f9 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -29,11 +29,10 @@ from together.utils import ( log_warn_once, normalize_key, - format_event_timestamp, get_event_step, ) -_FT_JOB_REGEX = r"^ft-[\dabcdef-]+:\d+$" +_FT_JOB_WITH_STEP_REGEX = r"^ft-[\dabcdef-]+:\d+$" def createFinetuneRequest( @@ -220,6 +219,8 @@ def create( (Instruction format), inputs will be masked. Defaults to "auto". from_checkpoint (str, optional): The checkpoint to be used in the fine-tuning. + The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. + The step value is optional, without it the final checkpoint will be used. Returns: FinetuneResponse: Object containing information about fine-tuning job. @@ -402,19 +403,21 @@ def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: event_type = event.type if event_type == FinetuneEventType.CHECKPOINT_SAVE: - formatted_time = format_event_timestamp(event) step = get_event_step(event) - checkpoint_name = f"{id}:{step}" if step else id + checkpoint_name = f"{id}:{step}" if step is not None else id checkpoints.append( FinetuneCheckpoint( - type="Intermediate", - timestamp=formatted_time, + type=( + f"Intermediate (step {step})" + if step is not None + else "Intermediate" + ), + timestamp=event.created_at, name=checkpoint_name, ) ) elif event_type == FinetuneEventType.JOB_COMPLETE: - formatted_time = format_event_timestamp(event) if hasattr(event, "model_path"): checkpoints.append( FinetuneCheckpoint( @@ -423,7 +426,7 @@ def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: if hasattr(event, "adapter_path") else "Final" ), - timestamp=formatted_time, + timestamp=event.created_at, name=id, ) ) @@ -436,7 +439,7 @@ def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: if hasattr(event, "model_path") else "Final" ), - timestamp=formatted_time, + timestamp=event.created_at, name=id, ) ) @@ -472,7 +475,7 @@ def download( FinetuneDownloadResult: Object containing downloaded model metadata """ - if re.match(_FT_JOB_REGEX, id) is not None: + if re.match(_FT_JOB_WITH_STEP_REGEX, id) is not None: if checkpoint_step is None: checkpoint_step = int(id.split(":")[1]) id = id.split(":")[0] @@ -588,6 +591,7 @@ async def create( verbose: bool = False, model_limits: FinetuneTrainingLimits | None = None, train_on_inputs: bool | Literal["auto"] = "auto", + from_checkpoint: str | None = None, ) -> FinetuneResponse: """ Async method to initiate a fine-tuning job @@ -633,6 +637,9 @@ async def create( For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields (Instruction format), inputs will be masked. Defaults to "auto". + from_checkpoint (str, optional): The checkpoint to be used in the fine-tuning. + The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. + The step value is optional, without it the final checkpoint will be used. Returns: FinetuneResponse: Object containing information about fine-tuning job. 
@@ -670,6 +677,7 @@ async def create( wandb_project_name=wandb_project_name, wandb_name=wandb_name, train_on_inputs=train_on_inputs, + from_checkpoint=from_checkpoint, ) if verbose: @@ -817,25 +825,23 @@ async def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: event_type = event.type if event_type == FinetuneEventType.CHECKPOINT_SAVE: - formatted_time = format_event_timestamp(event) step = get_event_step(event) checkpoint_name = f"{id}:{step}" if step else id checkpoints.append( FinetuneCheckpoint( type="Intermediate", - timestamp=formatted_time, + timestamp=event.created_at, name=checkpoint_name, ) ) elif event_type == FinetuneEventType.JOB_COMPLETE: - formatted_time = format_event_timestamp(event) is_lora = hasattr(event, "adapter_path") checkpoints.append( FinetuneCheckpoint( type="Final Merged" if is_lora else "Final", - timestamp=formatted_time, + timestamp=event.created_at, name=id, ) ) @@ -844,7 +850,7 @@ async def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: checkpoints.append( FinetuneCheckpoint( type="Final Adapter", - timestamp=formatted_time, + timestamp=event.created_at, name=id, ) ) diff --git a/src/together/utils/__init__.py b/src/together/utils/__init__.py index f2109569..a9e88c3b 100644 --- a/src/together/utils/__init__.py +++ b/src/together/utils/__init__.py @@ -8,7 +8,7 @@ finetune_price_to_dollars, normalize_key, parse_timestamp, - format_event_timestamp, + format_timestamp, get_event_step, ) @@ -25,7 +25,7 @@ "enforce_trailing_slash", "normalize_key", "parse_timestamp", - "format_event_timestamp", + "format_timestamp", "get_event_step", "finetune_price_to_dollars", "convert_bytes", diff --git a/src/together/utils/tools.py b/src/together/utils/tools.py index cf860f1a..2e84307a 100644 --- a/src/together/utils/tools.py +++ b/src/together/utils/tools.py @@ -47,17 +47,19 @@ def parse_timestamp(timestamp: str) -> datetime | None: raise ValueError("Timestamp does not match any expected format") -def format_event_timestamp(event: Any) -> str: - """Format event timestamp to a readable date string. +def format_timestamp(timestamp_str: str) -> str: + """Format timestamp to a readable date string. 
Args: - event: An event object with a created_at attribute + timestamp: A timestamp string Returns: str: Formatted timestamp string (MM/DD/YYYY, HH:MM AM/PM) """ - timestamp = parse_timestamp(event.created_at or "") - return timestamp.strftime("%m/%d/%Y, %I:%M %p") if timestamp else "" + timestamp = parse_timestamp(timestamp_str) + if timestamp is None: + return "" + return timestamp.strftime("%m/%d/%Y, %I:%M %p") def get_event_step(event: Any) -> str | None: From 42a94257ab88b13bc0101e38b15adc51031fef36 Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Tue, 11 Mar 2025 13:21:55 +0100 Subject: [PATCH 08/11] remove repetion --- src/together/resources/finetune.py | 165 +++++++++++++---------------- 1 file changed, 71 insertions(+), 94 deletions(-) diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index 62a427f9..495a2423 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -25,7 +25,11 @@ FinetuneLinearLRSchedulerArgs, FinetuneCheckpoint, ) -from together.types.finetune import DownloadCheckpointType, FinetuneEventType +from together.types.finetune import ( + DownloadCheckpointType, + FinetuneEventType, + FinetuneEvent, +) from together.utils import ( log_warn_once, normalize_key, @@ -140,6 +144,70 @@ def createFinetuneRequest( return finetune_request +def _process_checkpoints_from_events( + events: List[FinetuneEvent], id: str +) -> List[FinetuneCheckpoint]: + """ + Helper function to process events and create checkpoint list. + + Args: + events (List[FinetuneEvent]): List of fine-tune events to process + id (str): Fine-tune job ID + + Returns: + List[FinetuneCheckpoint]: List of available checkpoints + """ + checkpoints: List[FinetuneCheckpoint] = [] + + for event in events: + event_type = event.type + + if event_type == FinetuneEventType.CHECKPOINT_SAVE: + step = get_event_step(event) + checkpoint_name = f"{id}:{step}" if step is not None else id + + checkpoints.append( + FinetuneCheckpoint( + type=( + f"Intermediate (step {step})" + if step is not None + else "Intermediate" + ), + timestamp=event.created_at, + name=checkpoint_name, + ) + ) + elif event_type == FinetuneEventType.JOB_COMPLETE: + if hasattr(event, "model_path"): + checkpoints.append( + FinetuneCheckpoint( + type=( + "Final Merged" + if hasattr(event, "adapter_path") + else "Final" + ), + timestamp=event.created_at, + name=id, + ) + ) + + if hasattr(event, "adapter_path"): + checkpoints.append( + FinetuneCheckpoint( + type=( + "Final Adapter" if hasattr(event, "model_path") else "Final" + ), + timestamp=event.created_at, + name=id, + ) + ) + + # Sort by timestamp (newest first) + checkpoints.sort(key=lambda x: x.timestamp, reverse=True) + + return checkpoints + + class FineTuning: def __init__(self, client: TogetherClient) -> None: self._client = client @@ -396,58 +464,7 @@ def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: List[FinetuneCheckpoint]: List of available checkpoints """ events = self.list_events(id).data or [] - - checkpoints: List[FinetuneCheckpoint] = [] - - for event in events: - event_type = event.type - - if event_type == FinetuneEventType.CHECKPOINT_SAVE: - step = get_event_step(event) - checkpoint_name = f"{id}:{step}" if step is not None else id - - checkpoints.append( - FinetuneCheckpoint( - type=( - f"Intermediate (step {step})" - if step is not None - else "Intermediate" - ), - timestamp=event.created_at, - name=checkpoint_name, - ) - ) - elif event_type == FinetuneEventType.JOB_COMPLETE: - if hasattr(event, 
"model_path"): - checkpoints.append( - FinetuneCheckpoint( - type=( - "Final Merged" - if hasattr(event, "adapter_path") - else "Final" - ), - timestamp=event.created_at, - name=id, - ) - ) - - if hasattr(event, "adapter_path"): - checkpoints.append( - FinetuneCheckpoint( - type=( - "Final Adapter" - if hasattr(event, "model_path") - else "Final" - ), - timestamp=event.created_at, - name=id, - ) - ) - - # Sort by timestamp (newest first) - checkpoints.sort(key=lambda x: x.timestamp, reverse=True) - - return checkpoints + return _process_checkpoints_from_events(events, id) def download( self, @@ -818,47 +835,7 @@ async def list_checkpoints(self, id: str) -> List[FinetuneCheckpoint]: """ events_list = await self.list_events(id) events = events_list.data or [] - - checkpoints: List[FinetuneCheckpoint] = [] - - for event in events: - event_type = event.type - - if event_type == FinetuneEventType.CHECKPOINT_SAVE: - step = get_event_step(event) - checkpoint_name = f"{id}:{step}" if step else id - - checkpoints.append( - FinetuneCheckpoint( - type="Intermediate", - timestamp=event.created_at, - name=checkpoint_name, - ) - ) - elif event_type == FinetuneEventType.JOB_COMPLETE: - is_lora = hasattr(event, "adapter_path") - - checkpoints.append( - FinetuneCheckpoint( - type="Final Merged" if is_lora else "Final", - timestamp=event.created_at, - name=id, - ) - ) - - if is_lora: - checkpoints.append( - FinetuneCheckpoint( - type="Final Adapter", - timestamp=event.created_at, - name=id, - ) - ) - - # Sort by timestamp (newest first) - checkpoints.sort(key=lambda x: x.timestamp, reverse=True) - - return checkpoints + return _process_checkpoints_from_events(events, id) async def download( self, id: str, *, output: str | None = None, checkpoint_step: int = -1 From 57fef4e83ddb1ca5ed7e1b3aa98b0c931ad96e7a Mon Sep 17 00:00:00 2001 From: Artem Chumachenko Date: Tue, 11 Mar 2025 14:40:20 +0100 Subject: [PATCH 09/11] fix --- src/together/resources/finetune.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py index 495a2423..11d445db 100644 --- a/src/together/resources/finetune.py +++ b/src/together/resources/finetune.py @@ -286,7 +286,7 @@ def create( For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields (Instruction format), inputs will be masked. Defaults to "auto". - from_checkpoint (str, optional): The checkpoint to be used in the fine-tuning. + from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job. The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. The step value is optional, without it the final checkpoint will be used. @@ -654,7 +654,7 @@ async def create( For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields (Instruction format), inputs will be masked. Defaults to "auto". - from_checkpoint (str, optional): The checkpoint to be used in the fine-tuning. + from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job. The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. The step value is optional, without it the final checkpoint will be used. 
From 13b5d4a9d7e0215f664d3e4652fdcbf3356d7570 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 11 Mar 2025 15:35:20 +0100
Subject: [PATCH 10/11] style changes

---
 README.md                       | 4 ++--
 src/together/types/endpoints.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 408da2fd..a54430b0 100644
--- a/README.md
+++ b/README.md
@@ -67,7 +67,7 @@ print(response.choices[0].message.content)
 response = client.chat.completions.create(
     model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
     messages=[{
-        "role": "user", 
+        "role": "user",
         "content": [
             {
                 "type": "text",
@@ -91,7 +91,7 @@ response = client.chat.completions.create(
         "role": "user",
         "content": [
             {
-                "type": "text", 
+                "type": "text",
                 "text": "Compare these two images."
             },
             {
diff --git a/src/together/types/endpoints.py b/src/together/types/endpoints.py
index 3f52831a..0db1de21 100644
--- a/src/together/types/endpoints.py
+++ b/src/together/types/endpoints.py
@@ -86,9 +86,9 @@ class BaseEndpoint(TogetherJSONModel):
     model: str = Field(description="The model deployed on this endpoint")
     type: str = Field(description="The type of endpoint")
     owner: str = Field(description="The owner of this endpoint")
-    state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "FAILED", "ERROR"] = (
-        Field(description="Current state of the endpoint")
-    )
+    state: Literal[
+        "PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "FAILED", "ERROR"
+    ] = Field(description="Current state of the endpoint")
     created_at: datetime = Field(description="Timestamp when the endpoint was created")

From 05d85d358945a265b17725068195be81d1935c0a Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 11 Mar 2025 15:35:41 +0100
Subject: [PATCH 11/11] version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a77fb7f7..e0877f97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

 [tool.poetry]
 name = "together"
-version = "1.4.2"
+version = "1.4.3"
 authors = [
     "Together AI "
 ]
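
Taken together, the changes in this series enable a resume-from-checkpoint workflow from the Python client. The sketch below is illustrative only and is not part of the patches: the training file ID, model name, and job IDs are made-up placeholders, and the remaining `create` arguments are assumed to take their defaults. It exercises the `from_checkpoint` argument, the new `list_checkpoints` helper, and the extended `download` introduced above; the CLI equivalents are `--from-checkpoint`, `together fine-tuning list-checkpoints`, and `together fine-tuning download`.

from together import Together

client = Together()

# Resume training from step 100 of an earlier fine-tuning job via `from_checkpoint`.
# Dropping the ":100" suffix would resume from the final checkpoint instead.
# All IDs and the model name below are placeholders.
job = client.fine_tuning.create(
    training_file="file-1234abcd-5678-90ef-1234-567890abcdef",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    from_checkpoint="ft-11111111-2222-3333-4444-555555555555:100",
)

# Inspect the checkpoints produced by a job (same data the CLI table shows).
for ckpt in client.fine_tuning.list_checkpoints(job.id):
    print(ckpt.type, ckpt.timestamp, ckpt.name)

# Download a specific intermediate checkpoint; the ":<step>" suffix selects the
# step, so `checkpoint_step` does not need to be passed separately.
client.fine_tuning.download(
    "ft-11111111-2222-3333-4444-555555555555:100",
    output="./checkpoint",
)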