-from collections import defaultdict
 import re
 import time
+from collections import defaultdict
 from typing import Any, Dict, List, Optional, Union
 
 import openai
 
-from dspy.utils.logging import logger
 from dspy.clients.finetune import (
     FinetuneJob,
     TrainingMethod,
     TrainingStatus,
-    validate_finetune_data,
     save_data,
+    validate_finetune_data,
 )
+from dspy.utils.logging import logger
 
 # Provider name
 PROVIDER_OPENAI = "openai"
 
-# List of model IDs
-_MODEL_IDS = [
-    "gpt-4o",
-    "gpt-4o-2024-08-06",
-    "gpt-4o-2024-05-13",
-    "chatgpt-4o-latest",
-    "gpt-4o-mini",
-    "gpt-4o-mini-2024-07-18",
-    "gpt-4o-realtime-preview",
-    "gpt-4o-realtime-preview-2024-10-01",
-    "o1-preview",
-    "o1-preview-2024-09-12",
-    "o1-mini",
-    "o1-mini-2024-09-12",
-    "gpt-4-turbo",
-    "gpt-4-turbo-2024-04-09",
-    "gpt-4-turbo-preview",
-    "gpt-4-0125-preview",
-    "gpt-4-1106-preview",
-    "gpt-4",
-    "gpt-4-0613",
-    "gpt-4-0314",
-    "gpt-3.5-turbo-0125",
-    "gpt-3.5-turbo",
-    "gpt-3.5-turbo-1106",
-    "gpt-3.5-turbo-instruct",
-    "dall-e-3",
-    "dall-e-2",
-    "tts-1",
-    "tts-1-hd",
-    "text-embedding-3-large",
-    "text-embedding-3-small",
-    "text-embedding-ada-002",
-    "omni-moderation-latest",
-    "omni-moderation-2024-09-26",
-    "text-moderation-latest",
-    "text-moderation-stable",
-    "text-moderation-007",
-    "babbage-002",
-    "davinci-002"
-]
-
 
 def is_openai_model(model: str) -> bool:
     """Check if the model is an OpenAI model."""
     # Filter the provider_prefix, if exists
     provider_prefix = f"{PROVIDER_OPENAI}/"
     if model.startswith(provider_prefix):
-        model = model[len(provider_prefix):]
+        model = model[len(provider_prefix) :]
 
+    client = openai.OpenAI()
+    valid_model_names = [model.id for model in client.models.list().data]
     # Check if the model is a base OpenAI model
-    if model in _MODEL_IDS:
+    if model in valid_model_names:
         return True
 
     # Check if the model is a fine-tuned OpenAI model. Fine-tuned OpenAI models
@@ -77,15 +37,15 @@ def is_openai_model(model: str) -> bool:
     # base model name.
     # TODO: This part can be updated to match the actual fine-tuned model names
     # by making a call to the OpenAI API to be more exact, but this might
-    # require an API key with the right permissions.
+    # require an API key with the right permissions.
     match = re.match(r"ft:([^:]+):", model)
-    if match and match.group(1) in _MODEL_IDS:
+    if match and match.group(1) in valid_model_names:
        return True
 
     return False
 
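In the rewritten check, a model name passes either because it appears in the account's live model list or because it is a fine-tuned ID whose base model does. A minimal usage sketch, assuming OPENAI_API_KEY is configured and using a made-up fine-tuned ID:

    is_openai_model("openai/gpt-4o-mini")                       # prefix stripped, then matched against models.list()
    is_openai_model("ft:gpt-4o-mini-2024-07-18:acme::abc123")   # r"ft:([^:]+):" extracts the base model name
    is_openai_model("claude-3-5-sonnet")                        # neither a listed model nor an ft: ID, so False
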
-class FinetuneJobOpenAI(FinetuneJob):
 
+class FinetuneJobOpenAI(FinetuneJob):
     def __init__(self, *args, **kwargs):
         self.provider_file_id = None  # TODO: Can we get this using the job_id?
         self.provider_job_id = None
@@ -118,12 +78,12 @@ def status(self) -> TrainingStatus:
 
 
 def finetune_openai(
-    job: FinetuneJobOpenAI,
-    model: str,
-    train_data: List[Dict[str, Any]],
-    train_kwargs: Optional[Dict[str, Any]]=None,
-    train_method: TrainingMethod = TrainingMethod.SFT,
-) -> str:
+    job: FinetuneJobOpenAI,
+    model: str,
+    train_data: List[Dict[str, Any]],
+    train_kwargs: Optional[Dict[str, Any]] = None,
+    train_method: TrainingMethod = TrainingMethod.SFT,
+) -> str:
     train_kwargs = train_kwargs or {}
     train_method = TrainingMethod.SFT  # Note: This could be an argument; ignoring method
 
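With the reformatted signature, a call reads naturally with keyword arguments. A hedged sketch of such a call, using the prompt/completion record shape that _convert_data further down expects; the bare FinetuneJobOpenAI constructor call and the hyperparameters entry in train_kwargs are assumptions, the latter mirroring the public OpenAI fine-tuning API:

    train_data = [
        {"prompt": "What is 2 + 2?", "completion": "4"},
        {"prompt": "Name a prime number.", "completion": "7"},
    ]
    job = FinetuneJobOpenAI()  # constructor arguments omitted for illustration
    model_id = finetune_openai(
        job,
        model="gpt-4o-mini-2024-07-18",
        train_data=train_data,
        train_kwargs={"hyperparameters": {"n_epochs": 1}},  # assumed to be forwarded to the provider job
    )
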
@@ -171,10 +131,12 @@ def finetune_openai(
 
     return model
 
+
 _SUPPORTED_TRAINING_METHODS = [
     TrainingMethod.SFT,
 ]
 
+
 def _get_training_status(job_id: str) -> Union[TrainingStatus, Exception]:
     # TODO: Should this type be shared across all fine-tune clients?
     provider_status_to_training_status = {
@@ -228,10 +190,7 @@ def _is_terminal_training_status(status: TrainingStatus) -> bool:
     ]
 
 
-def _validate_data(
-    data: Dict[str, str],
-    train_method: TrainingMethod
-) -> Optional[Exception]:
+def _validate_data(data: Dict[str, str], train_method: TrainingMethod) -> Optional[Exception]:
     # Check if this train method is supported
     if train_method not in _SUPPORTED_TRAINING_METHODS:
         err_msg = f"OpenAI does not support the training method {train_method}."
@@ -241,20 +200,17 @@ def _validate_data(
 
 
 def _convert_data(
-    data: List[Dict[str, str]],
-    system_prompt: Optional[str]=None,
-) -> Union[List[Dict[str, Any]], Exception]:
+    data: List[Dict[str, str]],
+    system_prompt: Optional[str] = None,
+) -> Union[List[Dict[str, Any]], Exception]:
     # Item-wise conversion function
     def _row_converter(d):
-        messages = [
-            {"role": "user", "content": d["prompt"]},
-            {"role": "assistant", "content": d["completion"]}
-        ]
+        messages = [{"role": "user", "content": d["prompt"]}, {"role": "assistant", "content": d["completion"]}]
         if system_prompt:
             messages.insert(0, {"role": "system", "content": system_prompt})
         messages_dict = {"messages": messages}
         return messages_dict
-
+
     # Convert the data to the OpenAI format; validate the converted data
     converted_data = list(map(_row_converter, data))
     openai_data_validation(converted_data)
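For a concrete picture of the collapsed _row_converter, a single prompt/completion record becomes one chat-format training example; this assumes the function returns the converted list, as its signature suggests:

    _convert_data([{"prompt": "What is 2 + 2?", "completion": "4"}], system_prompt="You are terse.")
    # -> [{"messages": [
    #         {"role": "system", "content": "You are terse."},
    #         {"role": "user", "content": "What is 2 + 2?"},
    #         {"role": "assistant", "content": "4"},
    #     ]}]
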
@@ -270,11 +226,7 @@ def _upload_data(data_path: str) -> str:
     return provider_file.id
 
 
-def _start_remote_training(
-    train_file_id: str,
-    model: id,
-    train_kwargs: Optional[Dict[str, Any]]=None
-) -> str:
+def _start_remote_training(train_file_id: str, model: id, train_kwargs: Optional[Dict[str, Any]] = None) -> str:
     train_kwargs = train_kwargs or {}
     provider_job = openai.fine_tuning.jobs.create(
         model=model,
@@ -286,7 +238,7 @@ def _start_remote_training(
 
 def _wait_for_job(
     job: FinetuneJobOpenAI,
-    poll_frequency: int = 60,
+    poll_frequency: int = 60,
 ):
     while not _is_terminal_training_status(job.status()):
         time.sleep(poll_frequency)
@@ -304,6 +256,7 @@ def _get_trained_model(job):
     finetuned_model = provider_job.fine_tuned_model
     return finetuned_model
 
+
 # Adapted from https://cookbook.openai.com/examples/chat_finetuning_data_prep
 def openai_data_validation(dataset: List[dict[str, Any]]):
     format_errors = defaultdict(int)
@@ -364,7 +317,9 @@ def check_message_lengths(dataset: List[dict[str, Any]]) -> list[int]:
     n_too_long = sum([length > 16385 for length in convo_lens])
 
     if n_too_long > 0:
-        logger.info(f"There are {n_too_long} examples that may be over the 16,385 token limit; they will be truncated during fine-tuning.")
+        logger.info(
+            f"There are {n_too_long} examples that may be over the 16,385 token limit; they will be truncated during fine-tuning."
+        )
 
     if n_missing_system > 0:
         logger.info(f"There are {n_missing_system} examples that are missing a system message.")
@@ -377,6 +332,7 @@ def check_message_lengths(dataset: List[dict[str, Any]]) -> list[int]:
 
 def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):
     import tiktoken
+
     encoding = tiktoken.get_encoding("cl100k_base")
 
     num_tokens = 0
@@ -392,6 +348,7 @@ def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):
 
 def num_assistant_tokens_from_messages(messages):
     import tiktoken
+
     encoding = tiktoken.get_encoding("cl100k_base")
 
     num_tokens = 0
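The two tiktoken helpers feed the length checks above; a small sketch of how they could be exercised on one converted example (actual counts depend on the cl100k_base encoding, so no exact numbers are claimed):

    example_messages = [
        {"role": "user", "content": "What is 2 + 2?"},
        {"role": "assistant", "content": "4"},
    ]
    total_tokens = num_tokens_from_messages(example_messages)                # content tokens plus per-message overhead
    assistant_tokens = num_assistant_tokens_from_messages(example_messages)  # tokens contributed by assistant turns only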