 from tuneapi.apis.model_mistral import Mistral
 from tuneapi.apis.model_gemini import Gemini
 
-# projectX APIs
-from tuneapi.apis.threads import ThreadsAPI
-from tuneapi.apis.assistants import AssistantsAPI
 
 # other imports
 import os
 import random
 from time import time
 from typing import List, Optional
-
-# other tuneapi modules
-import tuneapi.types as tt
-import tuneapi.utils as tu
-
-
-def test_models(thread: str | tt.Thread, models: Optional[List[str]] = None):
-    """
-    Runs thread on all the models and prints the time taken and response.
-    """
-    if os.path.exists(thread):
-        thread = tt.Thread.from_dict(tu.from_json(thread))
-
-    # get all the models
-    models_to_test = [TuneModel, Openai, Anthropic, Groq, Mistral, Gemini]
-    if models and models != "all":
-        models_to_test = []
-        for m in models:
-            models_to_test.append(globals()[m])
-
-    # run all in a loop
-    for model in models_to_test:
-        print(tu.color.bold(f"[{model.__name__}]"), end=" ", flush=True)
-        try:
-            st = time()
-            m = model()
-            out = m.chat(thread)
-            et = time()
-            print(
-                tu.color.blue(f"[{et-st:0.2f}s]"),
-                tu.color.green(f"[SUCCESS]", True),
-                out,
-            )
-        except Exception as e:
-            et = time()
-            print(
-                tu.color.blue(f"[{et-st:0.2f}s]"),
-                tu.color.red(f"[ERROR]", True),
-                str(e),
-            )
-            continue
-
-
-def benchmark_models(
-    thread: str | tt.Thread,
-    models: Optional[List[str]] = "all",
-    n: int = 20,
-    max_threads: int = 5,
-    o: str = "benchmark.csv",
-):
-    """
-    Benchmarks a thread on all the models and saves the time taken and response in a CSV file and creates matplotlib
-    histogram chart with latency and char count distribution. Runs `n` iterations for each model.
-
-    It requires `matplotlib>=3.8.2` and `pandas>=2.2.0` to be installed.
-    """
-
-    try:
-        import matplotlib.pyplot as plt
-        import pandas as pd
-    except ImportError:
-        tu.logger.error(
-            "This is a special CLI helper function. If you want to use this then run: pip install matplotlib>=3.8.2 pandas>=2.2.0"
-        )
-        raise ImportError("Please install the required packages")
-
-    # if this is a JSON then load the thread
-    if os.path.exists(thread):
-        thread = tt.Thread.from_dict(tu.from_json(thread))
-
-    # get all the models
-    models_to_test = [TuneModel, Openai, Anthropic, Groq, Mistral, Gemini]
-    if models and models != "all":
-        models_to_test = []
-        for m in models:
-            models_to_test.append(globals()[m])
-
-    # function to perform benchmarking
-    def _bench(thread, model):
-        try:
-            st = time()
-            m = model()
-            out = m.chat(thread)
-            return model.__name__, time() - st, out, False
-        except Exception as e:
-            return model.__name__, time() - st, str(e), True
-
-    # threaded map and get the results
-    inputs = []
-    for m in models_to_test:
-        for _ in range(n):
-            inputs.append((thread, m))
-    random.shuffle(inputs)
-    print(f"Total combinations: {len(inputs)}")
-    results = tu.threaded_map(
-        fn=_bench,
-        inputs=inputs,
-        pbar=True,
-        safe=False,
-        max_threads=max_threads,
-    )
-    model_wise_errors = {}
-    all_results = []
-    for r in results:
-        name, time_taken, out, error = r
-        if error:
-            model_wise_errors.setdefault(name, 0)
-            model_wise_errors[name] += 1
-        else:
-            all_results.append(
-                {
-                    "model": name,
-                    "time": time_taken,
-                    "response": out,
-                }
-            )
-    n_errors = sum(model_wise_errors.values())
-    if n_errors:
-        print(
-            tu.color.red(f"{n_errors} FAILED", True)
-            + f" ie. {n_errors/len(inputs)*100:.2f}% failure rate"
-        )
-    n_success = len(inputs) - n_errors
-    print(
-        tu.color.green(f"{n_success} SUCCESS", True)
-        + f" ie. {n_success/len(inputs)*100:.2f}% success rate"
-    )
-
-    # create the report and save it
-    df = pd.DataFrame(all_results)
-    print("Created the benchmark report at:", tu.color.bold(o))
-    df.to_csv(o, index=False)
-
-    # create the histogram
-    fig, axs = plt.subplots(3, 1, figsize=(15, 10))
-    latency_by_models = {}
-    char_count_by_models = {}
-    for res in all_results:
-        latency_by_models.setdefault(res["model"], []).append(res["time"])
-        char_count_by_models.setdefault(res["model"], []).append(len(res["response"]))
-
-    # histogram for latency
-    axs[0].hist(
-        latency_by_models.values(),
-        bins=20,
-        alpha=0.7,
-        label=list(latency_by_models.keys()),
-    )
-    axs[0].set_title("Latency Distribution (lower is better)")
-    axs[0].set_xlabel("Time (s)")
-    axs[0].set_ylabel("Frequency")
-    axs[0].legend()
-
-    # histogram for character count
-    axs[1].hist(
-        char_count_by_models.values(),
-        bins=20,
-        alpha=0.7,
-        label=list(char_count_by_models.keys()),
-    )
-    axs[1].set_title("Character Count Distribution")
-    axs[1].set_xlabel("Count")
-    axs[1].set_ylabel("Frequency")
-    axs[1].legend()
-    plt.tight_layout()
-
-    # bar graph for success and failure rate
-    axs[2].bar(
-        model_wise_errors.keys(),
-        model_wise_errors.values(),
-        color="red",
-        label="Failed",
-    )
-    axs[2].set_title("Failure Rate (lower is better)")
-    axs[2].set_xlabel("Model")
-    axs[2].set_ylabel("Count")
-    axs[2].legend()
-
-    # save the plot
-    print("Created the benchmark plot at:", tu.color.bold("benchmark.png"))
-    plt.savefig("benchmark.png")
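
For context, the removed helpers were thin CLI-style wrappers: `test_models` runs a thread once per model and prints the latency and response, while `benchmark_models` runs `n` iterations per model over a thread pool and writes `benchmark.csv` plus `benchmark.png`. A minimal usage sketch against a pre-removal version follows; the `thread.json` filename is a placeholder for any JSON file that `tt.Thread.from_dict` can load, and the `from tuneapi.apis import ...` path is an assumption about where these functions were exposed, not something stated in this commit.

# Hypothetical usage of the now-removed helpers (pre-removal tuneapi only).
# "thread.json" is a placeholder path; the import location is assumed.
from tuneapi.apis import test_models, benchmark_models

# run the thread once on two providers and print per-model latency
test_models("thread.json", models=["Openai", "Anthropic"])

# 20 runs per model across all built-in providers, fanned out over 5 worker threads;
# writes benchmark.csv and benchmark.png to the working directory
benchmark_models("thread.json", models="all", n=20, max_threads=5, o="benchmark.csv")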