
Commit 091a33d

[0.6.0] Turbo with distributed_chat
1 parent ffcbf3b commit 091a33d

File tree: 12 files changed, +298 −9 lines

docs/changelog.rst

Lines changed: 6 additions & 0 deletions

@@ -7,6 +7,12 @@ minor versions.
 
 All relevant steps to be taken will be mentioned here.
 
+0.6.0
+-----
+
+- ``distributed_chat`` functionality added in ``tuneapi.apis.turbo``. In all APIs, see the
+  ``model.distributed_chat()`` method. This enables **fault tolerant LLM API calls**.
+
 0.5.13
 -----
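For context, a minimal usage sketch of the new surface (the model choice, prompts, and Thread construction are illustrative assumptions, not from this commit; the keyword arguments match the method signatures added below):

    import tuneapi.apis as ta
    import tuneapi.types as tt

    # Every model class gains the same method in this commit; Openai is
    # just one example, and these prompts are placeholders.
    model = ta.Openai()
    threads = [tt.Thread(tt.human(f"Summarise item {i}")) for i in range(50)]

    # Fan the 50 calls out over 10 worker threads, retrying each failed
    # call up to 3 times, with a progress bar.
    results = model.distributed_chat(threads, max_threads=10, retry=3, pbar=True)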

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tuneapi"
-version = "0.5.13"
+version = "0.6.0"
 description = "Tune AI APIs."
 authors = ["Frello Technology Private Limited <[email protected]>"]
 license = "MIT"

tuneapi/apis/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -7,3 +7,4 @@
 from tuneapi.apis.model_groq import Groq
 from tuneapi.apis.model_mistral import Mistral
 from tuneapi.apis.model_gemini import Gemini
+from tuneapi.apis.turbo import distributed_chat
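The turbo module itself is among the 12 changed files but its diff is not shown on this page, so nothing here confirms its internals. As a rough sketch only, a helper with this exact signature could deliver the advertised fault tolerance along these lines (the thread pool, per-prompt retry loop, and tqdm progress bar are assumptions, not the committed code):

    from concurrent.futures import ThreadPoolExecutor, as_completed
    from typing import List, Optional

    from tqdm import tqdm  # assumption: some progress-bar dependency

    import tuneapi.types as tt


    def distributed_chat(
        model,
        prompts: List[tt.Thread],
        post_logic: Optional[callable] = None,
        max_threads: int = 10,
        retry: int = 3,
        pbar: bool = True,
    ):
        def _one(prompt: tt.Thread):
            # Retry each prompt independently so a single flaky API call
            # cannot fail the whole batch.
            last_err = None
            for _ in range(retry):
                try:
                    out = model.chat(prompt)
                    return post_logic(out) if post_logic else out
                except Exception as e:
                    last_err = e
            return last_err  # surface the failure instead of raising

        results = [None] * len(prompts)
        with ThreadPoolExecutor(max_workers=max_threads) as exe:
            futures = {exe.submit(_one, p): i for i, p in enumerate(prompts)}
            done = as_completed(futures)
            if pbar:
                done = tqdm(done, total=len(prompts))
            for fut in done:
                results[futures[fut]] = fut.result()  # never raises; _one catches
        return results

Whatever the real implementation, the pattern below is uniform: each model class forwards a thin distributed_chat method to this shared helper, so every provider gets batch fan-out and retries without duplicating the pooling logic.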

tuneapi/apis/model_anthropic.py

Lines changed: 18 additions & 0 deletions

@@ -11,6 +11,7 @@
 
 import tuneapi.utils as tu
 import tuneapi.types as tt
+from tuneapi.apis.turbo import distributed_chat
 
 
 class Anthropic(tt.ModelInterface):
@@ -236,6 +237,23 @@ def stream_chat(
                 break
         return
 
+    def distributed_chat(
+        self,
+        prompts: List[tt.Thread],
+        post_logic: Optional[callable] = None,
+        max_threads: int = 10,
+        retry: int = 3,
+        pbar=True,
+    ):
+        return distributed_chat(
+            self,
+            prompts=prompts,
+            post_logic=post_logic,
+            max_threads=max_threads,
+            retry=retry,
+            pbar=pbar,
+        )
+
 
     # helper methods

tuneapi/apis/model_gemini.py

Lines changed: 19 additions & 1 deletion

@@ -7,10 +7,11 @@
 
 import json
 import requests
-from typing import Optional, Any, Dict
+from typing import Optional, Any, Dict, List
 
 import tuneapi.utils as tu
 import tuneapi.types as tt
+from tuneapi.apis.turbo import distributed_chat
 
 
 class Gemini(tt.ModelInterface):
@@ -276,3 +277,20 @@ def stream_chat(
                     fn_call["arguments"] = fn_call.pop("args")
                     yield fn_call
                 block_lines = ""
+
+    def distributed_chat(
+        self,
+        prompts: List[tt.Thread],
+        post_logic: Optional[callable] = None,
+        max_threads: int = 10,
+        retry: int = 3,
+        pbar=True,
+    ):
+        return distributed_chat(
+            self,
+            prompts=prompts,
+            post_logic=post_logic,
+            max_threads=max_threads,
+            retry=retry,
+            pbar=pbar,
+        )

tuneapi/apis/model_groq.py

Lines changed: 18 additions & 0 deletions

@@ -10,6 +10,7 @@
 
 import tuneapi.utils as tu
 import tuneapi.types as tt
+from tuneapi.apis.turbo import distributed_chat
 
 
 class Groq(tt.ModelInterface):
@@ -190,3 +191,20 @@ def stream_chat(
             fn_call["arguments"] = tu.from_json(fn_call["arguments"])
             yield fn_call
         return
+
+    def distributed_chat(
+        self,
+        prompts: List[tt.Thread],
+        post_logic: Optional[callable] = None,
+        max_threads: int = 10,
+        retry: int = 3,
+        pbar=True,
+    ):
+        return distributed_chat(
+            self,
+            prompts=prompts,
+            post_logic=post_logic,
+            max_threads=max_threads,
+            retry=retry,
+            pbar=pbar,
+        )

tuneapi/apis/model_mistral.py

Lines changed: 23 additions & 7 deletions

@@ -10,8 +10,7 @@
 
 import tuneapi.utils as tu
 import tuneapi.types as tt
-from tuneapi.utils import ENV, SimplerTimes as stime, from_json, to_json
-from tuneapi.types import Thread, human, Message
+from tuneapi.apis.turbo import distributed_chat
 
 
 class Mistral(tt.ModelInterface):
@@ -23,7 +22,7 @@ def __init__(
     ):
         self.model_id = id
         self.base_url = base_url
-        self.api_token = ENV.MISTRAL_TOKEN("")
+        self.api_token = tu.ENV.MISTRAL_TOKEN("")
         self.extra_headers = extra_headers
 
     def set_api_token(self, token: str) -> None:
@@ -95,7 +94,7 @@ def _process_input(self, chats, token: Optional[str] = None):
 
     def chat(
         self,
-        chats: Thread | str,
+        chats: tt.Thread | str,
         model: Optional[str] = None,
         max_tokens: int = 1024,
         temperature: float = 1,
@@ -124,7 +123,7 @@ def chat(
 
     def stream_chat(
         self,
-        chats: Thread | str,
+        chats: tt.Thread | str,
         model: Optional[str] = None,
         max_tokens: int = 1024,
         temperature: float = 1,
@@ -135,7 +134,7 @@ def stream_chat(
         extra_headers: Optional[Dict[str, str]] = None,
     ):
         tools = []
-        if isinstance(chats, Thread):
+        if isinstance(chats, tt.Thread):
             tools = [{"type": "function", "function": x.to_dict()} for x in chats.tools]
         headers, messages = self._process_input(chats, token)
         extra_headers = extra_headers or self.extra_headers
@@ -191,6 +190,23 @@ def stream_chat(
             except:
                 break
         if fn_call:
-            fn_call["arguments"] = from_json(fn_call["arguments"])
+            fn_call["arguments"] = tu.from_json(fn_call["arguments"])
             yield fn_call
         return
+
+    def distributed_chat(
+        self,
+        prompts: List[tt.Thread],
+        post_logic: Optional[callable] = None,
+        max_threads: int = 10,
+        retry: int = 3,
+        pbar=True,
+    ):
+        return distributed_chat(
+            self,
+            prompts=prompts,
+            post_logic=post_logic,
+            max_threads=max_threads,
+            retry=retry,
+            pbar=pbar,
+        )

tuneapi/apis/model_openai.py

Lines changed: 18 additions & 0 deletions

@@ -11,6 +11,7 @@
 
 import tuneapi.utils as tu
 import tuneapi.types as tt
+from tuneapi.apis.turbo import distributed_chat
 
 
 class Openai(tt.ModelInterface):
@@ -190,6 +191,23 @@ def stream_chat(
             yield fn_call
         return
 
+    def distributed_chat(
+        self,
+        prompts: List[tt.Thread],
+        post_logic: Optional[callable] = None,
+        max_threads: int = 10,
+        retry: int = 3,
+        pbar=True,
+    ):
+        return distributed_chat(
+            self,
+            prompts=prompts,
+            post_logic=post_logic,
+            max_threads=max_threads,
+            retry=retry,
+            pbar=pbar,
+        )
+
     def embedding(
         self,
         chats: tt.Thread | List[str] | str,

tuneapi/apis/model_tune.py

Lines changed: 18 additions & 0 deletions

@@ -10,6 +10,7 @@
 
 import tuneapi.utils as tu
 import tuneapi.types as tt
+from tuneapi.apis.turbo import distributed_chat
 
 
 class TuneModel(tt.ModelInterface):
@@ -217,3 +218,20 @@ def stream_chat(
             fn_call["arguments"] = tu.from_json(fn_call["arguments"])
             yield fn_call
         return
+
+    def distributed_chat(
+        self,
+        prompts: List[tt.Thread],
+        post_logic: Optional[callable] = None,
+        max_threads: int = 10,
+        retry: int = 3,
+        pbar=True,
+    ):
+        return distributed_chat(
+            self,
+            prompts=prompts,
+            post_logic=post_logic,
+            max_threads=max_threads,
+            retry=retry,
+            pbar=pbar,
+        )
