Commit ac3af6b

[0.6.3] adding structured generation to Gemini API
1 parent: 97009ad

12 files changed: +106 additions, -29 deletions


docs/changelog.rst (16 additions, 0 deletions)

@@ -7,6 +7,22 @@ minor versions.
 
 All relevant steps to be taken will be mentioned here.
 
+0.6.3
+-----
+
+- ``<model>.distributed_chat`` now takes in args that are passed to the ``post_logic``.
+
+
+0.6.2
+-----
+
+- New set of utils in ``tuneapi.utils`` called ``prompt`` to help with the basics of prompting.
+
+0.6.1
+-----
+
+- Package now uses ``fire==0.7.0``
+
 0.6.0
 -----
 
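As a usage sketch of the 0.6.3 entry (the ``Gemini`` export name and the prompt contents are illustrative, and the API token is assumed to come from the environment), extra keyword arguments given to ``distributed_chat`` ride along with each task and reach the per-worker chat calls (see tuneapi/apis/turbo.py below):

from tuneapi import apis as ta
from tuneapi import types as tt

model = ta.Gemini()  # illustrative; any ModelInterface implementation works
prompts = [tt.Thread(tt.human(f"Summarise item {i}")) for i in range(5)]

# Extra kwargs such as `temperature` are stored on each task and forwarded
# to every worker's model.chat(...) call.
results = model.distributed_chat(
    prompts,
    post_logic=lambda out: out.strip(),
    max_threads=4,
    temperature=0.2,
)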

pyproject.toml (2 additions, 2 deletions)

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tuneapi"
-version = "0.6.0"
+version = "0.6.3"
 description = "Tune AI APIs."
 authors = ["Frello Technology Private Limited <[email protected]>"]
 license = "MIT"
@@ -9,7 +9,7 @@ repository = "https://github.com/NimbleBoxAI/tuneapi"
 
 [tool.poetry.dependencies]
 python = "^3.10"
-fire = "0.5.0"
+fire = "0.7.0"
 requests = "^2.31.0"
 cloudpickle = "3.0.0"
 cryptography = ">=42.0.5"

tuneapi/apis/model_anthropic.py (2 additions, 13 deletions)

@@ -4,7 +4,6 @@
 
 # Copyright © 2024- Frello Technology Private Limited
 
-import re
 import json
 import requests
 from typing import Optional, Dict, Any, Tuple, List
@@ -244,6 +243,7 @@ def distributed_chat(
         max_threads: int = 10,
         retry: int = 3,
         pbar=True,
+        **kwargs,
     ):
         return distributed_chat(
             self,
@@ -252,16 +252,5 @@ def distributed_chat(
             max_threads=max_threads,
             retry=retry,
             pbar=pbar,
+            **kwargs,
         )
-
-
-# helper methods
-
-
-def get_section(tag: str, out: str) -> Optional[str]:
-    pattern = re.compile("<" + tag + ">(.*?)</" + tag + ">", re.DOTALL)
-    match = pattern.search(out)
-    if match:
-        content = match.group(1)
-        return content
-    return None

tuneapi/apis/model_gemini.py (21 additions, 9 deletions)

@@ -114,7 +114,7 @@ def chat(
         self,
         chats: tt.Thread | str,
         model: Optional[str] = None,
-        max_tokens: int = 1024,
+        max_tokens: int = 4096,
         temperature: float = 1,
         token: Optional[str] = None,
         timeout=None,
@@ -150,7 +150,7 @@ def stream_chat(
         self,
         chats: tt.Thread | str,
         model: Optional[str] = None,
-        max_tokens: int = 1024,
+        max_tokens: int = 4096,
         temperature: float = 1,
         token: Optional[str] = None,
         timeout=(5, 60),
@@ -166,18 +166,12 @@ def stream_chat(
         extra_headers = extra_headers or self.extra_headers
         if extra_headers:
             headers.update(extra_headers)
+
         data = {
             "systemInstruction": {
                 "parts": [{"text": system}],
             },
             "contents": messages,
-            "generationConfig": {
-                "temperature": temperature,
-                "topK": 0,
-                "topP": 0.95,
-                "maxOutputTokens": max_tokens,
-                "stopSequences": [],
-            },
             "safetySettings": [
                 {
                     "category": "HARM_CATEGORY_HARASSMENT",
@@ -197,6 +191,22 @@ def stream_chat(
                 },
             ],
         }
+
+        generation_config = {
+            "temperature": temperature,
+            "maxOutputTokens": max_tokens,
+            "stopSequences": [],
+        }
+
+        if chats.gen_schema:
+            generation_config.update(
+                {
+                    "response_mime_type": "application/json",
+                    "response_schema": chats.gen_schema,
+                }
+            )
+        data["generationConfig"] = generation_config
+
         if tools:
             data["tool_config"] = {
                 "function_calling_config": {
@@ -285,6 +295,7 @@ def distributed_chat(
         max_threads: int = 10,
         retry: int = 3,
         pbar=True,
+        **kwargs,
     ):
         return distributed_chat(
             self,
@@ -293,4 +304,5 @@ def distributed_chat(
             max_threads=max_threads,
             retry=retry,
             pbar=pbar,
+            **kwargs,
         )
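A minimal sketch of the new structured-generation path, assuming the class is exported as ``Gemini`` and the token comes from the environment; the schema follows the Gemini REST API's OpenAPI-style subset (uppercase type enums), and ``gen_schema`` is the new Thread field added in this commit:

import json

from tuneapi import apis as ta
from tuneapi import types as tt

# OpenAPI-style schema subset accepted by the Gemini REST API.
schema = {
    "type": "OBJECT",
    "properties": {
        "name": {"type": "STRING"},
        "rating": {"type": "INTEGER"},
    },
    "required": ["name", "rating"],
}

thread = tt.Thread(
    tt.human("Rate the movie Dune (2021). Return name and rating."),
    gen_schema=schema,  # stream_chat now copies this into generationConfig
)

model = ta.Gemini()  # assumed export name
out = model.chat(thread)  # with gen_schema set, the reply is JSON text
print(json.loads(out))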

tuneapi/apis/model_groq.py (2 additions, 0 deletions)

@@ -199,6 +199,7 @@ def distributed_chat(
         max_threads: int = 10,
         retry: int = 3,
         pbar=True,
+        **kwargs,
     ):
         return distributed_chat(
             self,
@@ -207,4 +208,5 @@ def distributed_chat(
             max_threads=max_threads,
             retry=retry,
             pbar=pbar,
+            **kwargs,
         )

tuneapi/apis/model_mistral.py (2 additions, 0 deletions)

@@ -201,6 +201,7 @@ def distributed_chat(
         max_threads: int = 10,
         retry: int = 3,
         pbar=True,
+        **kwargs,
     ):
         return distributed_chat(
             self,
@@ -209,4 +210,5 @@ def distributed_chat(
             max_threads=max_threads,
             retry=retry,
             pbar=pbar,
+            **kwargs,
         )

tuneapi/apis/model_openai.py (5 additions, 0 deletions)

@@ -130,6 +130,7 @@ def stream_chat(
         extra_headers: Optional[Dict[str, str]] = None,
         debug: bool = False,
         raw: bool = False,
+        **kwargs,
     ):
         headers, messages = self._process_input(chats, token)
         extra_headers = extra_headers or self.extra_headers
@@ -148,6 +149,8 @@ def stream_chat(
                 {"type": "function", "function": x.to_dict()} for x in chats.tools
             ]
             data["parallel_tool_calls"] = parallel_tool_calls
+        if kwargs:
+            data.update(kwargs)
         if debug:
             fp = "sample_oai.json"
             print("Saving at path " + fp)
@@ -198,6 +201,7 @@ def distributed_chat(
         max_threads: int = 10,
         retry: int = 3,
         pbar=True,
+        **kwargs,
     ):
         return distributed_chat(
             self,
@@ -206,6 +210,7 @@ def distributed_chat(
             max_threads=max_threads,
             retry=retry,
             pbar=pbar,
+            **kwargs,
         )
 
     def embedding(
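Because ``stream_chat`` now does ``data.update(kwargs)``, documented request-body fields can be passed straight through. A sketch, assuming the class is exported as ``Openai``; ``response_format`` is a standard OpenAI chat-completions field, not something this commit defines:

from tuneapi import apis as ta
from tuneapi import types as tt

model = ta.Openai()  # assumed export name; token via env/config
thread = tt.Thread(tt.human('Reply with the JSON object {"ok": true}'))

# The extra kwarg is merged verbatim into the request payload.
for token in model.stream_chat(thread, response_format={"type": "json_object"}):
    print(token, end="")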

tuneapi/apis/model_tune.py (2 additions, 0 deletions)

@@ -226,6 +226,7 @@ def distributed_chat(
         max_threads: int = 10,
         retry: int = 3,
         pbar=True,
+        **kwargs,
     ):
         return distributed_chat(
             self,
@@ -234,4 +235,5 @@ def distributed_chat(
             max_threads=max_threads,
             retry=retry,
             pbar=pbar,
+            **kwargs,
         )

tuneapi/apis/turbo.py (20 additions, 5 deletions)

@@ -3,7 +3,7 @@
 import queue
 import threading
 from tqdm import trange
-from typing import List, Optional
+from typing import List, Optional, Dict
 from dataclasses import dataclass
 
 from tuneapi.types import Thread, ModelInterface, human, system
@@ -16,6 +16,7 @@ def distributed_chat(
     max_threads: int = 10,
     retry: int = 3,
     pbar=True,
+    **kwargs,
 ):
     """
     Distributes multiple chat prompts across a thread pool for parallel processing.
@@ -78,8 +79,7 @@ def worker():
                 break
 
             try:
-                print(">")
-                out = task.model.chat(task.prompt)
+                out = task.model.chat(chat=task.prompt, **task.kwargs)
                 if post_logic:
                     out = post_logic(out)
                 result_channel.put(_Result(task.index, out, True))
@@ -94,7 +94,13 @@ def worker():
                     nm.set_api_token(model.api_token)
                     # Increment retry count and requeue
                     task_channel.put(
-                        _Task(task.index, nm, task.prompt, task.retry_count + 1)
+                        _Task(
+                            index=task.index,
+                            model=nm,
+                            prompt=task.prompt,
+                            retry_count=task.retry_count + 1,
+                            kwargs=task.kwargs,
+                        )
                     )
                 else:
                     # If we've exhausted retries, store the error
@@ -122,7 +128,15 @@ def worker():
             extra_headers=model.extra_headers,
         )
         nm.set_api_token(model.api_token)
-        task_channel.put(_Task(i, nm, p))
+        task_channel.put(
+            _Task(
+                index=i,
+                model=nm,
+                prompt=p,
+                retry_count=0,
+                kwargs=kwargs,
+            )
+        )
 
     # Process results
     completed = 0
@@ -160,6 +174,7 @@ class _Task:
     model: ModelInterface
     prompt: Thread
     retry_count: int = 0
+    kwargs: Optional[Dict] = None
 
 
 @dataclass
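One consequence of this refactor worth noting: ``post_logic`` runs inside the worker's try block, so raising from it requeues the task with the same kwargs, up to ``retry`` times. A hedged sketch, reusing the ``model`` and ``prompts`` names from the changelog example above:

import json

from tuneapi.apis.turbo import distributed_chat

def parse_or_retry(out: str) -> dict:
    # Raising here sends the task back through the _Task requeue path,
    # since post_logic is called inside the worker's try/except.
    return json.loads(out)

results = distributed_chat(
    model,                  # any ModelInterface implementation
    prompts,                # List[Thread]
    post_logic=parse_or_retry,
    max_threads=8,
    retry=3,
    temperature=0.0,        # carried on _Task.kwargs into every model.chat(...)
)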

tuneapi/types/chats.py (15 additions, 0 deletions)

@@ -346,6 +346,17 @@ def stream_chat(
     ):
         """This is the main function to stream chat with the model where each token is iteratively generated"""
 
+    def distributed_chat(
+        self,
+        prompts: List["Thread"],
+        post_logic: Optional[callable] = None,
+        max_threads: int = 10,
+        retry: int = 3,
+        pbar=True,
+        **kwargs,
+    ):
+        """This is the main function to chat with the model in a distributed manner"""
+
 
 ########################################################################################################################
 #
@@ -372,6 +383,7 @@ def __init__(
         id: str = "",
         title: str = "",
         tools: List[Tool] = [],
+        gen_schema: Optional[Dict[str, Any]] = None,
         **kwargs,
     ):
         self.chats = list(chats)
@@ -380,6 +392,7 @@ def __init__(
         self.id = id or "thread_" + str(tu.get_snowflake())
         self.title = title
         self.tools = tools
+        self.gen_schema = gen_schema
 
         #
         kwargs = {k: v for k, v in sorted(kwargs.items())}
@@ -462,6 +475,7 @@ def to_dict(self, full: bool = False):
                 "title": self.title,
                 "id": self.id,
                 "tools": [x.to_dict() for x in self.tools],
+                "gen_schema": self.gen_schema,
             }
         return {
             "chats": [x.to_dict() for x in self.chats],
@@ -484,6 +498,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "Thread":
             model=data.get("model", ""),
             title=data.get("title", ""),
             tools=[Tool.from_dict(x) for x in data.get("tools", [])],
+            gen_schema=data.get("gen_schema", {}),
             **data.get("meta", {}),
         )