Commit 06e1f87

[8.0.2] Usage + typo in versioning
1 parent cdf0d61 commit 06e1f87

12 files changed: +312 −93 lines changed


docs/changelog.rst

Lines changed: 10 additions & 0 deletions
@@ -7,6 +7,16 @@ minor versions.
 
 All relevant steps to be taken will be mentioned here.
 
+8.0.2
+-----
+
+- Added usage tracking for OpenAI and Anthropic
+
+8.0.1
+-----
+
+- Fixed versioning typo; we are now in the 8.x.x series
+- Fix bug in structured generation for ``Openai``.
 
 0.8.0
 -----
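
As a quick illustration of the new usage tracking from the caller's side, here is a minimal sketch, assuming the model class is exported as ``Anthropic`` and that ``tt.human`` builds a user message (neither name appears in this diff):

```python
# Minimal sketch of the 8.0.2 usage tracking (assumed names noted above).
# With usage=True, chat() returns a (output, tt.Usage) tuple, per the diff
# to tuneapi/apis/model_anthropic.py below.
import tuneapi.types as tt
from tuneapi.apis import Anthropic  # assumed export path

model = Anthropic()  # falls back to the ANTHROPIC_TOKEN env var
thread = tt.Thread(tt.human("Say hi in five words."))

out, usage = model.chat(thread, usage=True)
print(out)
print(usage.input_tokens, usage.output_tokens, usage.cached_tokens)
```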

docs/conf.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 project = "tuneapi"
 copyright = "2024-2025, Frello Technologies"
 author = "Frello Technologies"
-release = "8.0.0"
+release = "8.0.2"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

docs/index.rst

Lines changed: 13 additions & 1 deletion
@@ -53,7 +53,7 @@ paste the following code snippet in the prompt to generate the code for LLM API
 
 class MathTest(BaseModel):
     title: str = Field(..., description="Title of the test")
-    problems: List[MathProblem] = Field(..., description="List of math problems")
+    problems: List[MathProblem] = ...  # only list of other BaseModel is allowed
 
 # define a thread which is a collection of messages
 thread = tt.Thread(
@@ -65,6 +65,18 @@ paste the following code snippet in the prompt to generate the code for LLM API
 resp: MathTest = model.chat(thread)
 ```
 
+Structured generation
+---------------------
+
+.. epigraph::
+
+   Types and Logic are the two parts of programming.
+
+
+With structured generation you can get ``pydantic.BaseModel`` objects from the ``tt.ModelInterface.chat`` and
+``tt.ModelInterface.chat_async`` methods. The current limitation is that keys cannot have another ``BaseModel``
+as value; only ``List[BaseModel]`` is allowed.
+
 
 .. toctree::
    :maxdepth: 2
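
Restating the documented limitation as a runnable sketch (``MathProblem`` is an assumed shape, hinted at but not shown in the snippet above):

```python
# Sketch of the structured-generation constraint described above.
from typing import List
from pydantic import BaseModel, Field

class MathProblem(BaseModel):  # assumed shape, not shown in this diff
    question: str = Field(..., description="The problem statement")
    answer: str = Field(..., description="The expected answer")

# Allowed: a key whose value is a List of another BaseModel
class MathTest(BaseModel):
    title: str = Field(..., description="Title of the test")
    problems: List[MathProblem] = ...  # only list of other BaseModel is allowed

# Not allowed, per the limitation above: a key holding a BaseModel directly
# class BadTest(BaseModel):
#     hardest: MathProblem
```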

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tuneapi"
-version = "8.0.0"
+version = "8.0.2"
 description = "Tune AI APIs."
 authors = ["Frello Technology Private Limited <[email protected]>"]
 license = "MIT"

tuneapi/apis/model_anthropic.py

Lines changed: 106 additions & 62 deletions
@@ -6,6 +6,7 @@
 
 import httpx
 import requests
+from copy import deepcopy
 from typing import Optional, Dict, Any, List
 
 import tuneapi.utils as tu
@@ -18,11 +19,12 @@ def __init__(
         self,
         id: Optional[str] = "claude-3-haiku-20240307",
         base_url: str = "https://api.anthropic.com/v1/messages",
+        api_token: Optional[str] = None,
         extra_headers: Optional[Dict[str, str]] = None,
     ):
         self.model_id = id
         self.base_url = base_url
-        self.api_token = tu.ENV.ANTHROPIC_TOKEN("")
+        self.api_token = api_token or tu.ENV.ANTHROPIC_TOKEN("")
         self.extra_headers = extra_headers
 
     def set_api_token(self, token: str) -> None:
@@ -60,13 +62,17 @@ def _process_input(
         prev_tool_id = tu.get_random_string(5)
         for m in thread.chats[int(system != "") :]:
             if m.role == tt.Message.HUMAN:
-                msg = {
-                    "role": "user",
-                    "content": [{"type": "text", "text": m.value.strip()}],
-                }
+                if isinstance(m.value, str):
+                    content = [{"type": "text", "text": m.value}]
+                elif isinstance(m.value, list):
+                    content = deepcopy(m.value)
+                else:
+                    raise Exception(
+                        f"Unknown message type. Got: '{type(m.value)}', expected 'List[Dict[str, Any]]' or 'str'"
+                    )
                 if m.images:
                     for i in m.images:
-                        msg["content"].append(
+                        content.append(
                             {
                                 "type": "image",
                                 "source": {
@@ -76,14 +82,19 @@ def _process_input(
                             },
                         }
                     )
+                msg = {"role": "user", "content": content}
             elif m.role == tt.Message.GPT:
-                msg = {
-                    "role": "assistant",
-                    "content": [{"type": "text", "text": m.value.strip()}],
-                }
+                if isinstance(m.value, str):
+                    content = [{"type": "text", "text": m.value}]
+                elif isinstance(m.value, list):
+                    content = deepcopy(m.value)
+                else:
+                    raise Exception(
+                        f"Unknown message type. Got: '{type(m.value)}', expected 'List[Dict[str, Any]]' or 'str'"
+                    )
                 if m.images:
                     for i in m.images:
-                        msg["content"].append(
+                        content.append(
                             {
                                 "type": "image",
                                 "source": {
@@ -93,6 +104,7 @@ def _process_input(
                             },
                         }
                     )
+                msg = {"role": "assistant", "content": content}
             elif m.role == tt.Message.FUNCTION_CALL:
                 _m = tu.from_json(m.value) if isinstance(m.value, str) else m.value
                 msg = {
@@ -159,49 +171,64 @@ def _process_input(
 
         return headers, data
 
-    def _process_output(self, raw: bool, lines_fn: callable):
+    def _process_output(self, raw: bool, lines_fn: callable, yield_usage: bool):
         fn_call = None
+        usage_dict = {}
         for line in lines_fn():
             if isinstance(line, bytes):
                 line = line.decode().strip()
             if not line or not "data:" in line:
                 continue
 
-            try:
-                # print(line)
-                resp = tu.from_json(line.replace("data:", "").strip())
-                if resp["type"] == "content_block_start":
-                    if resp["content_block"]["type"] == "tool_use":
-                        fn_call = {
-                            "name": resp["content_block"]["name"],
-                            "arguments": "",
-                        }
-                elif resp["type"] == "content_block_delta":
-                    delta = resp["delta"]
-                    delta_type = delta["type"]
-                    if delta_type == "text_delta":
-                        if raw:
-                            yield b"data: " + tu.to_json(
-                                {
-                                    "object": delta_type,
-                                    "choices": [{"delta": {"content": delta["text"]}}],
-                                },
-                                tight=True,
-                            ).encode()
-                            yield b""  # uncomment this line if you want 1:1 with OpenAI
-                        else:
-                            yield delta["text"]
-                    elif delta_type == "input_json_delta":
-                        fn_call["arguments"] += delta["partial_json"]
-                elif resp["type"] == "content_block_stop":
-                    if fn_call:
-                        fn_call["arguments"] = tu.from_json(
-                            fn_call["arguments"] or "{}"
-                        )
-                        yield fn_call
-                        fn_call = None
-            except:
-                break
+            resp = tu.from_json(line.replace("data:", "").strip())
+            if resp["type"] == "message_start":
+                usage = resp["message"]["usage"]
+                usage_dict.update(usage)
+            elif resp["type"] == "content_block_start":
+                if resp["content_block"]["type"] == "tool_use":
+                    fn_call = {
+                        "name": resp["content_block"]["name"],
+                        "arguments": "",
+                    }
+            elif resp["type"] == "content_block_delta":
+                delta = resp["delta"]
+                delta_type = delta["type"]
+                if delta_type == "text_delta":
+                    if raw:
+                        yield b"data: " + tu.to_json(
+                            {
+                                "object": delta_type,
+                                "choices": [{"delta": {"content": delta["text"]}}],
+                            },
+                            tight=True,
+                        ).encode()
+                        yield b""  # uncomment this line if you want 1:1 with OpenAI
+                    else:
+                        yield delta["text"]
+                elif delta_type == "input_json_delta":
+                    fn_call["arguments"] += delta["partial_json"]
+            elif resp["type"] == "content_block_stop":
+                if fn_call:
+                    fn_call["arguments"] = tu.from_json(fn_call["arguments"] or "{}")
+                    yield fn_call
+                    fn_call = None
+            elif resp["type"] == "message_delta":
+                usage_dict["output_tokens"] += resp["usage"]["output_tokens"]
+                cached_tokens = usage_dict.get(
+                    "cache_read_input_tokens", 0
+                ) or usage_dict.get("cache_creation_input_tokens", 0)
+                usage_obj = tt.Usage(
+                    input_tokens=usage_dict.pop("input_tokens"),
+                    output_tokens=usage_dict.pop("output_tokens"),
+                    cached_tokens=cached_tokens,
+                    **usage_dict,
+                )
+                if yield_usage:
+                    if raw:
+                        yield b"data: " + usage_obj.to_json(tight=True).encode()
+                        yield b""  # uncomment this line if you want 1:1 with OpenAI
+                    else:
+                        yield usage_obj
 
     # Interaction methods
 
@@ -212,30 +239,35 @@ def chat(
         max_tokens: int = 1024,
         temperature: Optional[float] = None,
         token: Optional[str] = None,
-        return_message: bool = False,
+        usage: bool = False,
         extra_headers: Optional[Dict[str, str]] = None,
         **kwargs,
     ):
         output = ""
+        usage_obj = None
         fn_call = None
         for i in self.stream_chat(
             chats=chats,
             model=model,
             max_tokens=max_tokens,
             temperature=temperature,
             token=token,
+            usage=usage,
             extra_headers=extra_headers,
             raw=False,
             **kwargs,
         ):
             if isinstance(i, dict):
                 fn_call = i.copy()
+            elif isinstance(i, tt.Usage):
+                usage_obj = i
             else:
                 output += i
-        if return_message:
-            return output, fn_call
         if fn_call:
-            return fn_call
+            output = fn_call
+
+        if usage:
+            return output, usage_obj
         return output
 
     def stream_chat(
@@ -246,6 +278,7 @@ def stream_chat(
         temperature: Optional[float] = None,
         token: Optional[str] = None,
         debug: bool = False,
+        usage: bool = False,
         extra_headers: Optional[Dict[str, str]] = None,
         timeout=(5, 30),
         raw: bool = False,
@@ -262,19 +295,23 @@ def stream_chat(
             extra_headers=extra_headers,
             **kwargs,
         )
-        r = requests.post(
-            self.base_url,
-            headers=headers,
-            json=data,
-            timeout=timeout,
-        )
         try:
+            r = requests.post(
+                self.base_url,
+                headers=headers,
+                json=data,
+                timeout=timeout,
+            )
             r.raise_for_status()
         except Exception as e:
             yield r.text
             raise e
 
-        yield from self._process_output(raw=raw, lines_fn=r.iter_lines)
+        yield from self._process_output(
+            raw=raw,
+            lines_fn=r.iter_lines,
+            yield_usage=usage,
+        )
 
     async def chat_async(
         self,
@@ -283,30 +320,35 @@ async def chat_async(
         max_tokens: int = 1024,
         temperature: Optional[float] = None,
         token: Optional[str] = None,
-        return_message: bool = False,
+        usage: bool = False,
         extra_headers: Optional[Dict[str, str]] = None,
         **kwargs,
     ):
         output = ""
+        usage_obj = None
         fn_call = None
         async for i in self.stream_chat_async(
             chats=chats,
             model=model,
             max_tokens=max_tokens,
             temperature=temperature,
             token=token,
+            usage=usage,
             extra_headers=extra_headers,
             raw=False,
             **kwargs,
         ):
             if isinstance(i, dict):
                 fn_call = i.copy()
+            elif isinstance(i, tt.Usage):
+                usage_obj = i
             else:
                 output += i
-        if return_message:
-            return output, fn_call
+
         if fn_call:
-            return fn_call
+            output = fn_call
+        if usage:
+            return output, usage_obj
         return output
 
     async def stream_chat_async(
@@ -317,6 +359,7 @@ async def stream_chat_async(
         temperature: Optional[float] = None,
         token: Optional[str] = None,
         debug: bool = False,
+        usage: bool = False,
         extra_headers: Optional[Dict[str, str]] = None,
         timeout=(5, 30),
         raw: bool = False,
@@ -351,6 +394,7 @@ async def stream_chat_async(
                 for x in self._process_output(
                     raw=raw,
                     lines_fn=chunk.decode("utf-8").splitlines,
+                    yield_usage=usage,
                 ):
                     yield x
 
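In short, this diff threads a new ``usage`` flag from ``chat``/``chat_async`` down into ``_process_output``, which now reads token counts from the ``message_start`` and ``message_delta`` stream events and yields one ``tt.Usage`` at the end of the stream; it also accepts list-valued message content and an explicit ``api_token`` constructor argument. A minimal sketch of consuming the stream, again assuming the ``Anthropic`` export and ``tt.human`` helper:

```python
# Sketch: with usage=True, stream_chat yields text deltas (str), tool calls
# (dict), and finally one tt.Usage built from message_start/message_delta.
import tuneapi.types as tt
from tuneapi.apis import Anthropic  # assumed export path

model = Anthropic(api_token="sk-ant-...")  # new in 8.0.2: explicit token
thread = tt.Thread(tt.human("Write a haiku about types."))

for chunk in model.stream_chat(thread, usage=True):
    if isinstance(chunk, tt.Usage):
        # arrives once, after the message_delta event closes the stream
        print("\ntokens:", chunk.input_tokens, "in /", chunk.output_tokens, "out")
    elif isinstance(chunk, dict):
        print("tool call:", chunk)  # parsed function-call payload
    else:
        print(chunk, end="")  # plain text delta
```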