AnswerDotAI
diff --git a/cachy.jsonl b/cachy.jsonl
Lines changed: 65 additions & 0 deletions
diff --git a/lisette/_modidx.py b/lisette/_modidx.py
Lines changed: 0 additions & 5 deletions
diff --git a/lisette/core.py b/lisette/core.py
Lines changed: 18 additions & 39 deletions
diff --git a/lisette/usage.py b/lisette/usage.py
Lines changed: 11 additions & 3 deletions
@@ -43,13 +43,8 @@
                                                                                       'lisette/core.py'),
                               'lisette.core.mk_msg': ('core.html#mk_msg', 'lisette/core.py'),
                               'lisette.core.mk_msgs': ('core.html#mk_msgs', 'lisette/core.py'),
-                              'lisette.core.mk_tc': ('core.html#mk_tc', 'lisette/core.py'),
-                              'lisette.core.mk_tc_req': ('core.html#mk_tc_req', 'lisette/core.py'),
-                              'lisette.core.mk_tc_result': ('core.html#mk_tc_result', 'lisette/core.py'),
-                              'lisette.core.mk_tc_results': ('core.html#mk_tc_results', 'lisette/core.py'),
                               'lisette.core.mk_tr_details': ('core.html#mk_tr_details', 'lisette/core.py'),
                               'lisette.core.patch_litellm': ('core.html#patch_litellm', 'lisette/core.py'),
-                              'lisette.core.random_tool_id': ('core.html#random_tool_id', 'lisette/core.py'),
                               'lisette.core.remove_cache_ckpts': ('core.html#remove_cache_ckpts', 'lisette/core.py'),
                               'lisette.core.stream_with_complete': ('core.html#stream_with_complete', 'lisette/core.py'),
                               'lisette.core.structured': ('core.html#structured', 'lisette/core.py')},
 
@@ -5,8 +5,8 @@
 # %% auto 0
 __all__ = ['sonn45', 'opus45', 'detls_tag', 're_tools', 'effort', 'patch_litellm', 'remove_cache_ckpts', 'contents', 'mk_msg',
            'fmt2hist', 'mk_msgs', 'stream_with_complete', 'lite_mk_func', 'ToolResponse', 'structured', 'cite_footnote',
-           'cite_footnotes', 'Chat', 'random_tool_id', 'mk_tc', 'mk_tc_req', 'mk_tc_result', 'mk_tc_results',
-           'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter', 'adisplay_stream']
+           'cite_footnotes', 'Chat', 'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter',
+           'adisplay_stream']
 
 # %% ../nbs/00_core.ipynb
 import asyncio, base64, json, litellm, mimetypes, random, string
@@ -81,10 +81,12 @@ def _repr_markdown_(self: litellm.ModelResponse):
 
 # %% ../nbs/00_core.ipynb
 def _bytes2content(data):
-    "Convert bytes to litellm content dict (image or pdf)"
-    mtype = 'application/pdf' if data[:4] == b'%PDF' else mimetypes.types_map.get(f'.{imghdr.what(None, h=data)}')
-    if not mtype: raise ValueError(f'Data must be image or PDF bytes, got {data[:10]}')
-    return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{base64.b64encode(data).decode("utf-8")}'}
+    "Convert bytes to litellm content dict (image, pdf, audio, video)"
+    mtype = detect_mime(data)
+    if not mtype: raise ValueError(f'Data must be a supported file type, got {data[:10]}')
+    encoded = base64.b64encode(data).decode("utf-8")    
+    if mtype.startswith('image/'): return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{encoded}'}
+    return {'type': 'file', 'file': {'file_data': f'data:{mtype};base64,{encoded}'}}
 
 # %% ../nbs/00_core.ipynb
 def _add_cache_control(msg,          # LiteLLM formatted msg
@@ -250,7 +252,7 @@ def cite_footnotes(stream_list):
 def _mk_prefill(pf): return ModelResponseStream([StreamingChoices(delta=Delta(content=pf,role='assistant'))])
 
 # %% ../nbs/00_core.ipynb
-_final_prompt = "You have no more tool uses. Please summarize your findings. If you did not complete your goal please tell the user what further work needs to be done so they can choose how best to proceed."
+_final_prompt = dict(role="user", content="You have no more tool uses. Please summarize your findings. If you did not complete your goal please tell the user what further work needs to be done so they can choose how best to proceed.")
 
 # %% ../nbs/00_core.ipynb
 class Chat:
@@ -285,7 +287,7 @@ def _prep_msg(self, msg=None, prefill=None):
             cache_idxs = L(self.cache_idxs).filter().map(lambda o: o-1 if o>0 else o)
         else:
             cache_idxs = self.cache_idxs
-        if msg: self.hist = mk_msgs(self.hist+[msg], self.cache, cache_idxs, self.ttl)
+        if msg: self.hist = mk_msgs(self.hist+[msg], self.cache and 'claude' in self.model, cache_idxs, self.ttl)
         pf = [{"role":"assistant","content":prefill}] if prefill else []
         return sp + self.hist + pf
 
@@ -306,6 +308,7 @@ def _call(self, msg=None, prefill=None, temp=None, think=None, search=None, stre
                          tools=self.tool_schemas, reasoning_effort = effort.get(think), tool_choice=tool_choice,
                          # temperature is not supported when reasoning
                          temperature=None if think else ifnone(temp,self.temp),
+                         caching=self.cache and 'claude' not in self.model,
                          **kwargs)
         if stream:
             if prefill: yield _mk_prefill(prefill)
@@ -348,29 +351,6 @@ def print_hist(self:Chat):
     "Print each message on a different line"
     for r in self.hist: print(r, end='\n\n')
 
-# %% ../nbs/00_core.ipynb
-def random_tool_id():
-    "Generate a random tool ID with 'toolu_' prefix"
-    random_part = ''.join(random.choices(string.ascii_letters + string.digits, k=25))
-    return f'toolu_{random_part}'
-
-# %% ../nbs/00_core.ipynb
-def mk_tc(func, args, tcid=None, idx=1):
-    if not tcid: tcid = random_tool_id()
-    return {'index': idx, 'function': {'arguments': args, 'name': func}, 'id': tcid, 'type': 'function'}
-
-# %% ../nbs/00_core.ipynb
-def mk_tc_req(content, tcs):
-    msg = Message(content=content, role='assistant', tool_calls=tcs, function_call=None)
-    msg.tool_calls = [{**dict(tc), 'function': dict(tc['function'])} for tc in msg.tool_calls]
-    return msg
-
-# %% ../nbs/00_core.ipynb
-def mk_tc_result(tc, result): return {'tool_call_id': tc['id'], 'role': 'tool', 'name': tc['function']['name'], 'content': result}
-
-# %% ../nbs/00_core.ipynb
-def mk_tc_results(tcq, results): return [mk_tc_result(a,b) for a,b in zip(tcq.tool_calls, results)]
-
 # %% ../nbs/00_core.ipynb
 async def _alite_call_func(tc, ns, raise_on_err=True):
     try: fargs = json.loads(tc.function.arguments)
@@ -401,6 +381,7 @@ async def _call(self, msg=None, prefill=None, temp=None, think=None, search=None
                          tools=self.tool_schemas, reasoning_effort=effort.get(think), tool_choice=tool_choice,
                          # temperature is not supported when reasoning
                          temperature=None if think else ifnone(temp,self.temp), 
+                         caching=self.cache and 'claude' not in self.model,
                          **kwargs)
         if stream:
             if prefill: yield _mk_prefill(prefill)
@@ -460,20 +441,18 @@ def mk_tr_details(tr, tc, mx=2000):
 # %% ../nbs/00_core.ipynb
 class AsyncStreamFormatter:
     def __init__(self, include_usage=False, mx=2000):
-        self.outp,self.tcs,self.include_usage,self.think,self.mx = '',{},include_usage,False,mx
+        self.outp,self.tcs,self.include_usage,self.mx = '',{},include_usage,mx
 
     def format_item(self, o):
         "Format a single item from the response stream."
         res = ''
         if isinstance(o, ModelResponseStream):
             d = o.choices[0].delta
-            if nested_idx(d, 'reasoning_content'): 
-                self.think = True
-                res += '🧠'
-            elif self.think:
-                self.think = False
-                res += '\n\n'
-            if c:=d.content: res+=c
+            if nested_idx(d, 'reasoning_content') and d['reasoning_content']!='{"text": ""}':
+                res+= '🧠' if not self.outp or self.outp[-1]=='🧠' else '\n\n🧠' # Gemini can interleave reasoning with text, so a resumed reasoning run starts a new paragraph
+            elif self.outp and self.outp[-1] == '🧠': res+= '\n\n'
+            if c:=d.content: # Gemini can include text content in the last reasoning chunk, so separate it from the 🧠 marker
+                res+=f"\n\n{c}" if res and res[-1] == '🧠' else c
         elif isinstance(o, ModelResponse):
             if self.include_usage: res += f"\nUsage: {o.usage}"
             if c:=getattr(contents(o),'tool_calls',None):
 
@@ -26,9 +26,17 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
     def _log_usage(self, response_obj, response_cost, start_time, end_time):
         usage = response_obj.usage
         ptd   = usage.prompt_tokens_details
-        self.usage.insert(Usage(timestamp=time.time(), model=response_obj.model, user_id=self.user_id_fn(), prompt_tokens=usage.prompt_tokens, completion_tokens=usage.completion_tokens,
-                                    total_tokens=usage.total_tokens, cached_tokens=ptd.cached_tokens if ptd else 0, cache_creation_tokens=usage.cache_creation_input_tokens, 
-                                    cache_read_tokens=usage.cache_read_input_tokens, web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'), response_cost=response_cost))
+        self.usage.insert(Usage(timestamp=time.time(), 
+                                model=response_obj.model, 
+                                user_id=self.user_id_fn(), 
+                                prompt_tokens=usage.prompt_tokens, 
+                                completion_tokens=usage.completion_tokens,
+                                total_tokens=usage.total_tokens, 
+                                cached_tokens=ptd.cached_tokens if ptd else 0, # cache-read tokens as reported by Gemini
+                                cache_creation_tokens=nested_idx(usage, 'cache_creation_input_tokens'),
+                                cache_read_tokens=nested_idx(usage, 'cache_read_input_tokens'), # cache-read tokens as reported by Anthropic
+                                web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'),
+                                response_cost=response_cost))
 
     def user_id_fn(self): raise NotImplementedError('Please implement `LisetteUsageLogger.user_id_fn` before initializing, e.g using fastcore.patch.')