Skip to content

Commit 41c8642

Browse files
committed
gemini multimodal support
1 parent 10a296b commit 41c8642

File tree

6 files changed

+1447
-1610
lines changed

6 files changed

+1447
-1610
lines changed

cachy.jsonl

Lines changed: 54 additions & 0 deletions
Large diffs are not rendered by default.

lisette/_modidx.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@
1919
'lisette.core.Chat.__init__': ('core.html#chat.__init__', 'lisette/core.py'),
2020
'lisette.core.Chat._call': ('core.html#chat._call', 'lisette/core.py'),
2121
'lisette.core.Chat._prep_msg': ('core.html#chat._prep_msg', 'lisette/core.py'),
22-
'lisette.core.Chat.print_hist': ('core.html#chat.print_hist', 'lisette/core.py'),
2322
'lisette.core.ToolResponse': ('core.html#toolresponse', 'lisette/core.py'),
2423
'lisette.core._add_cache_control': ('core.html#_add_cache_control', 'lisette/core.py'),
2524
'lisette.core._alite_call_func': ('core.html#_alite_call_func', 'lisette/core.py'),
2625
'lisette.core._apply_cache_idxs': ('core.html#_apply_cache_idxs', 'lisette/core.py'),
2726
'lisette.core._bytes2content': ('core.html#_bytes2content', 'lisette/core.py'),
27+
'lisette.core._detect_mime': ('core.html#_detect_mime', 'lisette/core.py'),
2828
'lisette.core._extract_tool': ('core.html#_extract_tool', 'lisette/core.py'),
2929
'lisette.core._has_cache': ('core.html#_has_cache', 'lisette/core.py'),
3030
'lisette.core._has_search': ('core.html#_has_search', 'lisette/core.py'),
@@ -43,13 +43,8 @@
4343
'lisette/core.py'),
4444
'lisette.core.mk_msg': ('core.html#mk_msg', 'lisette/core.py'),
4545
'lisette.core.mk_msgs': ('core.html#mk_msgs', 'lisette/core.py'),
46-
'lisette.core.mk_tc': ('core.html#mk_tc', 'lisette/core.py'),
47-
'lisette.core.mk_tc_req': ('core.html#mk_tc_req', 'lisette/core.py'),
48-
'lisette.core.mk_tc_result': ('core.html#mk_tc_result', 'lisette/core.py'),
49-
'lisette.core.mk_tc_results': ('core.html#mk_tc_results', 'lisette/core.py'),
5046
'lisette.core.mk_tr_details': ('core.html#mk_tr_details', 'lisette/core.py'),
5147
'lisette.core.patch_litellm': ('core.html#patch_litellm', 'lisette/core.py'),
52-
'lisette.core.random_tool_id': ('core.html#random_tool_id', 'lisette/core.py'),
5348
'lisette.core.remove_cache_ckpts': ('core.html#remove_cache_ckpts', 'lisette/core.py'),
5449
'lisette.core.stream_with_complete': ('core.html#stream_with_complete', 'lisette/core.py'),
5550
'lisette.core.structured': ('core.html#structured', 'lisette/core.py')},

lisette/core.py

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
# %% auto 0
66
__all__ = ['sonn45', 'opus45', 'detls_tag', 're_tools', 'effort', 'patch_litellm', 'remove_cache_ckpts', 'contents', 'mk_msg',
77
'fmt2hist', 'mk_msgs', 'stream_with_complete', 'lite_mk_func', 'ToolResponse', 'structured', 'cite_footnote',
8-
'cite_footnotes', 'Chat', 'random_tool_id', 'mk_tc', 'mk_tc_req', 'mk_tc_result', 'mk_tc_results',
9-
'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter', 'adisplay_stream']
8+
'cite_footnotes', 'Chat', 'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter',
9+
'adisplay_stream']
1010

1111
# %% ../nbs/00_core.ipynb
1212
import asyncio, base64, json, litellm, mimetypes, random, string
@@ -80,11 +80,34 @@ def _repr_markdown_(self: litellm.ModelResponse):
8080
opus45 = "claude-opus-4-5"
8181

8282
# %% ../nbs/00_core.ipynb
83+
def _riff_mime(d):
    "MIME type of a RIFF container, chosen by the form type stored at bytes 8-12 (None if not recognised)"
    # `bytes(...)` so that bytearray/memoryview slices are hashable for the dict lookup
    return {b'WAVE': 'audio/wav', b'AVI ': 'video/avi', b'WEBP': 'image/webp'}.get(bytes(d[8:12]))

# Maps (signature, offset) -> MIME type, or -> a callable that receives the full data
# and returns a MIME type (or None when the container's subtype is not recognised).
_sigs = {
    (b'%PDF', 0): 'application/pdf',
    (b'RIFF', 0): _riff_mime,
    (b'ID3', 0): 'audio/mp3',
    (b'\xff\xfb', 0): 'audio/mp3',
    (b'\xff\xf3', 0): 'audio/mp3',
    (b'FORM', 0): lambda d: 'audio/aiff' if d[8:12]==b'AIFF' else None,
    (b'OggS', 0): 'audio/ogg',
    (b'fLaC', 0): 'audio/flac',
    (b'ftyp', 4): lambda d: 'video/3gpp' if d[8:11]==b'3gp' else 'video/mp4',
    (b'\x1a\x45\xdf', 0): 'video/webm',
    (b'FLV', 0): 'video/x-flv',
    (b'\x30\x26\xb2\x75', 0): 'video/wmv',
    (b'\x00\x00\x01\xb3', 0): 'video/mpeg',
}

def _detect_mime(data):
    "Detect the MIME type of `data` from its magic bytes; returns None when no signature matches"
    for (sig,pos),mime in _sigs.items():
        if data[pos:pos+len(sig)]!=sig: continue
        res = mime(data) if callable(mime) else mime
        # A container signature (e.g. RIFF, FORM) can match while its subtype is unknown.
        # Keep looking instead of returning None, so the image fallback below still runs.
        if res: return res
    # Image formats are sniffed by `imghdr`; an unrecognised format yields the key '.None',
    # which is not in `types_map`, so the result is None.
    return mimetypes.types_map.get(f'.{imghdr.what(None, h=data)}')
103+
83104
def _bytes2content(data):
84-
"Convert bytes to litellm content dict (image or pdf)"
85-
mtype = 'application/pdf' if data[:4] == b'%PDF' else mimetypes.types_map.get(f'.{imghdr.what(None, h=data)}')
86-
if not mtype: raise ValueError(f'Data must be image or PDF bytes, got {data[:10]}')
87-
return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{base64.b64encode(data).decode("utf-8")}'}
105+
"Convert bytes to litellm content dict (image, pdf, audio, video)"
106+
mtype = _detect_mime(data)
107+
if not mtype: raise ValueError(f'Data must be a supported file type, got {data[:10]}')
108+
encoded = base64.b64encode(data).decode("utf-8")
109+
if mtype.startswith('image/'): return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{encoded}'}
110+
return {'type': 'file', 'file': {'file_data': f'data:{mtype};base64,{encoded}'}}
88111

89112
# %% ../nbs/00_core.ipynb
90113
def _add_cache_control(msg, # LiteLLM formatted msg
@@ -250,7 +273,7 @@ def cite_footnotes(stream_list):
250273
def _mk_prefill(pf): return ModelResponseStream([StreamingChoices(delta=Delta(content=pf,role='assistant'))])
251274

252275
# %% ../nbs/00_core.ipynb
253-
_final_prompt = "You have no more tool uses. Please summarize your findings. If you did not complete your goal please tell the user what further work needs to be done so they can choose how best to proceed."
276+
_final_prompt = dict(role="user", content="You have no more tool uses. Please summarize your findings. If you did not complete your goal please tell the user what further work needs to be done so they can choose how best to proceed.")
254277

255278
# %% ../nbs/00_core.ipynb
256279
class Chat:
@@ -285,7 +308,7 @@ def _prep_msg(self, msg=None, prefill=None):
285308
cache_idxs = L(self.cache_idxs).filter().map(lambda o: o-1 if o>0 else o)
286309
else:
287310
cache_idxs = self.cache_idxs
288-
if msg: self.hist = mk_msgs(self.hist+[msg], self.cache, cache_idxs, self.ttl)
311+
if msg: self.hist = mk_msgs(self.hist+[msg], self.cache and 'claude' in self.model, cache_idxs, self.ttl)
289312
pf = [{"role":"assistant","content":prefill}] if prefill else []
290313
return sp + self.hist + pf
291314

@@ -306,6 +329,7 @@ def _call(self, msg=None, prefill=None, temp=None, think=None, search=None, stre
306329
tools=self.tool_schemas, reasoning_effort = effort.get(think), tool_choice=tool_choice,
307330
# temperature is not supported when reasoning
308331
temperature=None if think else ifnone(temp,self.temp),
332+
caching=self.cache and 'claude' not in self.model,
309333
**kwargs)
310334
if stream:
311335
if prefill: yield _mk_prefill(prefill)
@@ -342,35 +366,6 @@ def __call__(self,
342366
elif return_all: return list(result_gen) # toolloop behavior
343367
else: return last(result_gen) # normal chat behavior
344368

345-
# %% ../nbs/00_core.ipynb
346-
@patch
347-
def print_hist(self:Chat):
348-
"Print each message on a different line"
349-
for r in self.hist: print(r, end='\n\n')
350-
351-
# %% ../nbs/00_core.ipynb
352-
def random_tool_id():
353-
"Generate a random tool ID with 'toolu_' prefix"
354-
random_part = ''.join(random.choices(string.ascii_letters + string.digits, k=25))
355-
return f'toolu_{random_part}'
356-
357-
# %% ../nbs/00_core.ipynb
358-
def mk_tc(func, args, tcid=None, idx=1):
359-
if not tcid: tcid = random_tool_id()
360-
return {'index': idx, 'function': {'arguments': args, 'name': func}, 'id': tcid, 'type': 'function'}
361-
362-
# %% ../nbs/00_core.ipynb
363-
def mk_tc_req(content, tcs):
364-
msg = Message(content=content, role='assistant', tool_calls=tcs, function_call=None)
365-
msg.tool_calls = [{**dict(tc), 'function': dict(tc['function'])} for tc in msg.tool_calls]
366-
return msg
367-
368-
# %% ../nbs/00_core.ipynb
369-
def mk_tc_result(tc, result): return {'tool_call_id': tc['id'], 'role': 'tool', 'name': tc['function']['name'], 'content': result}
370-
371-
# %% ../nbs/00_core.ipynb
372-
def mk_tc_results(tcq, results): return [mk_tc_result(a,b) for a,b in zip(tcq.tool_calls, results)]
373-
374369
# %% ../nbs/00_core.ipynb
375370
async def _alite_call_func(tc, ns, raise_on_err=True):
376371
try: fargs = json.loads(tc.function.arguments)
@@ -401,6 +396,7 @@ async def _call(self, msg=None, prefill=None, temp=None, think=None, search=None
401396
tools=self.tool_schemas, reasoning_effort=effort.get(think), tool_choice=tool_choice,
402397
# temperature is not supported when reasoning
403398
temperature=None if think else ifnone(temp,self.temp),
399+
caching=self.cache and 'claude' not in self.model,
404400
**kwargs)
405401
if stream:
406402
if prefill: yield _mk_prefill(prefill)
@@ -460,20 +456,18 @@ def mk_tr_details(tr, tc, mx=2000):
460456
# %% ../nbs/00_core.ipynb
461457
class AsyncStreamFormatter:
462458
def __init__(self, include_usage=False, mx=2000):
463-
self.outp,self.tcs,self.include_usage,self.think,self.mx = '',{},include_usage,False,mx
459+
self.outp,self.tcs,self.include_usage,self.mx = '',{},include_usage,mx
464460

465461
def format_item(self, o):
466462
"Format a single item from the response stream."
467463
res = ''
468464
if isinstance(o, ModelResponseStream):
469465
d = o.choices[0].delta
470-
if nested_idx(d, 'reasoning_content'):
471-
self.think = True
472-
res += '🧠'
473-
elif self.think:
474-
self.think = False
475-
res += '\n\n'
476-
if c:=d.content: res+=c
466+
if nested_idx(d, 'reasoning_content') and d['reasoning_content']!='{"text": ""}':
467+
res+= '🧠' if not self.outp or self.outp[-1]=='🧠' else '\n\n🧠' # gemini can interleave reasoning
468+
elif self.outp and self.outp[-1] == '🧠': res+= '\n\n'
469+
if c:=d.content: # gemini has text content in last reasoning chunk
470+
res+=f"\n\n{c}" if res and res[-1] == '🧠' else c
477471
elif isinstance(o, ModelResponse):
478472
if self.include_usage: res += f"\nUsage: {o.usage}"
479473
if c:=getattr(contents(o),'tool_calls',None):

lisette/usage.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,17 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
2828
def _log_usage(self, response_obj, response_cost, start_time, end_time):
2929
usage = response_obj.usage
3030
ptd = usage.prompt_tokens_details
31-
self.usage.insert(Usage(timestamp=time.time(), model=response_obj.model, user_id=self.user_id_fn(), prompt_tokens=usage.prompt_tokens, completion_tokens=usage.completion_tokens,
32-
total_tokens=usage.total_tokens, cached_tokens=ptd.cached_tokens if ptd else 0, cache_creation_tokens=usage.cache_creation_input_tokens,
33-
cache_read_tokens=usage.cache_read_input_tokens, web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'), response_cost=response_cost))
31+
self.usage.insert(Usage(timestamp=time.time(),
32+
model=response_obj.model,
33+
user_id=self.user_id_fn(),
34+
prompt_tokens=usage.prompt_tokens,
35+
completion_tokens=usage.completion_tokens,
36+
total_tokens=usage.total_tokens,
37+
cached_tokens=ptd.cached_tokens if ptd else 0, # used by gemini (read tokens)
38+
cache_creation_tokens=nested_idx(usage, 'cache_creation_input_tokens'),
39+
cache_read_tokens=nested_idx(usage, 'cache_read_input_tokens'), # used by anthropic
40+
web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'),
41+
response_cost=response_cost))
3442

3543
def user_id_fn(self): raise NotImplementedError('Please implement `LisetteUsageLogger.user_id_fn` before initializing, e.g using fastcore.patch.')
3644

0 commit comments

Comments
 (0)