Commit 26fc6af

gemini multimodal support
1 parent b759709 commit 26fc6af

6 files changed: +1463 -1623 lines changed

cachy.jsonl

Lines changed: 54 additions & 0 deletions
Large diffs are not rendered by default.

lisette/_modidx.py

Lines changed: 1 addition & 6 deletions
@@ -19,12 +19,12 @@
 'lisette.core.Chat.__init__': ('core.html#chat.__init__', 'lisette/core.py'),
 'lisette.core.Chat._call': ('core.html#chat._call', 'lisette/core.py'),
 'lisette.core.Chat._prep_msg': ('core.html#chat._prep_msg', 'lisette/core.py'),
-'lisette.core.Chat.print_hist': ('core.html#chat.print_hist', 'lisette/core.py'),
 'lisette.core.ToolResponse': ('core.html#toolresponse', 'lisette/core.py'),
 'lisette.core._add_cache_control': ('core.html#_add_cache_control', 'lisette/core.py'),
 'lisette.core._alite_call_func': ('core.html#_alite_call_func', 'lisette/core.py'),
 'lisette.core._apply_cache_idxs': ('core.html#_apply_cache_idxs', 'lisette/core.py'),
 'lisette.core._bytes2content': ('core.html#_bytes2content', 'lisette/core.py'),
+'lisette.core._detect_mime': ('core.html#_detect_mime', 'lisette/core.py'),
 'lisette.core._extract_tool': ('core.html#_extract_tool', 'lisette/core.py'),
 'lisette.core._has_cache': ('core.html#_has_cache', 'lisette/core.py'),
 'lisette.core._has_search': ('core.html#_has_search', 'lisette/core.py'),
@@ -43,13 +43,8 @@
 'lisette/core.py'),
 'lisette.core.mk_msg': ('core.html#mk_msg', 'lisette/core.py'),
 'lisette.core.mk_msgs': ('core.html#mk_msgs', 'lisette/core.py'),
-'lisette.core.mk_tc': ('core.html#mk_tc', 'lisette/core.py'),
-'lisette.core.mk_tc_req': ('core.html#mk_tc_req', 'lisette/core.py'),
-'lisette.core.mk_tc_result': ('core.html#mk_tc_result', 'lisette/core.py'),
-'lisette.core.mk_tc_results': ('core.html#mk_tc_results', 'lisette/core.py'),
 'lisette.core.mk_tr_details': ('core.html#mk_tr_details', 'lisette/core.py'),
 'lisette.core.patch_litellm': ('core.html#patch_litellm', 'lisette/core.py'),
-'lisette.core.random_tool_id': ('core.html#random_tool_id', 'lisette/core.py'),
 'lisette.core.remove_cache_ckpts': ('core.html#remove_cache_ckpts', 'lisette/core.py'),
 'lisette.core.stream_with_complete': ('core.html#stream_with_complete', 'lisette/core.py')},
 'lisette.usage': { 'lisette.usage.LisetteUsageLogger': ('usage.html#lisetteusagelogger', 'lisette/usage.py'),

lisette/core.py

Lines changed: 39 additions & 45 deletions
@@ -5,8 +5,8 @@
 # %% auto 0
 __all__ = ['sonn45', 'opus45', 'detls_tag', 're_tools', 'effort', 'patch_litellm', 'remove_cache_ckpts', 'contents', 'mk_msg',
            'fmt2hist', 'mk_msgs', 'stream_with_complete', 'lite_mk_func', 'ToolResponse', 'cite_footnote',
-           'cite_footnotes', 'Chat', 'random_tool_id', 'mk_tc', 'mk_tc_req', 'mk_tc_result', 'mk_tc_results',
-           'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter', 'adisplay_stream']
+           'cite_footnotes', 'Chat', 'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter',
+           'adisplay_stream']

 # %% ../nbs/00_core.ipynb
 import asyncio, base64, json, litellm, mimetypes, random, string
@@ -79,11 +79,34 @@ def _repr_markdown_(self: litellm.ModelResponse):
 opus45 = "claude-opus-4-5"

 # %% ../nbs/00_core.ipynb
+_sigs = {
+    (b'%PDF', 0): 'application/pdf',
+    (b'RIFF', 0): lambda d: 'audio/wav' if d[8:12]==b'WAVE' else 'video/avi' if d[8:12]==b'AVI ' else None,
+    (b'ID3', 0): 'audio/mp3',
+    (b'\xff\xfb', 0): 'audio/mp3',
+    (b'\xff\xf3', 0): 'audio/mp3',
+    (b'FORM', 0): lambda d: 'audio/aiff' if d[8:12]==b'AIFF' else None,
+    (b'OggS', 0): 'audio/ogg',
+    (b'fLaC', 0): 'audio/flac',
+    (b'ftyp', 4): lambda d: 'video/3gpp' if d[8:11]==b'3gp' else 'video/mp4',
+    (b'\x1a\x45\xdf', 0): 'video/webm',
+    (b'FLV', 0): 'video/x-flv',
+    (b'\x30\x26\xb2\x75', 0): 'video/wmv',
+    (b'\x00\x00\x01\xb3', 0): 'video/mpeg',
+}
+
+def _detect_mime(data):
+    for (sig,pos),mime in _sigs.items():
+        if data[pos:pos+len(sig)]==sig: return mime(data) if callable(mime) else mime
+    return mimetypes.types_map.get(f'.{imghdr.what(None, h=data)}')
+
 def _bytes2content(data):
-    "Convert bytes to litellm content dict (image or pdf)"
-    mtype = 'application/pdf' if data[:4] == b'%PDF' else mimetypes.types_map.get(f'.{imghdr.what(None, h=data)}')
-    if not mtype: raise ValueError(f'Data must be image or PDF bytes, got {data[:10]}')
-    return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{base64.b64encode(data).decode("utf-8")}'}
+    "Convert bytes to litellm content dict (image, pdf, audio, video)"
+    mtype = _detect_mime(data)
+    if not mtype: raise ValueError(f'Data must be a supported file type, got {data[:10]}')
+    encoded = base64.b64encode(data).decode("utf-8")
+    if mtype.startswith('image/'): return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{encoded}'}
+    return {'type': 'file', 'file': {'file_data': f'data:{mtype};base64,{encoded}'}}

 # %% ../nbs/00_core.ipynb
 def _add_cache_control(msg, # LiteLLM formatted msg
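For reference, this is roughly how the new magic-byte table resolves a few common containers. The sample byte strings below are made up for illustration, and `_sigs`/`_detect_mime` are the private helpers added in the hunk above; anything the table misses still falls through to the existing imghdr-based image sniffing:

    _detect_mime(b'%PDF-1.7 ...')                  # -> 'application/pdf'
    _detect_mime(b'RIFF\x24\x08\x00\x00WAVEfmt ')  # -> 'audio/wav' (RIFF container, WAVE form type at offset 8)
    _detect_mime(b'\x00\x00\x00\x18ftypmp42')      # -> 'video/mp4' ('ftyp' box at offset 4, brand is not 3gp)
    _detect_mime(b'OggS' + b'\x00' * 16)           # -> 'audio/ogg'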
@@ -236,7 +259,7 @@ def cite_footnotes(stream_list):
 def _mk_prefill(pf): return ModelResponseStream([StreamingChoices(delta=Delta(content=pf,role='assistant'))])

 # %% ../nbs/00_core.ipynb
-_final_prompt = "You have no more tool uses. Please summarize your findings. If you did not complete your goal please tell the user what further work needs to be done so they can choose how best to proceed."
+_final_prompt = dict(role="user", content="You have no more tool uses. Please summarize your findings. If you did not complete your goal please tell the user what further work needs to be done so they can choose how best to proceed.")

 # %% ../nbs/00_core.ipynb
 class Chat:
@@ -271,7 +294,7 @@ def _prep_msg(self, msg=None, prefill=None):
         cache_idxs = L(self.cache_idxs).filter().map(lambda o: o-1 if o>0 else o)
     else:
         cache_idxs = self.cache_idxs
-    if msg: self.hist = mk_msgs(self.hist+[msg], self.cache, cache_idxs, self.ttl)
+    if msg: self.hist = mk_msgs(self.hist+[msg], self.cache and 'claude' in self.model, cache_idxs, self.ttl)
     pf = [{"role":"assistant","content":prefill}] if prefill else []
     return sp + self.hist + pf

@@ -292,6 +315,7 @@ def _call(self, msg=None, prefill=None, temp=None, think=None, search=None, stre
         tools=self.tool_schemas, reasoning_effort = effort.get(think), tool_choice=tool_choice,
         # temperature is not supported when reasoning
         temperature=None if think else ifnone(temp,self.temp),
+        caching=self.cache and 'claude' not in self.model,
         **kwargs)
     if stream:
         if prefill: yield _mk_prefill(prefill)
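Taken together with the `_prep_msg` change above, the cache flag is now routed by provider: Claude models keep Anthropic-style cache_control checkpoints in the message history, while everything else gets a `caching=` kwarg passed through to the underlying litellm call. A small sketch of that gating; the helper and the model ids are made up for illustration, only the two boolean expressions come from the diff:

    # Hypothetical helper, not part of the commit: shows how the cache flag splits by provider.
    def _cache_routing(cache, model):
        return dict(anthropic_ckpts=cache and 'claude' in model,      # cache arg given to mk_msgs in _prep_msg
                    litellm_caching=cache and 'claude' not in model)  # caching= kwarg on the litellm call

    _cache_routing(True, 'claude-opus-4-5')        # {'anthropic_ckpts': True,  'litellm_caching': False}
    _cache_routing(True, 'gemini/gemini-2.5-pro')  # {'anthropic_ckpts': False, 'litellm_caching': True}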
@@ -328,35 +352,6 @@ def __call__(self,
     elif return_all: return list(result_gen) # toolloop behavior
     else: return last(result_gen) # normal chat behavior

-# %% ../nbs/00_core.ipynb
-@patch
-def print_hist(self:Chat):
-    "Print each message on a different line"
-    for r in self.hist: print(r, end='\n\n')
-
-# %% ../nbs/00_core.ipynb
-def random_tool_id():
-    "Generate a random tool ID with 'toolu_' prefix"
-    random_part = ''.join(random.choices(string.ascii_letters + string.digits, k=25))
-    return f'toolu_{random_part}'
-
-# %% ../nbs/00_core.ipynb
-def mk_tc(func, args, tcid=None, idx=1):
-    if not tcid: tcid = random_tool_id()
-    return {'index': idx, 'function': {'arguments': args, 'name': func}, 'id': tcid, 'type': 'function'}
-
-# %% ../nbs/00_core.ipynb
-def mk_tc_req(content, tcs):
-    msg = Message(content=content, role='assistant', tool_calls=tcs, function_call=None)
-    msg.tool_calls = [{**dict(tc), 'function': dict(tc['function'])} for tc in msg.tool_calls]
-    return msg
-
-# %% ../nbs/00_core.ipynb
-def mk_tc_result(tc, result): return {'tool_call_id': tc['id'], 'role': 'tool', 'name': tc['function']['name'], 'content': result}
-
-# %% ../nbs/00_core.ipynb
-def mk_tc_results(tcq, results): return [mk_tc_result(a,b) for a,b in zip(tcq.tool_calls, results)]
-
 # %% ../nbs/00_core.ipynb
 async def _alite_call_func(tc, ns, raise_on_err=True):
     try: fargs = json.loads(tc.function.arguments)
@@ -387,6 +382,7 @@ async def _call(self, msg=None, prefill=None, temp=None, think=None, search=None
         tools=self.tool_schemas, reasoning_effort=effort.get(think), tool_choice=tool_choice,
         # temperature is not supported when reasoning
         temperature=None if think else ifnone(temp,self.temp),
+        caching=self.cache and 'claude' not in self.model,
         **kwargs)
     if stream:
         if prefill: yield _mk_prefill(prefill)
@@ -446,20 +442,18 @@ def mk_tr_details(tr, tc, mx=2000):
 # %% ../nbs/00_core.ipynb
 class AsyncStreamFormatter:
     def __init__(self, include_usage=False, mx=2000):
-        self.outp,self.tcs,self.include_usage,self.think,self.mx = '',{},include_usage,False,mx
+        self.outp,self.tcs,self.include_usage,self.mx = '',{},include_usage,mx

     def format_item(self, o):
         "Format a single item from the response stream."
         res = ''
         if isinstance(o, ModelResponseStream):
             d = o.choices[0].delta
-            if nested_idx(d, 'reasoning_content'):
-                self.think = True
-                res += '🧠'
-            elif self.think:
-                self.think = False
-                res += '\n\n'
-            if c:=d.content: res+=c
+            if nested_idx(d, 'reasoning_content') and d['reasoning_content']!='{"text": ""}':
+                res+= '🧠' if not self.outp or self.outp[-1]=='🧠' else '\n\n🧠' # gemini can interleave reasoning
+            elif self.outp and self.outp[-1] == '🧠': res+= '\n\n'
+            if c:=d.content: # gemini has text content in last reasoning chunk
+                res+=f"\n\n{c}" if res and res[-1] == '🧠' else c
         elif isinstance(o, ModelResponse):
             if self.include_usage: res += f"\nUsage: {o.usage}"
             if c:=getattr(contents(o),'tool_calls',None):
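With `_bytes2content` now emitting base64 `file` parts for PDF, audio, and video, raw media bytes can be sent to a Gemini-backed chat. A hypothetical usage sketch; the file name, model id, and the exact `Chat` call pattern are assumptions based on lisette's existing message helpers, not something shown in this commit:

    from pathlib import Path
    from lisette.core import Chat

    audio = Path('meeting.wav').read_bytes()    # hypothetical file; sniffed as audio/wav by _detect_mime
    chat = Chat('gemini/gemini-2.5-flash')      # any litellm Gemini model id
    chat([audio, 'Summarize this recording.'])  # bytes become a base64 'file' part; the string stays text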

lisette/usage.py

Lines changed: 11 additions & 3 deletions
@@ -27,9 +27,17 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
     def _log_usage(self, response_obj, response_cost, start_time, end_time):
         usage = response_obj.usage
         ptd = usage.prompt_tokens_details
-        self.usage.insert(Usage(timestamp=time.time(), model=response_obj.model, user_id=self.user_id_fn(), prompt_tokens=usage.prompt_tokens, completion_tokens=usage.completion_tokens,
-                                total_tokens=usage.total_tokens, cached_tokens=ptd.cached_tokens if ptd else 0, cache_creation_tokens=usage.cache_creation_input_tokens,
-                                cache_read_tokens=usage.cache_read_input_tokens, web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'), response_cost=response_cost))
+        self.usage.insert(Usage(timestamp=time.time(),
+                                model=response_obj.model,
+                                user_id=self.user_id_fn(),
+                                prompt_tokens=usage.prompt_tokens,
+                                completion_tokens=usage.completion_tokens,
+                                total_tokens=usage.total_tokens,
+                                cached_tokens=ptd.cached_tokens if ptd else 0, # used by gemini (read tokens)
+                                cache_creation_tokens=nested_idx(usage, 'cache_creation_input_tokens'),
+                                cache_read_tokens=nested_idx(usage, 'cache_read_input_tokens'), # used by anthropic
+                                web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'),
+                                response_cost=response_cost))

     def user_id_fn(self): raise NotImplementedError('Please implement `LisetteUsageLogger.user_id_fn` before initializing, e.g using fastcore.patch.')
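The `NotImplementedError` above points at fastcore's `patch`; a minimal sketch of wiring up `user_id_fn` that way (the returned id is a placeholder — in practice you would pull it from your request or session context):

    from fastcore.basics import patch
    from lisette.usage import LisetteUsageLogger

    @patch
    def user_id_fn(self: LisetteUsageLogger):
        "Placeholder implementation: return whatever identifies the current caller."
        return 'local-dev'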
