Skip to content

Commit 4459917

Browse files
committed
gemini multimodal support
1 parent f58d741 commit 4459917

File tree

6 files changed

+1408
-1555
lines changed

6 files changed

+1408
-1555
lines changed

cachy.jsonl

Lines changed: 65 additions & 0 deletions
Large diffs are not rendered by default.

lisette/_modidx.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,8 @@
4343
'lisette/core.py'),
4444
'lisette.core.mk_msg': ('core.html#mk_msg', 'lisette/core.py'),
4545
'lisette.core.mk_msgs': ('core.html#mk_msgs', 'lisette/core.py'),
46-
'lisette.core.mk_tc': ('core.html#mk_tc', 'lisette/core.py'),
47-
'lisette.core.mk_tc_req': ('core.html#mk_tc_req', 'lisette/core.py'),
48-
'lisette.core.mk_tc_result': ('core.html#mk_tc_result', 'lisette/core.py'),
49-
'lisette.core.mk_tc_results': ('core.html#mk_tc_results', 'lisette/core.py'),
5046
'lisette.core.mk_tr_details': ('core.html#mk_tr_details', 'lisette/core.py'),
5147
'lisette.core.patch_litellm': ('core.html#patch_litellm', 'lisette/core.py'),
52-
'lisette.core.random_tool_id': ('core.html#random_tool_id', 'lisette/core.py'),
5348
'lisette.core.remove_cache_ckpts': ('core.html#remove_cache_ckpts', 'lisette/core.py'),
5449
'lisette.core.stream_with_complete': ('core.html#stream_with_complete', 'lisette/core.py'),
5550
'lisette.core.structured': ('core.html#structured', 'lisette/core.py')},

lisette/core.py

Lines changed: 18 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
# %% auto 0
66
__all__ = ['sonn45', 'opus45', 'detls_tag', 're_tools', 'effort', 'patch_litellm', 'remove_cache_ckpts', 'contents', 'mk_msg',
77
'fmt2hist', 'mk_msgs', 'stream_with_complete', 'lite_mk_func', 'ToolResponse', 'structured', 'cite_footnote',
8-
'cite_footnotes', 'Chat', 'random_tool_id', 'mk_tc', 'mk_tc_req', 'mk_tc_result', 'mk_tc_results',
9-
'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter', 'adisplay_stream']
8+
'cite_footnotes', 'Chat', 'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter',
9+
'adisplay_stream']
1010

1111
# %% ../nbs/00_core.ipynb
1212
import asyncio, base64, json, litellm, mimetypes, random, string
@@ -81,10 +81,12 @@ def _repr_markdown_(self: litellm.ModelResponse):
8181

8282
# %% ../nbs/00_core.ipynb
8383
def _bytes2content(data):
84-
"Convert bytes to litellm content dict (image or pdf)"
85-
mtype = 'application/pdf' if data[:4] == b'%PDF' else mimetypes.types_map.get(f'.{imghdr.what(None, h=data)}')
86-
if not mtype: raise ValueError(f'Data must be image or PDF bytes, got {data[:10]}')
87-
return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{base64.b64encode(data).decode("utf-8")}'}
84+
"Convert bytes to litellm content dict (image, pdf, audio, video)"
85+
mtype = detect_mime(data)
86+
if not mtype: raise ValueError(f'Data must be a supported file type, got {data[:10]}')
87+
encoded = base64.b64encode(data).decode("utf-8")
88+
if mtype.startswith('image/'): return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{encoded}'}
89+
return {'type': 'file', 'file': {'file_data': f'data:{mtype};base64,{encoded}'}}
8890

8991
# %% ../nbs/00_core.ipynb
9092
def _add_cache_control(msg, # LiteLLM formatted msg
@@ -250,7 +252,7 @@ def cite_footnotes(stream_list):
250252
def _mk_prefill(pf): return ModelResponseStream([StreamingChoices(delta=Delta(content=pf,role='assistant'))])
251253

252254
# %% ../nbs/00_core.ipynb
253-
_final_prompt = "You have no more tool uses. Please summarize your findings. If you did not complete your goal please tell the user what further work needs to be done so they can choose how best to proceed."
255+
_final_prompt = dict(role="user", content="You have no more tool uses. Please summarize your findings. If you did not complete your goal please tell the user what further work needs to be done so they can choose how best to proceed.")
254256

255257
# %% ../nbs/00_core.ipynb
256258
class Chat:
@@ -285,7 +287,7 @@ def _prep_msg(self, msg=None, prefill=None):
285287
cache_idxs = L(self.cache_idxs).filter().map(lambda o: o-1 if o>0 else o)
286288
else:
287289
cache_idxs = self.cache_idxs
288-
if msg: self.hist = mk_msgs(self.hist+[msg], self.cache, cache_idxs, self.ttl)
290+
if msg: self.hist = mk_msgs(self.hist+[msg], self.cache and 'claude' in self.model, cache_idxs, self.ttl)
289291
pf = [{"role":"assistant","content":prefill}] if prefill else []
290292
return sp + self.hist + pf
291293

@@ -306,6 +308,7 @@ def _call(self, msg=None, prefill=None, temp=None, think=None, search=None, stre
306308
tools=self.tool_schemas, reasoning_effort = effort.get(think), tool_choice=tool_choice,
307309
# temperature is not supported when reasoning
308310
temperature=None if think else ifnone(temp,self.temp),
311+
caching=self.cache and 'claude' not in self.model,
309312
**kwargs)
310313
if stream:
311314
if prefill: yield _mk_prefill(prefill)
@@ -348,29 +351,6 @@ def print_hist(self:Chat):
348351
"Print each message on a different line"
349352
for r in self.hist: print(r, end='\n\n')
350353

351-
# %% ../nbs/00_core.ipynb
352-
def random_tool_id():
353-
"Generate a random tool ID with 'toolu_' prefix"
354-
random_part = ''.join(random.choices(string.ascii_letters + string.digits, k=25))
355-
return f'toolu_{random_part}'
356-
357-
# %% ../nbs/00_core.ipynb
358-
def mk_tc(func, args, tcid=None, idx=1):
359-
if not tcid: tcid = random_tool_id()
360-
return {'index': idx, 'function': {'arguments': args, 'name': func}, 'id': tcid, 'type': 'function'}
361-
362-
# %% ../nbs/00_core.ipynb
363-
def mk_tc_req(content, tcs):
364-
msg = Message(content=content, role='assistant', tool_calls=tcs, function_call=None)
365-
msg.tool_calls = [{**dict(tc), 'function': dict(tc['function'])} for tc in msg.tool_calls]
366-
return msg
367-
368-
# %% ../nbs/00_core.ipynb
369-
def mk_tc_result(tc, result): return {'tool_call_id': tc['id'], 'role': 'tool', 'name': tc['function']['name'], 'content': result}
370-
371-
# %% ../nbs/00_core.ipynb
372-
def mk_tc_results(tcq, results): return [mk_tc_result(a,b) for a,b in zip(tcq.tool_calls, results)]
373-
374354
# %% ../nbs/00_core.ipynb
375355
async def _alite_call_func(tc, ns, raise_on_err=True):
376356
try: fargs = json.loads(tc.function.arguments)
@@ -401,6 +381,7 @@ async def _call(self, msg=None, prefill=None, temp=None, think=None, search=None
401381
tools=self.tool_schemas, reasoning_effort=effort.get(think), tool_choice=tool_choice,
402382
# temperature is not supported when reasoning
403383
temperature=None if think else ifnone(temp,self.temp),
384+
caching=self.cache and 'claude' not in self.model,
404385
**kwargs)
405386
if stream:
406387
if prefill: yield _mk_prefill(prefill)
@@ -460,20 +441,18 @@ def mk_tr_details(tr, tc, mx=2000):
460441
# %% ../nbs/00_core.ipynb
461442
class AsyncStreamFormatter:
462443
def __init__(self, include_usage=False, mx=2000):
463-
self.outp,self.tcs,self.include_usage,self.think,self.mx = '',{},include_usage,False,mx
444+
self.outp,self.tcs,self.include_usage,self.mx = '',{},include_usage,mx
464445

465446
def format_item(self, o):
466447
"Format a single item from the response stream."
467448
res = ''
468449
if isinstance(o, ModelResponseStream):
469450
d = o.choices[0].delta
470-
if nested_idx(d, 'reasoning_content'):
471-
self.think = True
472-
res += '🧠'
473-
elif self.think:
474-
self.think = False
475-
res += '\n\n'
476-
if c:=d.content: res+=c
451+
if nested_idx(d, 'reasoning_content') and d['reasoning_content']!='{"text": ""}':
452+
res+= '🧠' if not self.outp or self.outp[-1]=='🧠' else '\n\n🧠' # gemini can interleave reasoning
453+
elif self.outp and self.outp[-1] == '🧠': res+= '\n\n'
454+
if c:=d.content: # gemini has text content in last reasoning chunk
455+
res+=f"\n\n{c}" if res and res[-1] == '🧠' else c
477456
elif isinstance(o, ModelResponse):
478457
if self.include_usage: res += f"\nUsage: {o.usage}"
479458
if c:=getattr(contents(o),'tool_calls',None):

lisette/usage.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,17 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
2626
def _log_usage(self, response_obj, response_cost, start_time, end_time):
2727
usage = response_obj.usage
2828
ptd = usage.prompt_tokens_details
29-
self.usage.insert(Usage(timestamp=time.time(), model=response_obj.model, user_id=self.user_id_fn(), prompt_tokens=usage.prompt_tokens, completion_tokens=usage.completion_tokens,
30-
total_tokens=usage.total_tokens, cached_tokens=ptd.cached_tokens if ptd else 0, cache_creation_tokens=usage.cache_creation_input_tokens,
31-
cache_read_tokens=usage.cache_read_input_tokens, web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'), response_cost=response_cost))
29+
self.usage.insert(Usage(timestamp=time.time(),
30+
model=response_obj.model,
31+
user_id=self.user_id_fn(),
32+
prompt_tokens=usage.prompt_tokens,
33+
completion_tokens=usage.completion_tokens,
34+
total_tokens=usage.total_tokens,
35+
cached_tokens=ptd.cached_tokens if ptd else 0, # used by gemini (read tokens)
36+
cache_creation_tokens=nested_idx(usage, 'cache_creation_input_tokens'),
37+
cache_read_tokens=nested_idx(usage, 'cache_read_input_tokens'), # used by anthropic
38+
web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'),
39+
response_cost=response_cost))
3240

3341
def user_id_fn(self): raise NotImplementedError('Please implement `LisetteUsageLogger.user_id_fn` before initializing, e.g using fastcore.patch.')
3442

0 commit comments

Comments
 (0)