Skip to content

Commit 26eb593

Browse files
committed
feat: add token usage, response timing, and block properties
- Add TokenUsage and ResponseTiming models to MessageBlock
- Populate usage from all provider engines (streaming and non-streaming)
- Track completion timing in CompletionHandler
- Add web_search_mode to MessageBlock and persist in DB
- Add MessageBlockPropertiesDialog for viewing block metadata
- ResponseTiming: add request_sent_at, first_token_at; TTFT, generation duration
- completion_handler: capture request_sent_at, first_token_at for streaming
- DB: persist usage_json, timing_json on message_blocks (migration 005)
- Add usage_utils with provider-specific extractors; refactor all engines
- Add Properties context menu item for message blocks (Alt+Enter)
1 parent 005bb35 commit 26eb593

22 files changed

+766
-59
lines changed

basilisk/completion_handler.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import re
1212
import threading
1313
import time
14+
from datetime import datetime
1415
from typing import TYPE_CHECKING, Any, Callable, Optional
1516

1617
import wx
@@ -153,6 +154,7 @@ def _handle_completion(self, engine: BaseEngine, **kwargs: dict[str, Any]):
153154
engine: The engine to use for completion
154155
kwargs: The keyword arguments for the completion request
155156
"""
157+
started_at = datetime.now()
156158
try:
157159
play_sound("progress", loop=True)
158160
response = engine.completion(**kwargs)
@@ -161,6 +163,11 @@ def _handle_completion(self, engine: BaseEngine, **kwargs: dict[str, Any]):
161163
wx.CallAfter(self._handle_error, str(e))
162164
return
163165

166+
# Request is fully sent when completion() returns (streaming: we have the stream)
167+
request_sent_at = (
168+
datetime.now() if kwargs.get("stream", False) else None
169+
)
170+
164171
handle_func = (
165172
self._handle_streaming_completion
166173
if kwargs.get("stream", False)
@@ -169,6 +176,8 @@ def _handle_completion(self, engine: BaseEngine, **kwargs: dict[str, Any]):
169176
self._last_completed_block = None
170177
kwargs["engine"] = engine
171178
kwargs["response"] = response
179+
kwargs["started_at"] = started_at
180+
kwargs["request_sent_at"] = request_sent_at
172181
try:
173182
success = handle_func(**kwargs)
174183
except Exception as e:
@@ -270,7 +279,12 @@ def _handle_streaming_completion(
270279
if self.on_stream_start:
271280
wx.CallAfter(self.on_stream_start, new_block, system_message)
272281

273-
for chunk in engine.completion_response_with_stream(response):
282+
first_token_at: datetime | None = None
283+
for chunk in engine.completion_response_with_stream(
284+
response, new_block=new_block
285+
):
286+
if first_token_at is None:
287+
first_token_at = datetime.now()
274288
if self._stop_completion or global_vars.app_should_exit:
275289
logger.debug("Stopping completion")
276290
return False
@@ -282,6 +296,17 @@ def _handle_streaming_completion(
282296
wx.CallAfter(self._handle_stream_buffer, f"\n{END_REASONING}\n\n")
283297
# Parse legacy ```think...``` format into reasoning + content
284298
self._split_reasoning_from_content(new_block)
299+
started_at = kwargs.get("started_at")
300+
request_sent_at = kwargs.get("request_sent_at")
301+
if started_at is not None:
302+
from basilisk.conversation.conversation_model import ResponseTiming
303+
304+
new_block.timing = ResponseTiming(
305+
started_at=started_at,
306+
request_sent_at=request_sent_at,
307+
first_token_at=first_token_at,
308+
finished_at=datetime.now(),
309+
)
285310
if self.on_stream_finish:
286311
wx.CallAfter(self.on_stream_finish, new_block)
287312
return True
@@ -306,9 +331,16 @@ def _handle_non_streaming_completion(
306331
Returns:
307332
True if non-streaming completion was handled successfully, False if stopped
308333
"""
334+
from basilisk.conversation.conversation_model import ResponseTiming
335+
309336
completed_block = engine.completion_response_without_stream(
310337
response=response, new_block=new_block, **kwargs
311338
)
339+
started_at = kwargs.get("started_at")
340+
if started_at is not None:
341+
completed_block.timing = ResponseTiming(
342+
started_at=started_at, finished_at=datetime.now()
343+
)
312344

313345
# Notify that non-streaming completion has finished
314346
if self.on_non_stream_finish:

basilisk/conversation/conversation_model.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,81 @@
2727
)
2828

2929

30+
class TokenUsage(BaseModel):
    """Token counts for a single completion request, normalized across providers.

    ``input_tokens`` and ``output_tokens`` default to 0; the remaining
    counters default to ``None`` when they are not set.
    """

    input_tokens: int = 0
    output_tokens: int = 0
    reasoning_tokens: int | None = None
    cached_input_tokens: int | None = None
    total_tokens: int | None = None

    @property
    def effective_total(self) -> int:
        """Return ``total_tokens`` when set, else input plus output tokens."""
        reported = self.total_tokens
        return (
            self.input_tokens + self.output_tokens
            if reported is None
            else reported
        )
48+
49+
50+
def _elapsed_seconds(
    start: datetime | None, end: datetime | None
) -> float | None:
    """Return the seconds from *start* to *end*.

    Returns None when either timestamp is missing or when *end* precedes
    *start*, so callers never see a negative duration.
    """
    if start is None or end is None or end < start:
        return None
    return (end - start).total_seconds()


class ResponseTiming(BaseModel):
    """Timestamps for a completion request, plus durations derived from them.

    Every timestamp is optional. Each derived property returns None when a
    timestamp it needs is missing or when the interval would be negative.
    """

    started_at: datetime | None = None
    request_sent_at: datetime | None = None
    first_token_at: datetime | None = None
    finished_at: datetime | None = None

    @property
    def duration_seconds(self) -> float | None:
        """Total duration in seconds (start to last token).

        Returns None if incomplete or if finished_at precedes started_at,
        matching the other derived properties.
        """
        return _elapsed_seconds(self.started_at, self.finished_at)

    @property
    def time_to_send_request_seconds(self) -> float | None:
        """Time from start until request fully sent. None if request_sent_at unknown."""
        return _elapsed_seconds(self.started_at, self.request_sent_at)

    @property
    def time_to_first_token_seconds(self) -> float | None:
        """Time from request sent to first token received (TTFT). None if unknown."""
        # Use request_sent_at when available, else started_at for backward compat
        origin = (
            self.request_sent_at
            if self.request_sent_at is not None
            else self.started_at
        )
        return _elapsed_seconds(origin, self.first_token_at)

    @property
    def generation_duration_seconds(self) -> float | None:
        """Time from first token to last token (excludes TTFT). None if first_token_at unknown."""
        return _elapsed_seconds(self.first_token_at, self.finished_at)
103+
104+
30105
class MessageRoleEnum(enum.StrEnum):
31106
"""Enumeration of the roles that a message can have in a conversation."""
32107

@@ -150,12 +225,15 @@ class MessageBlock(BaseModel):
150225
reasoning_budget_tokens: int | None = Field(default=None)
151226
reasoning_effort: str | None = Field(default=None)
152227
reasoning_adaptive: bool = Field(default=False)
228+
web_search_mode: bool = Field(default=False)
153229
output_modality: str = Field(default="text")
154230
audio_voice: str = Field(default="alloy")
155231
audio_format: str = Field(default="wav")
156232
created_at: datetime = Field(default_factory=datetime.now)
157233
updated_at: datetime = Field(default_factory=datetime.now)
158234
db_id: int | None = Field(default=None, exclude=True)
235+
usage: TokenUsage | None = Field(default=None)
236+
timing: ResponseTiming | None = Field(default=None)
159237

160238
@field_validator("response", mode="after")
161239
@classmethod

basilisk/conversation/database/manager.py

Lines changed: 74 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
Message,
2424
MessageBlock,
2525
MessageRoleEnum,
26+
ResponseTiming,
2627
SystemMessage,
28+
TokenUsage,
2729
)
2830
from basilisk.custom_types import PydanticOrderedSet
2931
from basilisk.provider_ai_model import AIModelInfo
@@ -385,6 +387,13 @@ def _create_db_block(
385387
csp_id: int | None,
386388
) -> DBMessageBlock:
387389
"""Create and flush a DBMessageBlock, updating block.db_id."""
390+
usage_json = None
391+
if block.usage:
392+
usage_json = block.usage.model_dump_json()
393+
timing_json = None
394+
if block.timing:
395+
timing_json = block.timing.model_dump_json()
396+
388397
db_block = DBMessageBlock(
389398
conversation_id=conv_id,
390399
position=block_index,
@@ -395,6 +404,9 @@ def _create_db_block(
395404
max_tokens=block.max_tokens,
396405
top_p=block.top_p,
397406
stream=block.stream,
407+
web_search_mode=getattr(block, "web_search_mode", False),
408+
usage_json=usage_json,
409+
timing_json=timing_json,
398410
created_at=block.created_at,
399411
updated_at=block.updated_at,
400412
)
@@ -667,6 +679,61 @@ def get_conversation_count(self, search: str | None = None) -> int:
667679
query = self._apply_search_filter(query, search)
668680
return session.execute(query).scalar_one()
669681

682+
def _load_block_from_db(
    self, db_block: DBMessageBlock
) -> MessageBlock | None:
    """Build a MessageBlock from a DBMessageBlock.

    Args:
        db_block: The database block to convert.

    Returns:
        The rebuilt block with ``db_id`` set, or None if the block has no
        request (no message with role "user").
    """
    request_msg = None
    response_msg = None
    for db_msg in db_block.messages:
        if db_msg.role == "user":
            request_msg = self._load_message(db_msg)
        elif db_msg.role == "assistant":
            response_msg = self._load_message(db_msg)

    if request_msg is None:
        log.warning("Block %d has no request, skipping", db_block.id)
        return None

    system_index = None
    if db_block.system_prompt_link is not None:
        system_index = db_block.system_prompt_link.position

    # Usage and timing are best-effort metadata: an unreadable payload must
    # not prevent the block from loading, but it is logged instead of being
    # silently discarded.
    usage = None
    usage_json = getattr(db_block, "usage_json", None)
    if usage_json:
        try:
            usage = TokenUsage.model_validate_json(usage_json)
        except Exception:
            log.warning(
                "Block %d has invalid usage_json, ignoring",
                db_block.id,
                exc_info=True,
            )

    timing = None
    timing_json = getattr(db_block, "timing_json", None)
    if timing_json:
        try:
            timing = ResponseTiming.model_validate_json(timing_json)
        except Exception:
            log.warning(
                "Block %d has invalid timing_json, ignoring",
                db_block.id,
                exc_info=True,
            )

    block = MessageBlock(
        request=request_msg,
        response=response_msg,
        system_index=system_index,
        model=AIModelInfo(
            provider_id=db_block.model_provider, model_id=db_block.model_id
        ),
        temperature=db_block.temperature,
        max_tokens=db_block.max_tokens,
        top_p=db_block.top_p,
        stream=db_block.stream,
        web_search_mode=getattr(db_block, "web_search_mode", False),
        usage=usage,
        timing=timing,
        created_at=db_block.created_at,
        updated_at=db_block.updated_at,
    )
    # db_id is assigned after construction, as in the original code.
    block.db_id = db_block.id
    return block
736+
670737
def load_conversation(self, conv_id: int) -> Conversation:
671738
"""Load a conversation from the database.
672739
@@ -684,57 +751,20 @@ def load_conversation(self, conv_id: int) -> Conversation:
684751
if db_conv is None:
685752
raise ValueError(f"Conversation {conv_id} not found")
686753

687-
# Rebuild systems OrderedSet
688754
systems = PydanticOrderedSet[SystemMessage]()
689-
csp_links = sorted(
755+
for csp in sorted(
690756
db_conv.system_prompt_links, key=lambda x: x.position
691-
)
692-
for csp in csp_links:
757+
):
693758
sys_msg = SystemMessage(content=csp.system_prompt.content)
694759
sys_msg.db_id = csp.system_prompt.id
695760
systems.add(sys_msg)
696761

697-
# Rebuild message blocks
698762
blocks = []
699-
sorted_blocks = sorted(db_conv.blocks, key=lambda x: x.position)
700-
for db_block in sorted_blocks:
701-
# Find request and response messages
702-
request_msg = None
703-
response_msg = None
704-
for db_msg in db_block.messages:
705-
if db_msg.role == "user":
706-
request_msg = self._load_message(db_msg)
707-
elif db_msg.role == "assistant":
708-
response_msg = self._load_message(db_msg)
709-
710-
if request_msg is None:
711-
log.warning(
712-
"Block %d has no request, skipping", db_block.id
713-
)
714-
continue
715-
716-
# Determine system_index
717-
system_index = None
718-
if db_block.system_prompt_link is not None:
719-
system_index = db_block.system_prompt_link.position
720-
721-
block = MessageBlock(
722-
request=request_msg,
723-
response=response_msg,
724-
system_index=system_index,
725-
model=AIModelInfo(
726-
provider_id=db_block.model_provider,
727-
model_id=db_block.model_id,
728-
),
729-
temperature=db_block.temperature,
730-
max_tokens=db_block.max_tokens,
731-
top_p=db_block.top_p,
732-
stream=db_block.stream,
733-
created_at=db_block.created_at,
734-
updated_at=db_block.updated_at,
735-
)
736-
block.db_id = db_block.id
737-
blocks.append(block)
763+
for db_block in sorted(db_conv.blocks, key=lambda x: x.position):
764+
block = self._load_block_from_db(db_block)
765+
if block is not None:
766+
blocks.append(block)
767+
738768
return Conversation(
739769
messages=blocks,
740770
systems=systems,

basilisk/conversation/database/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ class DBMessageBlock(Base):
9898
max_tokens: Mapped[int] = mapped_column(default=4096)
9999
top_p: Mapped[float] = mapped_column(default=1.0)
100100
stream: Mapped[bool] = mapped_column(default=False)
101+
web_search_mode: Mapped[bool] = mapped_column(default=False)
102+
usage_json: Mapped[str | None] = mapped_column(default=None)
103+
timing_json: Mapped[str | None] = mapped_column(default=None)
101104
created_at: Mapped[datetime] = mapped_column(
102105
default=lambda: datetime.now(timezone.utc)
103106
)

basilisk/presenters/conversation_presenter.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ def get_new_message_block(self) -> MessageBlock | None:
174174
stream = view.stream_mode.GetValue()
175175
if audio_params.get("output_modality") == "audio":
176176
stream = False
177+
web_search = False
178+
if hasattr(view, "web_search_mode") and view.web_search_mode.IsShown():
179+
web_search = view.web_search_mode.GetValue()
177180
return MessageBlock(
178181
request=Message(
179182
role=MessageRoleEnum.USER,
@@ -187,6 +190,7 @@ def get_new_message_block(self) -> MessageBlock | None:
187190
top_p=view.top_p_spinner.GetValue(),
188191
max_tokens=view.max_tokens_spin_ctrl.GetValue(),
189192
stream=stream,
193+
web_search_mode=web_search,
190194
**reasoning_params,
191195
**audio_params,
192196
)
@@ -571,6 +575,9 @@ def _build_draft_block(self) -> MessageBlock | None:
571575
return None
572576
reasoning_params = get_reasoning_params_from_view(view)
573577
audio_params = get_audio_params_from_view(view)
578+
web_search = False
579+
if hasattr(view, "web_search_mode") and view.web_search_mode.IsShown():
580+
web_search = view.web_search_mode.GetValue()
574581
block = MessageBlock(
575582
request=Message(
576583
role=MessageRoleEnum.USER,
@@ -585,6 +592,7 @@ def _build_draft_block(self) -> MessageBlock | None:
585592
max_tokens=view.max_tokens_spin_ctrl.GetValue(),
586593
top_p=view.top_p_spinner.GetValue(),
587594
stream=view.stream_mode.GetValue(),
595+
web_search_mode=web_search,
588596
**reasoning_params,
589597
**audio_params,
590598
)

0 commit comments

Comments
 (0)