Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions basilisk/audio_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Utilities for playing audio from base64-encoded data."""

from __future__ import annotations

import base64
import logging
import tempfile

from basilisk.sound_manager import play_sound

log = logging.getLogger(__name__)

# sound_manager plays WAV via sounddevice. For other formats we'd need
# conversion. For now we only support WAV playback.
_SUPPORTED_PLAYBACK = {"wav"}


def play_audio_from_base64(data: str, format: str = "wav") -> None:
    """Decode base64 audio, write it to a temp file, and play it.

    Args:
        data: Base64-encoded audio bytes. Empty/None input is a no-op.
        format: Audio format (wav, mp3, etc), with or without a leading
            dot. Only wav is supported for playback; other formats are
            stored but not played.
    """
    if not data:
        return
    # Normalize once: the original suffix logic anticipated a leading dot
    # (".wav"), but the support check rejected such values before that
    # branch could ever run. Stripping the dot makes ".wav" work too.
    normalized = format.lower().lstrip(".")
    if normalized not in _SUPPORTED_PLAYBACK:
        log.debug(
            "Audio format %s not supported for playback; only wav supported",
            format,
        )
        return
    try:
        raw = base64.b64decode(data)
        with tempfile.NamedTemporaryFile(
            suffix=f".{normalized}", delete=False
        ) as tmp:
            tmp.write(raw)
            path = tmp.name
        play_sound(path)
        # Note: temp file is not deleted immediately since play_sound
        # returns before playback finishes. OS temp cleanup will remove it.
    except Exception as exc:
        log.error("Failed to play audio: %s", exc, exc_info=True)
105 changes: 101 additions & 4 deletions basilisk/completion_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,17 @@
import re
import threading
import time
from datetime import datetime
from typing import TYPE_CHECKING, Any, Callable, Optional

import wx

from basilisk import global_vars
from basilisk.conversation.content_utils import (
END_REASONING,
START_BLOCK_REASONING,
split_reasoning_and_content,
)
from basilisk.conversation.conversation_model import (
Conversation,
Message,
Expand Down Expand Up @@ -77,8 +83,10 @@ def __init__(
self.on_non_stream_finish = on_non_stream_finish
self.task: Optional[threading.Thread] = None
self._stop_completion = False
self._last_completed_block: Optional[MessageBlock] = None
self.last_time = 0
self.stream_buffer: str = ""
self._stream_reasoning_started: bool = False

@ensure_no_task_running
def start_completion(
Expand Down Expand Up @@ -146,6 +154,7 @@ def _handle_completion(self, engine: BaseEngine, **kwargs: dict[str, Any]):
engine: The engine to use for completion
kwargs: The keyword arguments for the completion request
"""
started_at = datetime.now()
try:
play_sound("progress", loop=True)
response = engine.completion(**kwargs)
Expand All @@ -154,13 +163,21 @@ def _handle_completion(self, engine: BaseEngine, **kwargs: dict[str, Any]):
wx.CallAfter(self._handle_error, str(e))
return

# Request is fully sent when completion() returns (streaming: we have the stream)
request_sent_at = (
datetime.now() if kwargs.get("stream", False) else None
)

handle_func = (
self._handle_streaming_completion
if kwargs.get("stream", False)
else self._handle_non_streaming_completion
)
self._last_completed_block = None
kwargs["engine"] = engine
kwargs["response"] = response
kwargs["started_at"] = started_at
kwargs["request_sent_at"] = request_sent_at
try:
success = handle_func(**kwargs)
except Exception as e:
Expand All @@ -182,6 +199,28 @@ def _handle_stream_chunk(
if not message_block.response.citations:
message_block.response.citations = []
message_block.response.citations.append(chunk_data)
elif chunk_type == "reasoning":
message_block.response.reasoning = (
message_block.response.reasoning or ""
) + chunk_data
if not self._stream_reasoning_started:
self._stream_reasoning_started = True
wx.CallAfter(
self._handle_stream_buffer,
f"{START_BLOCK_REASONING}\n{chunk_data}",
)
else:
wx.CallAfter(self._handle_stream_buffer, chunk_data)
elif chunk_type == "content":
message_block.response.content += chunk_data
if self._stream_reasoning_started:
self._stream_reasoning_started = False
wx.CallAfter(
self._handle_stream_buffer,
f"\n{END_REASONING}\n\n{chunk_data}",
)
else:
wx.CallAfter(self._handle_stream_buffer, chunk_data)
else:
logger.warning(
"Unknown chunk type in streaming response: %s", chunk_type
Expand All @@ -197,6 +236,20 @@ def flush_stream_buffer(self, message_block: MessageBlock) -> None:
wx.CallAfter(self._handle_stream_buffer, self.stream_buffer)
self.stream_buffer = ""

def _split_reasoning_from_content(
    self, message_block: MessageBlock
) -> None:
    """Extract a legacy ```think...``` block from the response content.

    When a think block is present, the response message is replaced by a
    copy whose ``reasoning`` and ``content`` fields hold the separated
    parts; otherwise the block is left untouched.
    """
    response = message_block.response
    if not response:
        return
    parsed_reasoning, parsed_content = split_reasoning_and_content(
        response.content
    )
    if parsed_reasoning is None:
        return
    # model_copy keeps every other field of the message intact.
    message_block.response = response.model_copy(
        update={
            "reasoning": parsed_reasoning,
            "content": parsed_content,
        }
    )

def _handle_streaming_completion(
self,
engine: BaseEngine,
Expand All @@ -217,20 +270,43 @@ def _handle_streaming_completion(
Returns:
True if streaming was handled successfully, False if stopped
"""
new_block.response = Message(role=MessageRoleEnum.ASSISTANT, content="")
new_block.response = Message(
role=MessageRoleEnum.ASSISTANT, content="", reasoning=None
)
self._stream_reasoning_started = False

# Notify that streaming has started
if self.on_stream_start:
wx.CallAfter(self.on_stream_start, new_block, system_message)

for chunk in engine.completion_response_with_stream(response):
first_token_at: datetime | None = None
for chunk in engine.completion_response_with_stream(
response, new_block=new_block
):
if first_token_at is None:
first_token_at = datetime.now()
if self._stop_completion or global_vars.app_should_exit:
logger.debug("Stopping completion")
return False
self._handle_stream_chunk(chunk, new_block)

# Notify that streaming has finished
self.flush_stream_buffer(new_block)
if self._stream_reasoning_started:
wx.CallAfter(self._handle_stream_buffer, f"\n{END_REASONING}\n\n")
# Parse legacy ```think...``` format into reasoning + content
self._split_reasoning_from_content(new_block)
started_at = kwargs.get("started_at")
request_sent_at = kwargs.get("request_sent_at")
if started_at is not None:
from basilisk.conversation.conversation_model import ResponseTiming

new_block.timing = ResponseTiming(
started_at=started_at,
request_sent_at=request_sent_at,
first_token_at=first_token_at,
finished_at=datetime.now(),
)
if self.on_stream_finish:
wx.CallAfter(self.on_stream_finish, new_block)
return True
Expand All @@ -255,16 +331,27 @@ def _handle_non_streaming_completion(
Returns:
True if non-streaming completion was handled successfully, False if stopped
"""
from basilisk.conversation.conversation_model import ResponseTiming

completed_block = engine.completion_response_without_stream(
response=response, new_block=new_block, **kwargs
)
started_at = kwargs.get("started_at")
if started_at is not None:
completed_block.timing = ResponseTiming(
started_at=started_at, finished_at=datetime.now()
)

# Notify that non-streaming completion has finished
if self.on_non_stream_finish:
wx.CallAfter(
self.on_non_stream_finish, completed_block, system_message
)

# Pass block so _completion_finished_success can skip completion sound
# when response is audio (play_sound in on_non_stream_finish already
# stopped progress; we must not stop_sound again or we interrupt audio)
self._last_completed_block = completed_block
return True

def _handle_stream_buffer(self, buffer: str):
Expand All @@ -284,8 +371,18 @@ def _handle_stream_buffer(self, buffer: str):

def _completion_finished_success(self):
"""Handle completion finish in success on the main thread."""
stop_sound()
play_sound("chat_response_received")
block = getattr(self, "_last_completed_block", None)
has_audio = (
block
and block.response
and getattr(block.response, "audio_data", None)
)
if not has_audio:
stop_sound()
play_sound("chat_response_received")
# When has_audio: progress was already stopped when audio started;
# do not call stop_sound() or play chime, or we'd interrupt playback
self._last_completed_block = None
if self.on_completion_end:
self.on_completion_end(True)
self.task = None
Expand Down
11 changes: 11 additions & 0 deletions basilisk/config/conversation_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ class ConversationProfile(BaseModel):
temperature: Optional[float] = Field(default=None)
top_p: Optional[float] = Field(default=None)
stream_mode: bool = Field(default=True)
reasoning_mode: bool = Field(default=False)
reasoning_budget_tokens: Optional[int] = Field(default=None)
reasoning_effort: Optional[str] = Field(default=None)
reasoning_adaptive: bool = Field(default=False)
output_modality: str = Field(default="text")
audio_voice: str = Field(default="alloy")
audio_format: str = Field(default="wav")

def __init__(self, **data: Any):
"""Initialize a conversation profile with the provided data.
Expand Down Expand Up @@ -252,6 +259,10 @@ def check_model_params(self) -> ConversationProfile:
raise ValueError("Temperature must be None without model")
if self.top_p is not None:
raise ValueError("Top P must be None without model")
if self.reasoning_budget_tokens is not None:
raise ValueError("Reasoning budget must be None without model")
if self.reasoning_effort is not None:
raise ValueError("Reasoning effort must be None without model")
return self


Expand Down
7 changes: 7 additions & 0 deletions basilisk/config/main_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ class GeneralSettings(BaseModel):
"""General settings for BasiliskLLM."""

language: str = Field(default="auto")
model_metadata_cache_ttl_seconds: int = Field(
default=3600,
ge=60,
le=86400,
description="TTL for model metadata cache",
)
advanced_mode: bool = Field(default=False)
log_level: LogLevelEnum = Field(default=LogLevelEnum.INFO)
automatic_update_mode: AutomaticUpdateModeEnum = Field(
Expand All @@ -49,6 +55,7 @@ class ConversationSettings(BaseModel):
auto_save_draft: bool = Field(default=True)
reopen_last_conversation: bool = Field(default=False)
last_active_conversation_id: int | None = Field(default=None)
show_reasoning_blocks: bool = Field(default=True)


class ImagesSettings(BaseModel):
Expand Down
69 changes: 69 additions & 0 deletions basilisk/conversation/content_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Utilities for message content processing."""

from __future__ import annotations

import re

# Markers delimiting a reasoning block in the display format.
START_BLOCK_REASONING = "<think>"
END_REASONING = "</think>"

# Legacy storage format: reasoning prepended as a fenced ```think block.
_THINK_BLOCK_PATTERN = re.compile(r"```think\s*\n(.*?)\n```\s*", re.DOTALL)
# Display format: reasoning wrapped in <think>...</think> markers.
_REASONING_BLOCK_PATTERN = re.compile(
    rf"{re.escape(START_BLOCK_REASONING)}\s*\n(.*?)\n{re.escape(END_REASONING)}\s*",
    re.DOTALL,
)


def _extract_block(
    pattern: re.Pattern, text: str
) -> tuple[str | None, str] | None:
    """Return (reasoning, content) for *pattern* in *text*, or None if absent.

    Empty reasoning after stripping is reported as None; the content is
    *text* with every occurrence of the block removed, stripped.
    """
    found = pattern.search(text)
    if found is None:
        return None
    extracted = found.group(1).strip()
    remainder = (pattern.sub("", text) or "").strip()
    return (extracted or None), remainder


def split_reasoning_and_content(text: str) -> tuple[str | None, str]:
    """Split content into reasoning and official response.

    Handles the legacy format where reasoning was concatenated as a
    ```think...``` block before the response. Used when loading from DB
    or after streaming.

    Args:
        text: Content that may contain a ```think...``` block.

    Returns:
        Tuple of (reasoning, content). If no think block, returns (None, text).
    """
    if not text:
        return None, text or ""
    result = _extract_block(_THINK_BLOCK_PATTERN, text)
    return result if result is not None else (None, text)


def format_response_for_display(
    reasoning: str | None, content: str, show_reasoning: bool
) -> str:
    """Format response for display (reasoning + content or content only)."""
    if not (show_reasoning and reasoning):
        return content
    return f"{START_BLOCK_REASONING}\n{reasoning}\n{END_REASONING}\n\n{content}"


def split_reasoning_and_content_from_display(
    text: str,
) -> tuple[str | None, str]:
    """Split display text (<think>...</think> format) into reasoning and content.

    Used when parsing user-edited response text (e.g. in edit block dialog).
    Falls back to the legacy ```think...``` parser when no <think> block
    is present.

    Args:
        text: Display text that may contain a <think>...</think> block.

    Returns:
        Tuple of (reasoning, content). If no block, returns (None, text).
    """
    if not text:
        return None, text or ""
    result = _extract_block(_REASONING_BLOCK_PATTERN, text)
    if result is not None:
        return result
    return split_reasoning_and_content(text)
Loading
Loading