diff --git a/examples/transcription/transcription/config.py b/examples/transcription/transcription/config.py index 827ce37..cb0c13d 100644 --- a/examples/transcription/transcription/config.py +++ b/examples/transcription/transcription/config.py @@ -4,8 +4,8 @@ FISHJAM_ID = os.getenv("FISHJAM_ID", "") FISHJAM_TOKEN = os.environ["FISHJAM_MANAGEMENT_TOKEN"] -TRANSCRIPTION_MODEL = "gemini-live-2.5-flash-preview" +TRANSCRIPTION_MODEL = "gemini-2.5-flash-native-audio-preview-09-2025" TRANSCRIPTION_CONFIG = LiveConnectConfig( - response_modalities=[Modality.TEXT], + response_modalities=[Modality.AUDIO], input_audio_transcription=AudioTranscriptionConfig(), ) diff --git a/examples/transcription/transcription/room.py b/examples/transcription/transcription/room.py index 4a02ae1..d5fd571 100644 --- a/examples/transcription/transcription/room.py +++ b/examples/transcription/transcription/room.py @@ -1,5 +1,6 @@ -from fishjam import FishjamClient, Room +from fishjam import AgentOptions, FishjamClient, Room from fishjam.errors import NotFoundError +from fishjam.integrations.gemini import GeminiIntegration from transcription.worker import BackgroundWorker from .agent import TranscriptionAgent @@ -27,7 +28,10 @@ def _create_room(self): def _create_agent(self): self.agent = TranscriptionAgent( self.room.id, - fishjam.create_agent(self.room.id), + fishjam.create_agent( + self.room.id, + AgentOptions(output=GeminiIntegration.GEMINI_INPUT_AUDIO_SETTINGS), + ), self._worker, ) diff --git a/examples/transcription/transcription/transcription.py b/examples/transcription/transcription/transcription.py index ee8e72d..b71905d 100644 --- a/examples/transcription/transcription/transcription.py +++ b/examples/transcription/transcription/transcription.py @@ -1,16 +1,17 @@ from asyncio import Event, Queue, TaskGroup from typing import Callable -from google import genai from google.genai.live import AsyncSession from google.genai.types import Blob +from fishjam.integrations.gemini import 
try:
    from google import genai
    from google.auth.credentials import Credentials
    from google.genai import types
    from google.genai.client import DebugConfig
except ImportError as err:
    # Chain explicitly so the traceback reports the missing dependency as the
    # direct cause instead of an incidental "during handling of ..." context.
    raise ImportError(
        "To use the Fishjam Gemini integration, you need to import the `gemini` extra. "
        "Install it with `pip install 'fishjam-server-sdk[gemini]'`"
    ) from err

from typing import Optional, Union

from fishjam import AgentOutputOptions
from fishjam.agent import OutgoingAudioTrackOptions
from fishjam.events import TrackEncoding
from fishjam.version import get_version


def _get_headers() -> dict[str, str]:
    """Return the `x-goog-api-client` header carrying the SDK name and version."""
    return {"x-goog-api-client": f"fishjam-python-server-sdk/{get_version()}"}


def _add_fishjam_header(
    http_options: Optional[Union[types.HttpOptions, types.HttpOptionsDict]],
) -> Union[types.HttpOptions, types.HttpOptionsDict]:
    """Return HTTP options that include the Fishjam client header.

    Args:
        http_options: Options supplied by the caller, either as an
            `HttpOptions` object, as a plain dict, or `None`.

    Returns:
        Options of the same flavour as the input (a dict when the input was
        `None`) whose headers contain the Fishjam `x-goog-api-client` entry.
        The caller's value is never modified.
    """
    if http_options is None:
        return _add_fishjam_header_none()
    if isinstance(http_options, types.HttpOptions):
        return _add_fishjam_header_object(http_options)
    return _add_fishjam_header_dict(http_options)


def _add_fishjam_header_object(http_options: types.HttpOptions) -> types.HttpOptions:
    """Return a copy of `http_options` with the Fishjam header merged in.

    A copy is returned instead of assigning to `http_options.headers` so that
    an options object the caller reuses elsewhere is not silently tagged with
    the Fishjam header; this matches the dict variant, which also builds a
    new mapping.
    """
    # The right-hand operand wins, so a caller-provided `x-goog-api-client`
    # value is replaced by the Fishjam one.
    headers = (http_options.headers or {}) | _get_headers()
    # `model_copy` is the pydantic v2 copy API; only `headers` is swapped out.
    return http_options.model_copy(update={"headers": headers})


def _add_fishjam_header_dict(
    http_options: types.HttpOptionsDict,
) -> types.HttpOptionsDict:
    """Return a new options dict with the Fishjam header merged in."""
    # A missing or `None` "headers" entry is treated as an empty mapping.
    headers = (http_options.get("headers") or {}) | _get_headers()
    return http_options | {"headers": headers}


def _add_fishjam_header_none() -> types.HttpOptionsDict:
    """Return options containing only the Fishjam header."""
    return {"headers": _get_headers()}
from importlib.metadata import PackageNotFoundError, version

# Name of the distribution whose installed metadata carries the SDK version.
_DISTRIBUTION_NAME = "fishjam-server-sdk"

# Reported when no installed metadata can be found for the distribution.
_UNKNOWN_VERSION = "0.0.0+unknown"


def _read_installed_version() -> str:
    """Return the installed distribution version, or a placeholder if absent.

    The lookup runs while this module is being imported, so an uncaught
    `PackageNotFoundError` would make the module itself unimportable; a
    placeholder is returned instead.
    """
    try:
        return version(_DISTRIBUTION_NAME)
    except PackageNotFoundError:
        return _UNKNOWN_VERSION


__version__ = _read_installed_version()


def get_version() -> str:
    """Return the SDK version string resolved at import time."""
    return __version__
@pytest.fixture
def version():
    """Provide the SDK version the integration is expected to advertise."""
    return get_version()


@patch("google.genai.Client")
def test_create_client_passes_all_args(mock_client_cls: MagicMock, version: str):
    """All keyword arguments are forwarded and the Fishjam header is added."""
    sdk_header = f"fishjam-python-server-sdk/{version}"
    credentials = MagicMock()
    debug_config = MagicMock()

    GeminiIntegration.create_client(
        vertexai=True,
        api_key="test-key",
        credentials=credentials,
        project="my-project",
        location="us-central1",
        debug_config=debug_config,
    )

    mock_client_cls.assert_called_once()
    forwarded = mock_client_cls.call_args.kwargs

    # Identity checks for the flag and the mock objects.
    assert forwarded["vertexai"] is True
    assert forwarded["credentials"] is credentials
    assert forwarded["debug_config"] is debug_config

    # Equality checks for the plain string arguments.
    assert forwarded["api_key"] == "test-key"
    assert forwarded["project"] == "my-project"
    assert forwarded["location"] == "us-central1"

    assert forwarded["http_options"] == {"headers": {"x-goog-api-client": sdk_header}}


@patch("google.genai.Client")
def test_create_client_with_dict_options_no_headers(
    mock_client_cls: MagicMock, version: str
):
    """A dict without headers keeps its keys and gains the Fishjam header."""
    sdk_header = f"fishjam-python-server-sdk/{version}"

    GeminiIntegration.create_client(http_options={"timeout": 30})

    mock_client_cls.assert_called_once()
    passed_options = mock_client_cls.call_args.kwargs["http_options"]
    assert passed_options == {
        "timeout": 30,
        "headers": {"x-goog-api-client": sdk_header},
    }


@patch("google.genai.Client")
def test_create_client_with_dict_options_existing_headers(
    mock_client_cls: MagicMock, version: str
):
    """Existing dict headers are kept; the client header is overridden."""
    sdk_header = f"fishjam-python-server-sdk/{version}"
    caller_headers = {
        "existing-header": "value",
        "x-goog-api-client": "other",
    }

    GeminiIntegration.create_client(http_options={"headers": caller_headers})

    mock_client_cls.assert_called_once()
    passed_options = mock_client_cls.call_args.kwargs["http_options"]
    assert passed_options == {
        "headers": {
            "existing-header": "value",
            "x-goog-api-client": sdk_header,
        },
    }


@patch("google.genai.Client")
def test_create_client_with_object_options(mock_client_cls: MagicMock, version: str):
    """An `HttpOptions` object without headers gains the Fishjam header."""
    sdk_header = f"fishjam-python-server-sdk/{version}"

    GeminiIntegration.create_client(http_options=types.HttpOptions())

    mock_client_cls.assert_called_once()

    # Inspect the options object that reached the client constructor.
    passed_options = mock_client_cls.call_args.kwargs["http_options"]
    assert passed_options.headers == {"x-goog-api-client": sdk_header}


@patch("google.genai.Client")
def test_create_client_with_object_options_existing_headers(
    mock_client_cls: MagicMock, version: str
):
    """Existing object headers are kept; the client header is overridden."""
    sdk_header = f"fishjam-python-server-sdk/{version}"
    caller_options = types.HttpOptions(
        headers={
            "user-header": "123",
            "x-goog-api-client": "other",
        }
    )

    GeminiIntegration.create_client(http_options=caller_options)

    mock_client_cls.assert_called_once()
    passed_options = mock_client_cls.call_args.kwargs["http_options"]
    assert passed_options.headers == {
        "user-header": "123",
        "x-goog-api-client": sdk_header,
    }