Skip to content

Commit 09afd37

Browse files
Add Gemini integration with optional dependencies (#57)
* Add Gemini integration with optional dependencies
* Change exports
* Add tests
* Update example
* Formatting
* Install extras in tests
* Add mime type
* Add output settings
1 parent 8183f0a commit 09afd37

File tree

11 files changed

+276
-14
lines changed

11 files changed

+276
-14
lines changed

examples/transcription/transcription/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
FISHJAM_ID = os.getenv("FISHJAM_ID", "")
66
FISHJAM_TOKEN = os.environ["FISHJAM_MANAGEMENT_TOKEN"]
7-
TRANSCRIPTION_MODEL = "gemini-live-2.5-flash-preview"
7+
TRANSCRIPTION_MODEL = "gemini-2.5-flash-native-audio-preview-09-2025"
88
TRANSCRIPTION_CONFIG = LiveConnectConfig(
9-
response_modalities=[Modality.TEXT],
9+
response_modalities=[Modality.AUDIO],
1010
input_audio_transcription=AudioTranscriptionConfig(),
1111
)

examples/transcription/transcription/room.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
from fishjam import FishjamClient, Room
1+
from fishjam import AgentOptions, FishjamClient, Room
22
from fishjam.errors import NotFoundError
3+
from fishjam.integrations.gemini import GeminiIntegration
34
from transcription.worker import BackgroundWorker
45

56
from .agent import TranscriptionAgent
@@ -27,7 +28,10 @@ def _create_room(self):
2728
def _create_agent(self):
2829
self.agent = TranscriptionAgent(
2930
self.room.id,
30-
fishjam.create_agent(self.room.id),
31+
fishjam.create_agent(
32+
self.room.id,
33+
AgentOptions(output=GeminiIntegration.GEMINI_INPUT_AUDIO_SETTINGS),
34+
),
3135
self._worker,
3236
)
3337

examples/transcription/transcription/transcription.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
from asyncio import Event, Queue, TaskGroup
22
from typing import Callable
33

4-
from google import genai
54
from google.genai.live import AsyncSession
65
from google.genai.types import Blob
76

7+
from fishjam.integrations.gemini import GeminiIntegration
8+
89
from .config import TRANSCRIPTION_CONFIG, TRANSCRIPTION_MODEL
910

1011

1112
class TranscriptionSession:
1213
def __init__(self, on_text: Callable[[str], None]):
13-
self._gemini = genai.Client()
14+
self._gemini = GeminiIntegration.create_client()
1415
self._audio_queue = Queue[bytes]()
1516
self._end_event = Event()
1617
self._model = TRANSCRIPTION_MODEL
@@ -43,7 +44,10 @@ async def _send_loop(self, session: AsyncSession):
4344
while True:
4445
audio_frame = await self._audio_queue.get()
4546
await session.send_realtime_input(
46-
audio=Blob(data=audio_frame, mime_type="audio/pcm;rate=16000")
47+
audio=Blob(
48+
data=audio_frame,
49+
mime_type=GeminiIntegration.GEMINI_AUDIO_MIME_TYPE,
50+
)
4751
)
4852

4953
async def _recv_loop(self, session: AsyncSession):

fishjam/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# pylint: disable=locally-disabled, no-name-in-module, import-error
99

1010
# Exceptions and Server Messages
11-
from fishjam import agent, errors, events, peer, room
11+
from fishjam import agent, errors, events, integrations, peer, room, version
1212
from fishjam._openapi_client.models import PeerMetadata
1313

1414
# API
@@ -24,6 +24,8 @@
2424
RoomOptions,
2525
)
2626

27+
__version__ = version.__version__
28+
2729
__all__ = [
2830
"FishjamClient",
2931
"FishjamNotifier",
@@ -40,6 +42,8 @@
4042
"room",
4143
"peer",
4244
"agent",
45+
"integrations",
4346
]
4447

48+
4549
__docformat__ = "restructuredtext"

fishjam/integrations/__init__.py

Whitespace-only changes.

fishjam/integrations/gemini.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# The Gemini integration is optional: its third-party dependencies are only
# present when the `gemini` extra has been installed.
try:
    from google import genai
    from google.auth.credentials import Credentials
    from google.genai import types
    from google.genai.client import DebugConfig
except ImportError as err:
    # Chain explicitly so the traceback marks the original failure as the
    # direct cause and still names the module that could not be imported.
    raise ImportError(
        "To use the Fishjam Gemini integration, you need to import the `gemini` extra. "
        "Install it with `pip install 'fishjam-server-sdk[gemini]'`"
    ) from err
11+
12+
from typing import Optional, Union
13+
14+
from fishjam import AgentOutputOptions
15+
from fishjam.agent import OutgoingAudioTrackOptions
16+
from fishjam.events import TrackEncoding
17+
from fishjam.version import get_version
18+
19+
20+
def _get_headers():
    """Return the ``x-goog-api-client`` header naming this SDK and its version.

    Returns:
        A one-entry dict whose value is
        ``fishjam-python-server-sdk/<get_version()>``.
    """
    client_tag = f"fishjam-python-server-sdk/{get_version()}"
    return {"x-goog-api-client": client_tag}
22+
23+
24+
def _add_fishjam_header(
    http_options: Optional[Union[types.HttpOptions, types.HttpOptionsDict]],
) -> Union[types.HttpOptions, types.HttpOptionsDict]:
    """Return HTTP options that include the Fishjam client header.

    Dispatches on the form of ``http_options``.

    Args:
        http_options: ``None``, a ``types.HttpOptions`` object, or a dict of
            HTTP options.

    Returns:
        The result of the matching ``_add_fishjam_header_*`` helper.
    """
    if isinstance(http_options, types.HttpOptions):
        return _add_fishjam_header_object(http_options)
    if http_options is not None:
        # Anything that is neither None nor an HttpOptions is treated as a dict.
        return _add_fishjam_header_dict(http_options)
    return _add_fishjam_header_none()
32+
33+
34+
def _add_fishjam_header_object(http_options: types.HttpOptions) -> types.HttpOptions:
    """Return a copy of ``http_options`` with the Fishjam client header set.

    The caller's object is left unmodified, so one ``HttpOptions`` instance
    can be reused elsewhere without picking up the Fishjam header. This
    matches the dict variant, which also returns a new mapping.

    Args:
        http_options: Options object whose headers, if any, are preserved.

    Returns:
        A new ``HttpOptions`` whose ``headers`` are the original headers
        merged with the Fishjam header. The Fishjam value wins on a key
        clash.
    """
    merged_headers = (http_options.headers or {}) | _get_headers()
    # HttpOptions is a pydantic model; model_copy gives a shallow copy with
    # only `headers` replaced.
    return http_options.model_copy(update={"headers": merged_headers})
37+
38+
39+
def _add_fishjam_header_dict(
    http_options: types.HttpOptionsDict,
) -> types.HttpOptionsDict:
    """Return a new options dict with the Fishjam client header merged in.

    Args:
        http_options: Dict of HTTP options; a missing or falsy ``"headers"``
            entry is treated as no headers.

    Returns:
        A new dict with every original key, where ``"headers"`` holds the
        original headers merged with the Fishjam header. The Fishjam value
        wins on a key clash.
    """
    existing_headers = http_options.get("headers") or {}
    merged_headers = existing_headers | _get_headers()
    return http_options | {"headers": merged_headers}
44+
45+
46+
def _add_fishjam_header_none() -> types.HttpOptionsDict:
    """Return an options dict containing only the Fishjam client header."""
    return dict(headers=_get_headers())
48+
49+
50+
class _GeminiIntegration:
    """Helpers for wiring a Fishjam agent to the Gemini Live API."""

    # Sample rates are kept in one place so the agent settings and the MIME
    # type string cannot drift apart.
    _INPUT_SAMPLE_RATE_HZ = 16_000
    _OUTPUT_SAMPLE_RATE_HZ = 24_000

    def create_client(
        self,
        vertexai: Optional[bool] = None,
        api_key: Optional[str] = None,
        credentials: Optional[Credentials] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        debug_config: Optional[DebugConfig] = None,
        http_options: Optional[Union[types.HttpOptions, types.HttpOptionsDict]] = None,
    ) -> genai.Client:
        """Creates and configures a Fishjam-compatible Google GenAI Client.

        See `genai.Client` for configuration options. All arguments are
        forwarded unchanged except ``http_options``, which is passed with the
        Fishjam ``x-goog-api-client`` header added.

        Returns:
            genai.Client: An instantiated and configured Gemini client.
        """
        return genai.Client(
            vertexai=vertexai,
            api_key=api_key,
            credentials=credentials,
            project=project,
            location=location,
            debug_config=debug_config,
            http_options=_add_fishjam_header(http_options),
        )

    @property
    def GEMINI_INPUT_AUDIO_SETTINGS(self) -> AgentOutputOptions:
        """Audio configuration required for Gemini input.

        Gemini consumes PCM16 audio at 16,000 Hz.

        Returns:
            AgentOutputOptions: Agent options compatible with the Gemini Live API.
        """
        return AgentOutputOptions(
            audio_format="pcm16",
            audio_sample_rate=self._INPUT_SAMPLE_RATE_HZ,
        )

    @property
    def GEMINI_OUTPUT_AUDIO_SETTINGS(self) -> OutgoingAudioTrackOptions:
        """Audio configuration for an agent's output track.

        Gemini produces PCM16 audio at 24,000 Hz.

        Returns:
            OutgoingAudioTrackOptions: Track options compatible with the Gemini Live API
        """
        return OutgoingAudioTrackOptions(
            encoding=TrackEncoding.TRACK_ENCODING_PCM16,
            sample_rate=self._OUTPUT_SAMPLE_RATE_HZ,
            channels=1,
        )

    @property
    def GEMINI_AUDIO_MIME_TYPE(self) -> str:
        """The mime type for Gemini audio input."""
        # Evaluates to "audio/pcm;rate=16000", derived from the input rate.
        return f"audio/pcm;rate={self._INPUT_SAMPLE_RATE_HZ}"


GeminiIntegration = _GeminiIntegration()
"""Integration with the Gemini Live API."""

fishjam/version.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from importlib.metadata import PackageNotFoundError, version

# Distribution name used to look up the installed package metadata.
_DISTRIBUTION_NAME = "fishjam-server-sdk"
# Placeholder reported when no distribution metadata is available.
_UNKNOWN_VERSION = "0.0.0+unknown"

try:
    __version__ = version(_DISTRIBUTION_NAME)
except PackageNotFoundError:
    # The package is importable but has no installed metadata (for example a
    # source checkout placed directly on sys.path). Fall back to a placeholder
    # so that importing the package does not fail.
    __version__ = _UNKNOWN_VERSION


def get_version() -> str:
    """Return the SDK version string.

    Returns:
        The version from the installed ``fishjam-server-sdk`` distribution
        metadata, or ``"0.0.0+unknown"`` when that metadata is not available.
    """
    return __version__

pyproject.toml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ generate_docusaurus = "scripts:generate_docusaurus"
3333
update_client = "scripts:update_client"
3434
room_manager = "scripts:start_room_manager"
3535

36+
[project.optional-dependencies]
37+
gemini = ["google-genai>=1.43.0"]
38+
3639
[dependency-groups]
3740
dev = [
3841
"betterproto[compiler]== 2.0.0b6",
@@ -55,7 +58,12 @@ test = [
5558
default-groups = ["dev", "test"]
5659

5760
[tool.uv.workspace]
58-
members = ["examples/transcription", ".", "examples/poet_chat", "examples/selective_subscription"]
61+
members = [
62+
"examples/transcription",
63+
".",
64+
"examples/poet_chat",
65+
"examples/selective_subscription",
66+
]
5967

6068
[tool.hatch.build.targets.sdist]
6169
include = ["fishjam"]
@@ -86,8 +94,6 @@ convention = "google"
8694
"scripts.py" = ["D"]
8795

8896

89-
90-
9197
[tool.pytest.ini_options]
9298
markers = [
9399
"file_component_sources: Tests requiring files uploaded for File Component",

tests/Dockerfile

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ COPY examples/selective_subscription/pyproject.toml ./examples/selective_subscri
1717

1818
COPY uv.lock .
1919

20-
RUN uv sync --locked --no-install-project
20+
RUN uv sync --locked --no-install-project --all-extras
2121

2222
COPY . /app
23-
24-
RUN uv sync --locked

tests/test_gemini.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
import pytest
4+
from google.genai import types
5+
6+
from fishjam.integrations.gemini import GeminiIntegration
7+
from fishjam.version import get_version
8+
9+
10+
@pytest.fixture
11+
def version():
12+
return get_version()
13+
14+
15+
@patch("google.genai.Client")
16+
def test_create_client_passes_all_args(mock_client_cls: MagicMock, version: str):
17+
dummy_credentials = MagicMock()
18+
dummy_debug_config = MagicMock()
19+
20+
GeminiIntegration.create_client(
21+
vertexai=True,
22+
api_key="test-key",
23+
credentials=dummy_credentials,
24+
project="my-project",
25+
location="us-central1",
26+
debug_config=dummy_debug_config,
27+
)
28+
29+
mock_client_cls.assert_called_once()
30+
31+
kwargs = mock_client_cls.call_args.kwargs
32+
33+
assert kwargs["vertexai"] is True
34+
assert kwargs["api_key"] == "test-key"
35+
assert kwargs["credentials"] is dummy_credentials
36+
assert kwargs["project"] == "my-project"
37+
assert kwargs["location"] == "us-central1"
38+
assert kwargs["debug_config"] is dummy_debug_config
39+
40+
assert kwargs["http_options"] == {
41+
"headers": {"x-goog-api-client": f"fishjam-python-server-sdk/{version}"}
42+
}
43+
44+
45+
@patch("google.genai.Client")
46+
def test_create_client_with_dict_options_no_headers(
47+
mock_client_cls: MagicMock, version: str
48+
):
49+
GeminiIntegration.create_client(http_options={"timeout": 30})
50+
51+
mock_client_cls.assert_called_once()
52+
53+
assert mock_client_cls.call_args.kwargs["http_options"] == {
54+
"timeout": 30,
55+
"headers": {"x-goog-api-client": f"fishjam-python-server-sdk/{version}"},
56+
}
57+
58+
59+
@patch("google.genai.Client")
60+
def test_create_client_with_dict_options_existing_headers(
61+
mock_client_cls: MagicMock, version: str
62+
):
63+
GeminiIntegration.create_client(
64+
http_options={
65+
"headers": {
66+
"existing-header": "value",
67+
"x-goog-api-client": "other",
68+
}
69+
}
70+
)
71+
72+
mock_client_cls.assert_called_once()
73+
74+
assert mock_client_cls.call_args.kwargs["http_options"] == {
75+
"headers": {
76+
"existing-header": "value",
77+
"x-goog-api-client": f"fishjam-python-server-sdk/{version}",
78+
},
79+
}
80+
81+
82+
@patch("google.genai.Client")
83+
def test_create_client_with_object_options(mock_client_cls: MagicMock, version: str):
84+
http_options = types.HttpOptions()
85+
86+
GeminiIntegration.create_client(http_options=http_options)
87+
88+
mock_client_cls.assert_called_once()
89+
90+
# Verify the object passed has the correct headers set
91+
actual_options = mock_client_cls.call_args.kwargs["http_options"]
92+
assert actual_options.headers == {
93+
"x-goog-api-client": f"fishjam-python-server-sdk/{version}"
94+
}
95+
96+
97+
@patch("google.genai.Client")
98+
def test_create_client_with_object_options_existing_headers(
99+
mock_client_cls: MagicMock, version: str
100+
):
101+
http_options = types.HttpOptions(
102+
headers={
103+
"user-header": "123",
104+
"x-goog-api-client": "other",
105+
}
106+
)
107+
108+
GeminiIntegration.create_client(http_options=http_options)
109+
110+
mock_client_cls.assert_called_once()
111+
112+
actual_options = mock_client_cls.call_args.kwargs["http_options"]
113+
assert actual_options.headers == {
114+
"user-header": "123",
115+
"x-goog-api-client": f"fishjam-python-server-sdk/{version}",
116+
}

0 commit comments

Comments
 (0)