Skip to content

Commit 7bcd2aa

Browse files
Add transcription example
1 parent 7a4cf6e commit 7bcd2aa

File tree

18 files changed

+947
-197
lines changed

18 files changed

+947
-197
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ jobs:
88
strategy: &python-matrix
99
matrix:
1010
python-version:
11-
- "3.10"
1211
- "3.11"
1312
- "3.12"
13+
- "3.13"
1414
name: static-checks
1515
steps:
1616
- name: Checkout code
@@ -28,7 +28,7 @@ jobs:
2828
enable-cache: true
2929

3030
- name: Install project dependencies
31-
run: uv sync --locked --all-extras --dev
31+
run: uv sync --locked --all-extras --dev --all-packages
3232

3333
- name: Format
3434
run: uv run format_check
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.12

examples/transcription/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Transcription demo
2+
3+
This directory contains a demo app, which uses [Fishjam](https://fishjam.io) and [Gemini Live API](https://ai.google.dev/gemini-api/docs/live)
4+
for real-time transcription of ongoing calls.
5+
6+
The application contains an HTTP server written in [FastAPI](https://fastapi.tiangolo.com/)
7+
and uses [uv](https://docs.astral.sh/uv/) for dependency management.
8+
9+
## Running
10+
11+
Make sure to [install uv](https://docs.astral.sh/uv/getting-started/installation/) if you don't have it already.
12+
13+
Once you have `uv` installed, fetch the dependencies with
14+
15+
```bash
16+
uv sync --all-packages
17+
```
18+
19+
To run the app, you will need 3 environment variables:
20+
21+
- `FISHJAM_ID`: Your Fishjam ID, which you can get on the [Fishjam website](https://fishjam.io/app)
22+
- `FISHJAM_MANAGEMENT_TOKEN`: Your Fishjam management token, which you can get on the [Fishjam website](https://fishjam.io/app).
23+
- `GEMINI_API_KEY`: An API key for the Gemini API. You can generate one on the [Gemini website](https://aistudio.google.com/app/apikey).
24+
25+
Once you have these variables, you can run the demo with
26+
27+
```bash
28+
FISHJAM_ID=<your-fishjam-id> \
29+
FISHJAM_MANAGEMENT_TOKEN=<your-management-token> \
30+
GEMINI_API_KEY=<your-api-token> \
31+
uv run fastapi dev
32+
```
33+
34+
Now, you can create peer tokens by going to <http://localhost:8000> (the default address used by `fastapi dev`).
35+
You can then use the [minimal-react](https://github.com/fishjam-cloud/web-client-sdk/tree/main/examples/react-client)
36+
demo app to connect as these peers and see your transcriptions live in the console!

examples/transcription/main.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from contextlib import asynccontextmanager
2+
from typing import Annotated
3+
4+
from fastapi import Depends, FastAPI
5+
from transcription.notifier import make_notifier
6+
from transcription.room import RoomService, fishjam
7+
from transcription.worker import async_worker
8+
9+
from fishjam import PeerOptions, SubscribeOptions
10+
11+
_room_service: RoomService | None = None
12+
13+
14+
def get_room_service():
    """Return the module-wide room service set up by the lifespan handler.

    Raises:
        RuntimeError: if no room service is available, i.e. the lifespan
            handler has not populated the module-level ``_room_service``.
    """
    if _room_service:
        return _room_service
    raise RuntimeError("Application skipped lifespan events!")
18+
19+
20+
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Set up the room service and notifier for the lifetime of the app.

    The background worker context stays open while the application is
    running, because the ``yield`` happens inside the ``async with`` block.
    """
    global _room_service

    async with async_worker() as worker:
        service = RoomService(worker)
        _room_service = service

        # Keep the notifier connection running as a background task.
        worker.run_in_background(make_notifier(service).connect())

        yield
29+
30+
31+
app = FastAPI(lifespan=lifespan)
32+
33+
34+
@app.get("/")
def get_peer(room_service: Annotated[RoomService, Depends(get_room_service)]):
    """Create a peer in the demo room and return its token."""
    room = room_service.get_room()
    options = PeerOptions(subscribe=SubscribeOptions())
    _peer, token = fishjam.create_peer(room.id, options)
    return token
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[project]
2+
name = "transcription"
3+
version = "0.1.0"
4+
description = "Fishjam transcription demo"
5+
readme = "README.md"
6+
requires-python = ">=3.11"
7+
dependencies = [
8+
"fastapi[standard]==0.116.0",
9+
"fishjam-server-sdk",
10+
"google-genai>=1.31.0",
11+
]
12+
13+
[tool.uv.sources]
14+
fishjam-server-sdk = { workspace = true }

examples/transcription/transcription/__init__.py

Whitespace-only changes.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import asyncio
2+
3+
from fishjam.agent import Agent, AgentResponseTrackData
4+
from transcription.worker import BackgroundWorker
5+
6+
from .transcription import TranscriptionSession
7+
8+
9+
class TranscriptionAgent:
    """Drives a Fishjam agent for one room, with one transcription session per peer.

    The agent connection is started in the background when the first peer
    enters, and is signalled to disconnect once the last peer has left.
    """

    def __init__(self, room_id: str, agent: Agent, worker: BackgroundWorker):
        self._room_id = room_id
        self._agent = agent
        self._worker = worker
        # Active transcription sessions, keyed by peer id.
        self._sessions: dict[str, TranscriptionSession] = {}
        # Set by _stop() so that _start() can leave the agent context.
        self._disconnect = asyncio.Event()

        @agent.on_track_data
        def _(track_data: AgentResponseTrackData):
            # Data from peers without a session is ignored.
            session = self._sessions.get(track_data.peer_id)
            if session is not None:
                session.transcribe(track_data.data)

    async def _start(self):
        """Hold the agent context open until a disconnect is requested."""
        async with self._agent:
            print(f"Agent connected to room {self._room_id}")
            await self._disconnect.wait()
            # Reset the event so it can be reused by a later _start().
            self._disconnect.clear()
        print(f"Agent disconnected from room {self._room_id}")

    def _stop(self):
        """Request that the running _start() coroutine finish."""
        self._disconnect.set()

    def _handle_transcription(self, peer_id: str, text: str):
        """Print a piece of transcribed text for the given peer."""
        print(f"Peer {peer_id} in room {self._room_id} said: {text}")

    def on_peer_enter(self, peer_id: str):
        """Start a transcription session for a peer that has no session yet."""
        if peer_id in self._sessions:
            return

        # The first peer brings the agent connection up.
        if not self._sessions:
            self._worker.run_in_background(self._start())

        def report(text: str):
            self._handle_transcription(peer_id, text)

        session = TranscriptionSession(report)
        self._sessions[peer_id] = session
        self._worker.run_in_background(session.start(peer_id))

    def on_peer_leave(self, peer_id: str):
        """End and forget the peer's session; stop the agent if none remain."""
        session = self._sessions.get(peer_id)
        if session is None:
            return

        session.end()
        del self._sessions[peer_id]

        if not self._sessions:
            self._stop()
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import os
2+
3+
from google.genai.types import AudioTranscriptionConfig, LiveConnectConfig, Modality
4+
5+
FISHJAM_ID = os.environ["FISHJAM_ID"]
6+
FISHJAM_TOKEN = os.environ["FISHJAM_MANAGEMENT_TOKEN"]
7+
FISHJAM_URL = os.getenv("FISHJAM_URL")
8+
TRANSCRIPTION_MODEL = "gemini-live-2.5-flash-preview"
9+
TRANSCRIPTION_CONFIG = LiveConnectConfig(
10+
response_modalities=[Modality.TEXT],
11+
input_audio_transcription=AudioTranscriptionConfig(),
12+
)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from fishjam import FishjamNotifier
2+
from fishjam.events import ServerMessagePeerConnected, ServerMessagePeerDisconnected
3+
from fishjam.events.allowed_notifications import AllowedNotification
4+
5+
from .config import FISHJAM_ID, FISHJAM_TOKEN, FISHJAM_URL
6+
from .room import RoomService
7+
8+
9+
def make_notifier(room_service: RoomService):
10+
notifier = FishjamNotifier(FISHJAM_ID, FISHJAM_TOKEN, fishjam_url=FISHJAM_URL)
11+
12+
@notifier.on_server_notification
13+
def _(notification: AllowedNotification):
14+
match notification:
15+
case ServerMessagePeerConnected(peer_id=peer_id, room_id=room_id):
16+
handle_peer_connected(peer_id, room_id)
17+
18+
case ServerMessagePeerDisconnected(peer_id=peer_id, room_id=room_id):
19+
handle_peer_disconnected(peer_id, room_id)
20+
21+
def handle_peer_connected(peer_id: str, room_id: str):
22+
if room_id == room_service.room.id:
23+
room_service.agent.on_peer_enter(peer_id)
24+
25+
def handle_peer_disconnected(peer_id: str, room_id: str):
26+
if room_id == room_service.room.id:
27+
room_service.agent.on_peer_leave(peer_id)
28+
29+
return notifier
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from fishjam import FishjamClient, Room
2+
from fishjam.errors import NotFoundError
3+
from transcription.worker import BackgroundWorker
4+
5+
from .agent import TranscriptionAgent
6+
from .config import FISHJAM_ID, FISHJAM_TOKEN, FISHJAM_URL
7+
8+
fishjam = FishjamClient(
9+
FISHJAM_ID,
10+
FISHJAM_TOKEN,
11+
fishjam_url=FISHJAM_URL,
12+
)
13+
14+
15+
class RoomService:
    """Owns the demo's Fishjam room together with its transcription agent."""

    def __init__(self, worker: BackgroundWorker):
        self._worker = worker
        self._create_room()

    def get_room(self) -> Room:
        """Return the current room, creating a new one if it no longer exists."""
        try:
            refreshed = fishjam.get_room(self.room.id)
        except NotFoundError:
            # The stored room is gone: create a new room and agent.
            self._create_room()
        else:
            self.room = refreshed
        return self.room

    def _create_room(self):
        """Create a new room and an agent bound to it."""
        self.room = fishjam.create_room()
        self._create_agent()

    def _create_agent(self):
        """Create the transcription agent for the current room."""
        room_id = self.room.id
        fishjam_agent = fishjam.create_agent(room_id)
        self.agent = TranscriptionAgent(room_id, fishjam_agent, self._worker)

    def get_agent(self):
        """Return the transcription agent of the current room."""
        return self.agent

0 commit comments

Comments
 (0)