|
19 | 19 | "metadata": {},
|
20 | 20 | "source": [
|
21 | 21 | "### 📊 Quick-look\n",
|
22 |
| - "| Mode | Latency to **first token** | Best for (real examples) | What you still need to handle / key limits |\n", |
23 |
| - "|--------------------------------|-------------------------|--------------------------------------------------------------|-----------------------------------------------------------|\n", |
24 |
| - "| File upload + `stream=False` (blocking) | seconds | Voicemail, meeting recordings | • No partial results, users see nothing until file finishes <br>• Max 25 MB per request (you must chunk long audio) |\n", |
25 |
| - "| File upload + `stream=True` | subseconds | Voice memos in mobile apps | • Still requires a completed file <br>• You implement progress bars / chunked uploads |\n", |
26 |
| - "| Realtime WebSocket | subseconds | Live captions in webinars | • Audio must be pcm16, g711_ulaw, or g711_alaw <br>• Session ≤ 30 min, reconnect & stitch <br>• You handle speaker-turn formatting to build the full transcript |\n", |
27 |
| - "| Agents SDK VoicePipeline | subseconds | Internal help-desk assistant | • Python-only beta <br>• API surface may change |" |
| 22 | + "| Mode | Latency to **first token** | Best for (real examples) | Advantages | What you still need to handle / key limits |\n", |
| 23 | + "|--------------------------------|---------------------------|--------------------------------------------------------------|-----------------------------------------------------------|-----------------------------------------------------------|\n", |
| 24 | + "| File upload + `stream=False` (blocking) | seconds | Voicemail, meeting recordings | Simple to set up | • No partial results; users see nothing until the file finishes <br>• Max 25 MB per request (you must chunk long audio) |\n", |
| 25 | + "| File upload + `stream=True` | subseconds | Voice memos in mobile apps | Simple to set up; token streaming provides a “live” feel | • Still requires a completed file <br>• You implement progress bars / chunked uploads |\n", |
| 26 | + "| Realtime WebSocket | subseconds | Live captions in webinars | True real-time; accepts a continuous audio stream | • Audio must be pcm16, g711_ulaw, or g711_alaw <br>• Session ≤ 30 min, reconnect & stitch <br>• You handle speaker-turn formatting to build the full transcript |\n", |
| 27 | + "| Agents SDK VoicePipeline | subseconds | Internal help-desk assistant | Real-time streaming; makes agentic workflows easy to build | • Python-only beta <br>• API surface may change |" |
28 | 28 | ]
|
29 | 29 | },
|
30 | 30 | {
|
|
88 | 88 | "import os\n",
|
89 | 89 | "import time\n",
|
90 | 90 | "from typing import List\n",
|
| 91 | + "from pathlib import Path\n", |
91 | 92 | "\n",
|
92 | 93 | "# ─── Third-Party ───────────────────────────────────────────────────────────────\n",
|
93 | 94 | "import nest_asyncio\n",
|
|
103 | 104 | " VoicePipeline,\n",
|
104 | 105 | " VoicePipelineConfig,\n",
|
105 | 106 | ")\n",
|
| 107 | + "from IPython.display import Audio, display\n", |
106 | 108 | "# ───────────────────────────────────────────────────────────────────────────────\n",
|
107 | 109 | "nest_asyncio.apply()\n",
|
108 | 110 | "\n",
|
|
184 | 186 | }
|
185 | 187 | ],
|
186 | 188 | "source": [
|
187 |
| - "from IPython.display import Audio, display\n", |
188 |
| - "from pathlib import Path\n", |
189 | 189 | "AUDIO_PATH = Path('./data/sample_audio_files/lotsoftimes-78085.mp3') # change me\n",
|
190 | 190 | "MODEL_NAME = \"gpt-4o-transcribe\"\n",
|
191 | 191 | "\n",
|
|
617 | 617 | "[User]: Like these next few links.\n",
|
618 | 618 | "[Assistant]: Comme ces quelques liens suivants."
|
619 | 619 | ]
|
| 620 | + }, |
| 621 | + { |
| 622 | + "name": "stderr", |
| 623 | + "output_type": "stream", |
| 624 | + "text": [ |
| 625 | + "Error processing turns: no close frame received or sent\n" |
| 626 | + ] |
620 | 627 | }
|
621 | 628 | ],
|
622 | 629 | "source": [
|
|
0 commit comments