|
19 | 19 | "metadata": {}, |
20 | 20 | "source": [ |
21 | 21 | "### 📊 Quick-look\n", |
22 | | - "| Mode | Latency to **first token** | Best for (real examples) | What you still need to handle / key limits |\n", |
23 | | - "|--------------------------------|-------------------------|--------------------------------------------------------------|-----------------------------------------------------------|\n", |
24 | | - "| File upload + `stream=False` (blocking) | seconds | Voicemail, meeting recordings | • No partial results, users see nothing until file finishes <br>• Max 25 MB per request (you must chunk long audio) |\n", |
25 | | - "| File upload + `stream=True` | subseconds | Voice memos in mobile apps | • Still requires a completed file <br>• You implement progress bars / chunked uploads |\n", |
26 | | - "| Realtime WebSocket | subseconds | Live captions in webinars | • Audio must be pcm16, g711_ulaw, or g711_alaw <br>• Session ≤ 30 min, reconnect & stitch <br>• You handle speaker-turn formatting to build the full transcript |\n", |
27 | | - "| Agents SDK VoicePipeline | subseconds | Internal help-desk assistant | • Python-only beta <br>• API surface may change |" |
| 22 | + "| Mode | Latency to **first token** | Best for (real examples) | Advantages | What you still need to handle / key limits |\n", |
| 23 | + "|--------------------------------|---------------------------|--------------------------------------------------------------|-----------------------------------------------------------|-----------------------------------------------------------|\n", |
| 24 | + "| File upload + `stream=False` (blocking) | seconds | Voicemail, meeting recordings | Simple to set up | • No partial results, users see nothing until file finishes <br>• Max 25 MB per request (you must chunk long audio) |\n", |
| 25 | + "| File upload + `stream=True` | sub-second | Voice memos in mobile apps | Simple to set up & provides a “live” feel via token streaming | • Still requires a completed file <br>• You implement progress bars / chunked uploads |\n", |
| 26 | + "| Realtime WebSocket | sub-second | Live captions in webinars | True real-time; accepts a continuous audio stream | • Audio must be pcm16, g711_ulaw, or g711_alaw <br>• Session ≤ 30 min, reconnect & stitch <br>• You handle speaker-turn formatting to build the full transcript |\n", |
| 27 | + "| Agents SDK VoicePipeline | sub-second | Internal help-desk assistant | Real-time streaming and easy to build agentic workflows | • Python-only beta <br>• API surface may change |" |
28 | 28 | ] |
29 | 29 | }, |
30 | 30 | { |
|
88 | 88 | "import os\n", |
89 | 89 | "import time\n", |
90 | 90 | "from typing import List\n", |
| 91 | + "from pathlib import Path\n", |
91 | 92 | "\n", |
92 | 93 | "# ─── Third-Party ───────────────────────────────────────────────────────────────\n", |
93 | 94 | "import nest_asyncio\n", |
|
103 | 104 | " VoicePipeline,\n", |
104 | 105 | " VoicePipelineConfig,\n", |
105 | 106 | ")\n", |
| 107 | + "from IPython.display import Audio, display\n", |
106 | 108 | "# ───────────────────────────────────────────────────────────────────────────────\n", |
107 | 109 | "nest_asyncio.apply()\n", |
108 | 110 | "\n", |
|
184 | 186 | } |
185 | 187 | ], |
186 | 188 | "source": [ |
187 | | - "from IPython.display import Audio, display\n", |
188 | | - "from pathlib import Path\n", |
189 | 189 | "AUDIO_PATH = Path('./data/sample_audio_files/lotsoftimes-78085.mp3') # change me\n", |
190 | 190 | "MODEL_NAME = \"gpt-4o-transcribe\"\n", |
191 | 191 | "\n", |
|
617 | 617 | "[User]: Like these next few links.\n", |
618 | 618 | "[Assistant]: Comme ces quelques liens suivants." |
619 | 619 | ] |
| 620 | + }, |
| 621 | + { |
| 622 | + "name": "stderr", |
| 623 | + "output_type": "stream", |
| 624 | + "text": [ |
| 625 | + "Error processing turns: no close frame received or sent\n" |
| 626 | + ] |
620 | 627 | } |
621 | 628 | ], |
622 | 629 | "source": [ |
|
0 commit comments