|
80 | 80 | "outputs": [], |
81 | 81 | "source": [ |
82 | 82 | "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl\n", |
83 | | - "%pip install --quiet git+https://github.com/mozilla-ai/document-to-podcast.git@text-to-speech-model\n", |
| 83 | + "%pip install --quiet git+https://github.com/mozilla-ai/document-to-podcast.git\n", |
84 | 84 | "%pip install --quiet phonemizer" |
85 | 85 | ] |
86 | 86 | }, |
|
163 | 163 | "print(clean_text[:200])" |
164 | 164 | ] |
165 | 165 | }, |
166 | | - { |
167 | | - "cell_type": "markdown", |
168 | | - "metadata": {}, |
169 | | - "source": [ |
170 | | - "## Downloading and loading models" |
171 | | - ] |
172 | | - }, |
173 | 166 | { |
174 | 167 | "cell_type": "markdown", |
175 | 168 | "metadata": {}, |
176 | 169 | "source": [ |
177 | 170 | "[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-2-podcast-script-generation)" |
178 | 171 | ] |
179 | 172 | }, |
180 | | - { |
181 | | - "cell_type": "markdown", |
182 | | - "metadata": {}, |
183 | | - "source": [ |
184 | | - "For this demo, we are using the following models:\n", |
185 | | - " - [Qwen2.5-3B-Instruct](https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF)\n", |
186 | | - " - [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)" |
187 | | - ] |
188 | | - }, |
189 | | - { |
190 | | - "cell_type": "markdown", |
191 | | - "metadata": {}, |
192 | | - "source": [ |
193 | | - "You can check the [Customization Guide](https://mozilla-ai.github.io/document-to-podcast/customization/) for more information on how to use different models." |
194 | | - ] |
195 | | - }, |
196 | | - { |
197 | | - "cell_type": "code", |
198 | | - "execution_count": null, |
199 | | - "metadata": {}, |
200 | | - "outputs": [], |
201 | | - "source": [ |
202 | | - "from document_to_podcast.inference.model_loaders import (\n", |
203 | | - " load_llama_cpp_model,\n", |
204 | | - " load_tts_model,\n", |
205 | | - ")\n", |
206 | | - "\n", |
207 | | - "text_model = load_llama_cpp_model(\n", |
208 | | - " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", |
209 | | - ")\n", |
210 | | - "speech_model = load_tts_model(\"hexgrad/kLegacy/v0.19/kokoro-v0_19.pth\")" |
211 | | - ] |
212 | | - }, |
213 | | - { |
214 | | - "cell_type": "code", |
215 | | - "execution_count": null, |
216 | | - "metadata": {}, |
217 | | - "outputs": [], |
218 | | - "source": [ |
219 | | - "max_characters = text_model.n_ctx() * 4\n", |
220 | | - "if len(clean_text) > max_characters:\n", |
221 | | - " print(\n", |
222 | | - " f\"Input text is too big ({len(clean_text)}).\"\n", |
223 | | - " f\" Using only a subset of it ({max_characters}).\"\n", |
224 | | - " )\n", |
225 | | - " clean_text = clean_text[:max_characters]" |
226 | | - ] |
227 | | - }, |
228 | 173 | { |
229 | 174 | "cell_type": "markdown", |
230 | 175 | "metadata": {}, |
|
310 | 255 | "print(system_prompt)" |
311 | 256 | ] |
312 | 257 | }, |
| 258 | + { |
| 259 | + "cell_type": "markdown", |
| 260 | + "metadata": {}, |
| 261 | + "source": [ |
| 262 | + "## Downloading and loading models" |
| 263 | + ] |
| 264 | + }, |
| 265 | + { |
| 266 | + "cell_type": "markdown", |
| 267 | + "metadata": {}, |
| 268 | + "source": [ |
| 269 | + "For this demo, we are using the following models:\n", |
| 270 | + " - [Qwen2.5-7B-Instruct](https://huggingface.co/bartowski/Qwen2.5-7B-Instruct-GGUF)\n", |
| 271 | + " - [hexgrad/Kokoro-82M](https://github.com/hexgrad/kokoro)" |
| 272 | + ] |
| 273 | + }, |
| 274 | + { |
| 275 | + "cell_type": "markdown", |
| 276 | + "metadata": {}, |
| 277 | + "source": [ |
| 278 | + "You can check the [Customization Guide](https://mozilla-ai.github.io/document-to-podcast/customization/) for more information on how to use different models." |
| 279 | + ] |
| 280 | + }, |
| 281 | + { |
| 282 | + "cell_type": "code", |
| 283 | + "execution_count": null, |
| 284 | + "metadata": {}, |
| 285 | + "outputs": [], |
| 286 | + "source": [ |
| 287 | + "from document_to_podcast.inference.model_loaders import (\n", |
| 288 | + "    load_llama_cpp_model,\n", |
| 289 | + "    load_tts_model,\n", |
| 290 | + ")\n", |
| 291 | + "\n", |
| 292 | + "# The first character of a Kokoro voice profile is its language code; the TTS model is loaded for one code\n", |
| 293 | + "language_code = speakers[0][\"voice_profile\"][0]\n", |
| 294 | + "if any(speaker[\"voice_profile\"][0] != language_code for speaker in speakers):\n", |
| 295 | + "    raise ValueError(\n", |
| 296 | + "        \"All Kokoro speakers need to have the same language code. \"\n", |
| 297 | + "        \"More info here https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md\"\n", |
| 298 | + "    )\n", |
| 299 | + "\n", |
| 300 | + "text_model = load_llama_cpp_model(\n", |
| 301 | + "    \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", |
| 302 | + ")\n", |
| 303 | + "speech_model = load_tts_model(\"hexgrad/Kokoro-82M\", lang_code=language_code)" |
| 304 | + ] |
| 305 | + }, |
| 306 | + { |
| 307 | + "cell_type": "code", |
| 308 | + "execution_count": null, |
| 309 | + "metadata": {}, |
| 310 | + "outputs": [], |
| 311 | + "source": [ |
| 312 | + "max_characters = 4 * text_model.n_ctx()  # character budget scaled from the model's context size\n", |
| 313 | + "if max_characters < len(clean_text):\n", |
| 314 | + "    print(\n", |
| 315 | + "        f\"Input text is too big ({len(clean_text)}).\"\n", |
| 316 | + "        f\" Using only a subset of it ({max_characters}).\"\n", |
| 317 | + "    )\n", |
| 318 | + "    clean_text = clean_text[:max_characters]  # keep only the leading characters" |
| 319 | + ] |
| 320 | + }, |
313 | 321 | { |
314 | 322 | "cell_type": "markdown", |
315 | 323 | "metadata": {}, |
|
405 | 413 | } |
406 | 414 | ], |
407 | 415 | "metadata": { |
| 416 | + "kernelspec": { |
| 417 | + "display_name": "Python 3 (ipykernel)", |
| 418 | + "language": "python", |
| 419 | + "name": "python3" |
| 420 | + }, |
408 | 421 | "language_info": { |
409 | | - "name": "python" |
| 422 | + "codemirror_mode": { |
| 423 | + "name": "ipython", |
| 424 | + "version": 3 |
| 425 | + }, |
| 426 | + "file_extension": ".py", |
| 427 | + "mimetype": "text/x-python", |
| 428 | + "name": "python", |
| 429 | + "nbconvert_exporter": "python", |
| 430 | + "pygments_lexer": "ipython3", |
| 431 | + "version": "3.12.3" |
410 | 432 | } |
411 | 433 | }, |
412 | 434 | "nbformat": 4, |
413 | | - "nbformat_minor": 0 |
| 435 | + "nbformat_minor": 4 |
414 | 436 | } |
0 commit comments