Commit 82f7779

enh(text-to-text): Update default to Qwen2.5-3B-Instruct. (#93)
It has 2x the context length and a better score on https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/
1 parent f9e0728 commit 82f7779

File tree

7 files changed: 11 additions, 12 deletions


README.md

Lines changed: 1 addition & 1 deletion

@@ -102,7 +102,7 @@ The architecture of this codebase focuses on modularity and adaptability, meanin
 
 ### text-to-text
 
-We are using the [llama.cpp](https://github.com/ggerganov/llama.cpp) library, which supports open source models optimized for local inference and minimal hardware requirements. The default text-to-text model in this repo is the open source [OLMoE-7B-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct) from [AllenAI](https://allenai.org/).
+We are using the [llama.cpp](https://github.com/ggerganov/llama.cpp) library, which supports open source models optimized for local inference and minimal hardware requirements. The default text-to-text model in this repo is the open source [Qwen2.5-3B-Instruct](https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF).
 
 For the complete list of models supported out-of-the-box, visit this [link](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#text-only).
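Throughout this change the model is referenced as a single "org/repo/filename.gguf" string. A minimal sketch of how such an id maps onto llama-cpp-python's `Llama.from_pretrained` (the `split_model_id` helper is illustrative, not part of the repo; the actual load is commented out because the f16 file is several gigabytes):

```python
# Illustrative helper, not part of the repo: split the single-string id
# "org/repo/file.gguf" into the (repo_id, filename) pair that
# llama-cpp-python's Llama.from_pretrained expects.

def split_model_id(model_id: str) -> tuple[str, str]:
    """Split "org/repo/file.gguf" into (repo_id, filename)."""
    org, repo, filename = model_id.split("/")
    return f"{org}/{repo}", filename

repo_id, filename = split_model_id(
    "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
)
print(repo_id)   # bartowski/Qwen2.5-3B-Instruct-GGUF
print(filename)  # Qwen2.5-3B-Instruct-f16.gguf

# The actual load (commented out: it downloads a multi-GB f16 file):
# from llama_cpp import Llama
# model = Llama.from_pretrained(repo_id=repo_id, filename=filename)
```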

demo/app.py

Lines changed: 2 additions & 2 deletions

@@ -22,7 +22,7 @@
 @st.cache_resource
 def load_text_to_text_model():
     return load_llama_cpp_model(
-        model_id="allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf"
+        model_id="bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
     )
 
 
@@ -117,7 +117,7 @@ def gen_button_clicked():
 
     st.markdown(
         "For this demo, we are using the following models: \n"
-        "- [OLMoE-1B-7B-0924-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct-GGUF)\n"
+        "- [Qwen2.5-3B-Instruct](https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF)\n"
         "- [OuteAI/OuteTTS-0.2-500M](https://huggingface.co/OuteAI/OuteTTS-0.2-500M-GGUF)"
     )
     st.markdown(

demo/notebook.ipynb

Lines changed: 2 additions & 2 deletions

@@ -172,7 +172,7 @@
   "metadata": {},
   "source": [
    "For this demo, we are using the following models:\n",
-    " - [OLMoE-1B-7B-0924-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct-GGUF)\n",
+    " - [Qwen2.5-3B-Instruct](https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF)\n",
    " - [OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf](https://huggingface.co/OuteAI/OuteTTS-0.2-500M-GGUF)"
   ]
  },
@@ -195,7 +195,7 @@
    ")\n",
    "\n",
    "text_model = load_llama_cpp_model(\n",
-    "    \"allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf\"\n",
+    "    \"bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf\"\n",
    ")\n",
    "speech_model = load_tts_model(\"OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf\")"
   ]

docs/step-by-step-guide.md

Lines changed: 1 addition & 1 deletion

@@ -106,7 +106,7 @@ from document_to_podcast.inference.text_to_text import text_to_text, text_to_tex
 
 # Load the model
 model = load_llama_cpp_model(
-    "allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf"
+    "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
 )
 
 # Define your input and system prompt

example_data/config.yaml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 input_file: "example_data/introducing-mozilla-ai-investing-in-trustworthy-ai.html"
 output_folder: "example_data/"
-text_to_text_model: "allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf"
+text_to_text_model: "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
 text_to_speech_model: "OuteAI/OuteTTS-0.1-350M-GGUF/OuteTTS-0.1-350M-FP16.gguf"
 text_to_text_prompt: |
   You are a podcast scriptwriter generating engaging and natural-sounding conversations in JSON format.
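The new default points at the full-precision f16 file, which is a multi-gigabyte download. A smaller quantized variant from the same GGUF repo can be swapped in by changing only the filename; the example below is hypothetical, and the exact quant filename should be verified against the Hugging Face repo:

```yaml
# Hypothetical alternative: a quantized variant of the same model.
# Check bartowski/Qwen2.5-3B-Instruct-GGUF for the exact filename.
text_to_text_model: "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q8_0.gguf"
```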

src/document_to_podcast/cli.py

Lines changed: 3 additions & 4 deletions

@@ -11,7 +11,6 @@
     Speaker,
     DEFAULT_PROMPT,
     DEFAULT_SPEAKERS,
-    TTS_LOADERS,
 )
 from document_to_podcast.inference.model_loaders import (
     load_llama_cpp_model,
@@ -27,9 +26,9 @@
 def document_to_podcast(
     input_file: str | None = None,
     output_folder: str | None = None,
-    text_to_text_model: str = "allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf",
+    text_to_text_model: str = "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf",
     text_to_text_prompt: str = DEFAULT_PROMPT,
-    text_to_speech_model: TTS_LOADERS = "OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf",
+    text_to_speech_model: str = "OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf",
     speakers: list[Speaker] | None = None,
     outetts_language: str = "en",  # Only applicable to OuteTTS models
     from_config: str | None = None,
@@ -59,7 +58,7 @@ def document_to_podcast(
 
             Need to be a gguf file.
 
-            Defaults to `allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf`.
+            Defaults to `bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf`.
 
         text_to_text_prompt (str, optional): The prompt for the text-to-text model.
             Defaults to DEFAULT_PROMPT.
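The docstring above requires the text-to-text model id to be a gguf file. A tiny hypothetical helper illustrating that constraint (not part of the repo):

```python
# Hypothetical helper illustrating the "need to be a gguf file" constraint
# from the cli.py docstring; not part of the actual repo.

def is_valid_gguf_id(model_id: str) -> bool:
    """True if the id looks like "org/repo/file.gguf"."""
    parts = model_id.split("/")
    return len(parts) == 3 and parts[2].endswith(".gguf")

assert is_valid_gguf_id(
    "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
)
assert not is_valid_gguf_id("bartowski/Qwen2.5-3B-Instruct-GGUF")
```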

src/document_to_podcast/inference/model_loaders.py

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@ def load_llama_cpp_model(model_id: str) -> Llama:
     Loads the given model_id using Llama.from_pretrained.
 
     Examples:
-        >>> model = load_llama_cpp_model("allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf")
+        >>> model = load_llama_cpp_model("bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf")
 
     Args:
         model_id (str): The model id to load.

0 commit comments
