Skip to content

Commit 3334a0b

Browse files
committed
Use bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf as default model
1 parent 1b2dd46 commit 3334a0b

File tree

6 files changed

+7
-7
lines changed

6 files changed

+7
-7
lines changed

demo/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
@st.cache_resource
2424
def load_text_to_text_model():
2525
return load_llama_cpp_model(
26-
model_id="bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
26+
model_id="bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf"
2727
)
2828

2929

demo/notebook.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@
205205
")\n",
206206
"\n",
207207
"text_model = load_llama_cpp_model(\n",
208-
" \"bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf\"\n",
208+
" \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n",
209209
")\n",
210210
"speech_model = load_tts_model(\"hexgrad/kLegacy/v0.19/kokoro-v0_19.pth\")"
211211
]

docs/step-by-step-guide.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ from document_to_podcast.inference.text_to_text import text_to_text, text_to_tex
106106

107107
# Load the model
108108
model = load_llama_cpp_model(
109-
"bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
109+
"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf"
110110
)
111111

112112
# Define your input and system prompt

example_data/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
input_file: "example_data/introducing-mozilla-ai-investing-in-trustworthy-ai.html"
22
output_folder: "example_data/"
3-
text_to_text_model: "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
3+
text_to_text_model: "bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf"
44
text_to_speech_model: "OuteAI/OuteTTS-0.1-350M-GGUF/OuteTTS-0.1-350M-FP16.gguf"
55
text_to_text_prompt: |
66
You are a podcast scriptwriter generating engaging and natural-sounding conversations in JSON format.

src/document_to_podcast/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
def document_to_podcast(
2727
input_file: str | None = None,
2828
output_folder: str | None = None,
29-
text_to_text_model: str = "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf",
29+
text_to_text_model: str = "bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf",
3030
text_to_text_prompt: str = DEFAULT_PROMPT,
3131
text_to_speech_model: str = "OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf",
3232
speakers: list[Speaker] | None = None,
@@ -58,7 +58,7 @@ def document_to_podcast(
5858
5959
Need to be a gguf file.
6060
61-
Defaults to `bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf`.
61+
Defaults to `bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf`.
6262
6363
text_to_text_prompt (str, optional): The prompt for the text-to-text model.
6464
Defaults to DEFAULT_PROMPT.

src/document_to_podcast/inference/model_loaders.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def load_llama_cpp_model(model_id: str) -> Llama:
1010
Loads the given model_id using Llama.from_pretrained.
1111
1212
Examples:
13-
>>> model = load_llama_cpp_model("bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf")
13+
>>> model = load_llama_cpp_model("bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf")
1414
1515
Args:
1616
model_id (str): The model id to load.

0 commit comments

Comments (0)