|
9 | 9 | Llava15ChatHandler, |
10 | 10 | Llava16ChatHandler, |
11 | 11 | ) |
| 12 | +from huggingface_hub import hf_hub_download |
| 13 | +from tqdm import tqdm |
12 | 14 |
|
13 | 15 | from llama_assistant import config |
14 | 16 | from llama_assistant.agent import RAGAgent |
@@ -86,54 +88,88 @@ def load_agent( |
86 | 88 | if model.is_online(): |
87 | 89 | if model.model_type == "text" or model.model_type == "text-reasoning": |
88 | 90 | print("load online model") |
89 | | - loaded_model = Llama.from_pretrained( |
| 91 | + # Download with progress bar |
| 92 | + model_path = hf_hub_download( |
90 | 93 | repo_id=model.repo_id, |
91 | 94 | filename=model.filename, |
| 95 | + # NOTE(review): removed resume_download=True (deprecated no-op in huggingface_hub>=0.22; resume is default) |
| 96 | + # and tqdm_class=tqdm — hf_hub_download() has no such parameter (snapshot_download does) and would raise TypeError. |
| 97 | + ) |
| 98 | + loaded_model = Llama( |
| 99 | + model_path=model_path, |
92 | 100 | n_gpu_layers=-1, |
93 | 101 | n_ctx=generation_setting["context_len"], |
94 | 102 | ) |
95 | 103 | elif model.model_type == "image": |
96 | 104 | if "moondream2" in model.model_id: |
| 105 | + print("Downloading vision model projector...") |
97 | 106 | chat_handler = MoondreamChatHandler.from_pretrained( |
98 | 107 | repo_id="vikhyatk/moondream2", |
99 | 108 | filename="*mmproj*", |
100 | 109 | ) |
101 | | - loaded_model = Llama.from_pretrained( |
| 110 | + print("Downloading main model...") |
| 111 | + model_path = hf_hub_download( |
102 | 112 | repo_id=model.repo_id, |
103 | 113 | filename=model.filename, |
| 114 | + # NOTE(review): removed resume_download=True (deprecated no-op in huggingface_hub>=0.22; resume is default) |
| 115 | + # and tqdm_class=tqdm — hf_hub_download() has no such parameter (snapshot_download does) and would raise TypeError. |
| 116 | + ) |
| 117 | + loaded_model = Llama( |
| 118 | + model_path=model_path, |
104 | 119 | chat_handler=chat_handler, |
105 | 120 | n_ctx=generation_setting["context_len"], |
106 | 121 | ) |
107 | 122 | elif "MiniCPM" in model.model_id: |
| 123 | + print("Downloading vision model projector...") |
108 | 124 | chat_handler = MiniCPMv26ChatHandler.from_pretrained( |
109 | 125 | repo_id=model.repo_id, |
110 | 126 | filename="*mmproj*", |
111 | 127 | ) |
112 | | - loaded_model = Llama.from_pretrained( |
| 128 | + print("Downloading main model...") |
| 129 | + model_path = hf_hub_download( |
113 | 130 | repo_id=model.repo_id, |
114 | 131 | filename=model.filename, |
| 132 | + # NOTE(review): removed resume_download=True (deprecated no-op in huggingface_hub>=0.22; resume is default) |
| 133 | + # and tqdm_class=tqdm — hf_hub_download() has no such parameter (snapshot_download does) and would raise TypeError. |
| 134 | + ) |
| 135 | + loaded_model = Llama( |
| 136 | + model_path=model_path, |
115 | 137 | chat_handler=chat_handler, |
116 | 138 | n_ctx=generation_setting["context_len"], |
117 | 139 | ) |
118 | 140 | elif "llava-v1.5" in model.model_id: |
| 141 | + print("Downloading vision model projector...") |
119 | 142 | chat_handler = Llava15ChatHandler.from_pretrained( |
120 | 143 | repo_id=model.repo_id, |
121 | 144 | filename="*mmproj*", |
122 | 145 | ) |
123 | | - loaded_model = Llama.from_pretrained( |
| 146 | + print("Downloading main model...") |
| 147 | + model_path = hf_hub_download( |
124 | 148 | repo_id=model.repo_id, |
125 | 149 | filename=model.filename, |
| 150 | + # NOTE(review): removed resume_download=True (deprecated no-op in huggingface_hub>=0.22; resume is default) |
| 151 | + # and tqdm_class=tqdm — hf_hub_download() has no such parameter (snapshot_download does) and would raise TypeError. |
| 152 | + ) |
| 153 | + loaded_model = Llama( |
| 154 | + model_path=model_path, |
126 | 155 | chat_handler=chat_handler, |
127 | 156 | n_ctx=generation_setting["context_len"], |
128 | 157 | ) |
129 | 158 | elif "llava-v1.6" in model.model_id: |
| 159 | + print("Downloading vision model projector...") |
130 | 160 | chat_handler = Llava16ChatHandler.from_pretrained( |
131 | 161 | repo_id=model.repo_id, |
132 | 162 | filename="*mmproj*", |
133 | 163 | ) |
134 | | - loaded_model = Llama.from_pretrained( |
| 164 | + print("Downloading main model...") |
| 165 | + model_path = hf_hub_download( |
135 | 166 | repo_id=model.repo_id, |
136 | 167 | filename=model.filename, |
| 168 | + # NOTE(review): removed resume_download=True (deprecated no-op in huggingface_hub>=0.22; resume is default) |
| 169 | + # and tqdm_class=tqdm — hf_hub_download() has no such parameter (snapshot_download does) and would raise TypeError. |
| 170 | + ) |
| 171 | + loaded_model = Llama( |
| 172 | + model_path=model_path, |
137 | 173 | chat_handler=chat_handler, |
138 | 174 | n_ctx=generation_setting["context_len"], |
139 | 175 | ) |
|
0 commit comments