+import time
 from typing import List, cast
 from llama_cpp import CreateCompletionResponse, Llama
 from open_codex.interfaces.llm_agent import LLMAgent
 import contextlib
 import os
-
+from huggingface_hub import hf_hub_download # type: ignore
 
 class AgentPhi4Mini(LLMAgent):
     def __init__(self, system_prompt: str):
+        model_filename = "Phi-4-mini-instruct-Q3_K_L.gguf"
+        repo_id = "lmstudio-community/Phi-4-mini-instruct-GGUF"
+
+        print("\n⏬ Downloading model from Hugging Face...")
+        start = time.time()
+        model_path: str = hf_hub_download(
+            repo_id=repo_id,
+            filename=model_filename,
+            local_dir=os.path.expanduser("~/.cache/open-codex"),
+            local_dir_use_symlinks=False,
+            resume_download=True
+        )
+        end = time.time()
+        print(f"✅ Model downloaded in {end - start:.2f}s\n")
+
         # suppress the stderr output from llama_cpp
         # this is a workaround for the llama_cpp library
         # which prints a lot of warnings and errors to stderr
         # when loading the model
         # this is a temporary solution until the library is fixed
         with AgentPhi4Mini.suppress_native_stderr():
-            self.llm: Llama = Llama.from_pretrained( # type: ignore
-                repo_id="lmstudio-community/Phi-4-mini-instruct-GGUF",
-                filename="Phi-4-mini-instruct-Q3_K_L.gguf",
-                additional_files=[],
-            )
+            self.llm: Llama = Llama(model_path=model_path) # type: ignore
+
         self.system_prompt = system_prompt
 
+
     def one_shot_mode(self, user_input: str) -> str:
         chat_history = [{"role": "system", "content": self.system_prompt}]
         chat_history.append({"role": "user", "content": user_input})
|