Commit 9d6d384

displaying model download duration
1 parent f5a2537 commit 9d6d384

File tree

1 file changed (+20 -6 lines changed)

src/open_codex/agents/phi_4_mini.py

Lines changed: 20 additions & 6 deletions
@@ -1,25 +1,39 @@
+import time
 from typing import List, cast
 from llama_cpp import CreateCompletionResponse, Llama
 from open_codex.interfaces.llm_agent import LLMAgent
 import contextlib
 import os
-from huggingface_hub import hf_hub_download # type: ignore
+from huggingface_hub import hf_hub_download # type: ignore
 
 class AgentPhi4Mini(LLMAgent):
     def __init__(self, system_prompt: str):
+        model_filename = "Phi-4-mini-instruct-Q3_K_L.gguf"
+        repo_id = "lmstudio-community/Phi-4-mini-instruct-GGUF"
+
+        print("\n⏬ Downloading model from Hugging Face...")
+        start = time.time()
+        model_path: str = hf_hub_download(
+            repo_id=repo_id,
+            filename=model_filename,
+            local_dir=os.path.expanduser("~/.cache/open-codex"),
+            local_dir_use_symlinks=False,
+            resume_download=True
+        )
+        end = time.time()
+        print(f"✅ Model downloaded in {end - start:.2f}s\n")
+
         # suppress the stderr output from llama_cpp
         # this is a workaround for the llama_cpp library
         # which prints a lot of warnings and errors to stderr
         # when loading the model
         # this is a temporary solution until the library is fixed
         with AgentPhi4Mini.suppress_native_stderr():
-            self.llm: Llama = Llama.from_pretrained( # type: ignore
-                repo_id="lmstudio-community/Phi-4-mini-instruct-GGUF",
-                filename="Phi-4-mini-instruct-Q3_K_L.gguf",
-                additional_files=[],
-            )
+            self.llm: Llama = Llama(model_path=model_path) # type: ignore
+
         self.system_prompt = system_prompt
 
+
     def one_shot_mode(self, user_input: str) -> str:
         chat_history = [{"role": "system", "content": self.system_prompt}]
         chat_history.append({"role": "user", "content": user_input})
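For context, a minimal sketch of how the new constructor flow behaves at runtime. This is a hedged usage example, not part of the commit: the import path mirrors the file above, while the prompt string is made up.

```python
# Minimal usage sketch (assumes open-codex plus its llama-cpp-python and
# huggingface_hub dependencies are installed; the prompt is hypothetical).
from open_codex.agents.phi_4_mini import AgentPhi4Mini

agent = AgentPhi4Mini(system_prompt="You are a helpful shell assistant.")
# First run: hf_hub_download fetches the GGUF file into ~/.cache/open-codex
# and the new banner reports the actual transfer time.
# Later runs: the file is already cached, so hf_hub_download just resolves
# the local path and the printed duration is close to zero.
```

Because the timer brackets only the hf_hub_download call, a near-zero reading indicates a cache hit rather than a fast download; the banner prints in both cases.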
