3 files changed: +24 −2 lines changed

@@ -577,6 +577,12 @@ python3 -m llama_cpp.server --model models/7B/llama-model.gguf --chat_format cha
That will format the prompt according to how the model expects it. You can find the prompt format in the model card.
For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_format.py) and look for lines starting with "@register_chat_format".

+If you have `huggingface-hub` installed, you can also use the `--hf_model_repo_id` flag to load a model from the Hugging Face Hub.
+
+```bash
+python3 -m llama_cpp.server --hf_model_repo_id Qwen/Qwen1.5-0.5B-Chat-GGUF --model '*q8_0.gguf'
+```
+
### Web Server Features

- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
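The new flag is a thin wrapper over the `Llama.from_pretrained` classmethod that the server-side diff below wires in. As a point of reference, here is a minimal sketch of the equivalent direct call in the Python API, reusing the repo id and filename pattern from the README example above (the sketch itself is not part of this diff, and assumes `llama-cpp-python` and `huggingface-hub` are installed):

```python
from llama_cpp import Llama

# Download (or reuse a cached copy of) the matching GGUF file from the Hub.
# The glob pattern selects the q8_0 quantization, mirroring
# --model '*q8_0.gguf' on the server command line.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
)

result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello."}],
)
print(result["choices"][0]["message"]["content"])
```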
@@ -120,9 +120,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                    kv_overrides[key] = float(value)
                else:
                    raise ValueError(f"Unknown value type {value_type}")
+
+    import functools

-    _model = llama_cpp.Llama(
-        model_path=settings.model,
+    kwargs = {}
+
+    if settings.hf_model_repo_id is not None:
+        create_fn = functools.partial(llama_cpp.Llama.from_pretrained, repo_id=settings.hf_model_repo_id, filename=settings.model)
+    else:
+        create_fn = llama_cpp.Llama
+        kwargs["model_path"] = settings.model
+
+
+    _model = create_fn(
+        **kwargs,
        # Model Params
        n_gpu_layers=settings.n_gpu_layers,
        main_gpu=settings.main_gpu,
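The `functools.partial` dispatch above keeps a single call site for both loading paths: the Hub-specific arguments (`repo_id`, `filename`) are bound early, while the long tail of shared model parameters (`n_gpu_layers`, `main_gpu`, ...) is passed once, unconditionally. A standalone sketch of the pattern, using hypothetical stand-in constructors rather than the library's API:

```python
import functools

# Hypothetical stand-ins for llama_cpp.Llama and Llama.from_pretrained.
def load_local(model_path: str, n_gpu_layers: int = 0) -> str:
    return f"local:{model_path} (n_gpu_layers={n_gpu_layers})"

def load_from_hub(repo_id: str, filename: str, n_gpu_layers: int = 0) -> str:
    return f"hub:{repo_id}/{filename} (n_gpu_layers={n_gpu_layers})"

def build(repo_id, model: str) -> str:
    kwargs = {}
    if repo_id is not None:
        # Bind the Hub-specific arguments now; shared kwargs are applied below.
        create_fn = functools.partial(load_from_hub, repo_id=repo_id, filename=model)
    else:
        create_fn = load_local
        kwargs["model_path"] = model
    # One call site for both paths; every shared parameter is written once.
    return create_fn(**kwargs, n_gpu_layers=4)

print(build(None, "models/7B/llama-model.gguf"))
print(build("Qwen/Qwen1.5-0.5B-Chat-GGUF", "*q8_0.gguf"))
```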
@@ -143,6 +143,11 @@ class ModelSettings(BaseSettings):
        default=None,
        description="The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained().",
    )
+    # Loading from HuggingFace Model Hub
+    hf_model_repo_id: Optional[str] = Field(
+        default=None,
+        description="The repo id to use for loading the model from the Hugging Face Hub.",
+    )
    # Speculative Decoding
    draft_model: Optional[str] = Field(
        default=None,
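Because `ModelSettings` extends pydantic's `BaseSettings`, the new field should also be settable from the environment, not just via the CLI flag. A sketch under that assumption (pydantic-settings v2 defaults, where env vars match field names case-insensitively; the demo class here is not the server's own):

```python
import os
from typing import Optional

from pydantic import Field
from pydantic_settings import BaseSettings

class DemoModelSettings(BaseSettings):
    # Mirrors the field added in this diff; assumes default env-name mapping.
    hf_model_repo_id: Optional[str] = Field(
        default=None,
        description="Repo id to load the model from the Hugging Face Hub.",
    )

os.environ["HF_MODEL_REPO_ID"] = "Qwen/Qwen1.5-0.5B-Chat-GGUF"
print(DemoModelSettings().hf_model_repo_id)  # -> Qwen/Qwen1.5-0.5B-Chat-GGUF
```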