@@ -65,6 +65,7 @@ class Model:
6565 model_name : str | None
6666 metadata_override : Path | None
6767 dir_model_card : Path
68+ remote_hf_model_id : str | None
6869
6970 # subclasses should define this!
7071 model_arch : gguf .MODEL_ARCH
@@ -73,7 +74,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
7374 use_temp_file : bool = False , eager : bool = False ,
7475 metadata_override : Path | None = None , model_name : str | None = None ,
7576 split_max_tensors : int = 0 , split_max_size : int = 0 , dry_run : bool = False ,
76- small_first_shard : bool = False , hparams : dict [str , Any ] | None = None ):
77+ small_first_shard : bool = False , hparams : dict [str , Any ] | None = None , remote_hf_model_id : str | None = None ):
7778 if type (self ) is Model :
7879 raise TypeError (f"{ type (self ).__name__ !r} should not be directly instantiated" )
7980
@@ -83,11 +84,24 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
8384 self .is_big_endian = is_big_endian
8485 self .endianess = gguf .GGUFEndian .BIG if is_big_endian else gguf .GGUFEndian .LITTLE
8586 self .use_temp_file = use_temp_file
86- self .lazy = not eager
87- self .part_names = Model .get_model_part_names (self .dir_model , "model" , ".safetensors" )
88- self .is_safetensors = len (self .part_names ) > 0
89- if not self .is_safetensors :
90- self .part_names = Model .get_model_part_names (self .dir_model , "pytorch_model" , ".bin" )
87+ self .lazy = not eager or (remote_hf_model_id is not None )
88+ self .remote_hf_model_id = remote_hf_model_id
89+ if remote_hf_model_id is not None :
90+ self .is_safetensors = True
91+
def get_remote_tensors() -> Iterator[tuple[str, Tensor]]:
    """Yield ``(name, lazy tensor)`` pairs for every tensor of the remote model.

    The tensor listing is obtained from
    ``gguf.utility.SafetensorRemote.get_list_tensors_hf_model`` for the
    enclosing ``remote_hf_model_id``.

    Side effect: ``self.tensor_names`` is set to the names in that listing.
    Because this is a generator, that happens when iteration starts (on the
    first ``next()``), not when this function is called.
    """
    logger.info(f"Using remote model with HuggingFace id: {remote_hf_model_id}")
    # Fetch the listing once and reuse it for both the name set and the loop.
    # The previous code requested the same listing twice, which repeated the
    # lookup (presumably a remote request -- confirm in gguf.utility) and
    # could leave tensor_names out of sync with the tensors actually yielded.
    remote_tensors = gguf.utility.SafetensorRemote.get_list_tensors_hf_model(remote_hf_model_id)
    self.tensor_names = set(remote_tensors.keys())
    for name, remote_tensor in remote_tensors.items():
        yield (name, LazyTorchTensor.from_remote_tensor(remote_tensor))
98+
99+ self .get_tensors = get_remote_tensors
100+ else :
101+ self .part_names = Model .get_model_part_names (self .dir_model , "model" , ".safetensors" )
102+ self .is_safetensors = len (self .part_names ) > 0
103+ if not self .is_safetensors :
104+ self .part_names = Model .get_model_part_names (self .dir_model , "pytorch_model" , ".bin" )
91105 self .hparams = Model .load_hparams (self .dir_model ) if hparams is None else hparams
92106 self .block_count = self .find_hparam (["n_layers" , "num_hidden_layers" , "n_layer" , "num_layers" ])
93107 self .tensor_map = gguf .get_tensor_name_map (self .model_arch , self .block_count )
@@ -393,6 +407,10 @@ def prepare_metadata(self, vocab_only: bool):
393407
394408 self .metadata = gguf .Metadata .load (self .metadata_override , self .dir_model_card , self .model_name , total_params )
395409
410+ # If we are using HF model id, set the metadata name to the model id
411+ if self .remote_hf_model_id :
412+ self .metadata .name = self .remote_hf_model_id
413+
396414 # Fallback to model directory name if metadata name is still missing
397415 if self .metadata .name is None :
398416 self .metadata .name = self .dir_model .name
@@ -5420,6 +5438,14 @@ def from_safetensors_slice(cls, st_slice: Any) -> Tensor:
54205438 lazy = cls (meta = cls .meta_with_dtype_and_shape (dtype , shape ), args = (st_slice ,), func = lambda s : s [:])
54215439 return cast (torch .Tensor , lazy )
54225440
@classmethod
def from_remote_tensor(cls, remote_tensor: gguf.utility.RemoteTensor):
    """Build a lazy tensor backed by ``remote_tensor``.

    The dtype is looked up in ``cls._dtype_str_map`` using
    ``remote_tensor.dtype`` and the shape is taken from
    ``remote_tensor.shape``; both feed the meta tensor created by
    ``cls.meta_with_dtype_and_shape``.

    The result is returned cast to ``torch.Tensor``.
    """
    torch_dtype = cls._dtype_str_map[remote_tensor.dtype]
    dims = remote_tensor.shape

    def materialize(r):
        # Reads the tensor bytes through r.data() and reinterprets them with
        # the recorded dtype and shape. Presumably invoked only when the lazy
        # tensor is evaluated -- confirm against the lazy-tensor base class.
        return torch.frombuffer(r.data(), dtype=torch_dtype).reshape(dims)

    placeholder = cls.meta_with_dtype_and_shape(torch_dtype, dims)
    wrapped = cls(meta=placeholder, args=(remote_tensor,), func=materialize)
    return cast(torch.Tensor, wrapped)
5448+
54235449 @classmethod
54245450 def __torch_function__ (cls , func , types , args = (), kwargs = None ):
54255451 del types # unused
@@ -5497,6 +5523,10 @@ def parse_args() -> argparse.Namespace:
54975523 "--print-supported-models" , action = "store_true" ,
54985524 help = "Print the supported models"
54995525 )
5526+ parser .add_argument (
5527+ "--remote" , action = "store_true" ,
5528+ help = "(Experimental) Read safetensors file remotely without downloading to disk. Config and tokenizer files will still be downloaded. To use this feature, you need to specify Hugging Face model repo name instead of a local directory. For example: 'HuggingFaceTB/SmolLM2-1.7B-Instruct'. Note: To access gated repo, set HF_TOKEN environment variable to your Hugging Face token." ,
5529+ )
55005530
55015531 args = parser .parse_args ()
55025532 if not args .print_supported_models and args .model is None :
@@ -5537,6 +5567,14 @@ def main() -> None:
55375567
55385568 dir_model = args .model
55395569
5570+ if args .remote :
5571+ from huggingface_hub import snapshot_download
5572+ local_dir = snapshot_download (
5573+ repo_id = str (dir_model ),
5574+ allow_patterns = ["LICENSE" , "*.json" , "*.md" , "*.txt" , "tokenizer.model" ])
5575+ dir_model = Path (local_dir )
5576+ logger .info (f"Downloaded config and tokenizer to { local_dir } " )
5577+
55405578 if not dir_model .is_dir ():
55415579 logger .error (f'Error: { args .model } is not a directory' )
55425580 sys .exit (1 )
@@ -5558,6 +5596,9 @@ def main() -> None:
55585596
55595597 if args .outfile is not None :
55605598 fname_out = args .outfile
5599+ elif args .remote :
5600+ # if remote, use the model ID as the output file name
5601+ fname_out = Path ("./" + str (args .model ).replace ("/" , "-" ) + "-{ftype}.gguf" )
55615602 else :
55625603 fname_out = dir_model
55635604
@@ -5580,7 +5621,8 @@ def main() -> None:
55805621 metadata_override = args .metadata , model_name = args .model_name ,
55815622 split_max_tensors = args .split_max_tensors ,
55825623 split_max_size = split_str_to_n_bytes (args .split_max_size ), dry_run = args .dry_run ,
5583- small_first_shard = args .no_tensor_first_split )
5624+ small_first_shard = args .no_tensor_first_split ,
5625+ remote_hf_model_id = str (args .model ) if args .remote else None )
55845626
55855627 if args .vocab_only :
55865628 logger .info ("Exporting model vocab..." )
0 commit comments