@@ -65,7 +65,8 @@ class Model:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH

-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool,
+                 model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
         self.dir_model = dir_model
@@ -96,7 +97,8 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         ftype_lw: str = ftype_up.lower()
         # allow templating the file name with the output ftype, useful with the "auto" ftype
         self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
-        self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
+        self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
+                                           split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)

     @classmethod
     def __init_subclass__(cls):
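# Illustrative sketch, not part of the patch: the four new constructor arguments are simply
# forwarded to gguf.GGUFWriter, which does the actual shard planning. Standalone use would
# look roughly like this (the LLAMA architecture below is only a placeholder; split_max_size
# is a byte count, see split_str_to_n_bytes() further down).
import gguf

writer = gguf.GGUFWriter(
    path=None,                    # the output name is supplied later via write_header_to_file()
    arch=gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
    split_max_tensors=128,        # start a new shard every 128 tensors (0 = no limit)
    split_max_size=0,             # byte limit per shard (0 = no limit)
    dry_run=False,                # True only prints the split plan, writing no files
    small_first_shard=False,      # True keeps the first shard metadata-only
)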
@@ -332,6 +334,8 @@ def write(self):
         self.gguf_writer.close()

     def write_vocab(self):
+        if len(self.gguf_writer.tensors) != 1:
+            raise ValueError('Splitting the vocabulary is not supported')
         self.gguf_writer.write_header_to_file(self.fname_out)
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.close()
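# Why the new guard (illustrative, not part of the patch): with splitting enabled the writer
# keeps one tensor collection per planned shard, so len(gguf_writer.tensors) is the shard
# count. A vocab-only export writes a single metadata-only file, so any plan other than
# exactly one shard is rejected. A caller combining --vocab-only with split options would
# handle it roughly like this:
try:
    model_instance.write_vocab()
except ValueError as e:       # 'Splitting the vocabulary is not supported'
    logger.error(e)
    sys.exit(1)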
@@ -2971,10 +2975,44 @@ def parse_args() -> argparse.Namespace:
         "--verbose", action="store_true",
         help="increase output verbosity",
     )
+    parser.add_argument(
+        "--split-max-tensors", type=int, default=0,
+        help="max tensors in each split",
+    )
+    parser.add_argument(
+        "--split-max-size", type=str, default="0",
+        help="max size per split N(M|G)",
+    )
+    parser.add_argument(
+        "--dry-run", action="store_true",
+        help="only print out a split plan and exit, without writing any new files",
+    )
+    parser.add_argument(
+        "--no-tensor-first-split", action="store_true",
+        help="do not add tensors to the first split (disabled by default)"
+    )

     return parser.parse_args()


+def split_str_to_n_bytes(split_str: str) -> int:
+    if split_str.endswith("K"):
+        n = int(split_str[:-1]) * 1000
+    elif split_str.endswith("M"):
+        n = int(split_str[:-1]) * 1000 * 1000
+    elif split_str.endswith("G"):
+        n = int(split_str[:-1]) * 1000 * 1000 * 1000
+    elif split_str.isnumeric():
+        n = int(split_str)
+    else:
+        raise ValueError(f"Invalid split size: {split_str}, must be a number, optionally followed by K, M, or G")
+
+    if n < 0:
+        raise ValueError(f"Invalid split size: {split_str}, must be positive")
+
+    return n
+
+
 def main() -> None:
     args = parse_args()

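# Quick illustration of the new size parser and flags (not part of the patch; the paths and
# script name shown are assumptions). The multipliers are decimal (K = 1000 bytes), not
# binary, the default "0" keeps size-based splitting disabled, and the parser also accepts a
# K suffix even though the help string only mentions M and G.
assert split_str_to_n_bytes("0") == 0
assert split_str_to_n_bytes("300K") == 300_000
assert split_str_to_n_bytes("250M") == 250_000_000
assert split_str_to_n_bytes("4G") == 4_000_000_000
# split_str_to_n_bytes("4GB") or split_str_to_n_bytes("foo") raise ValueError.
#
# Typical invocations:
#   python convert-hf-to-gguf.py ./My-7B --outfile My-7B-{ftype}.gguf --split-max-size 4G --dry-run
#   python convert-hf-to-gguf.py ./My-7B --split-max-tensors 256 --no-tensor-first-split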
@@ -3007,6 +3045,10 @@ def main() -> None:
         "auto": gguf.LlamaFileType.GUESSED,
     }

+    if args.use_temp_file and (args.split_max_tensors > 0 or args.split_max_size != "0"):
+        logger.error("Error: Cannot use temp file when splitting")
+        sys.exit(1)
+
     if args.outfile is not None:
         fname_out = args.outfile
     else:
@@ -3024,7 +3066,10 @@ def main() -> None:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)

-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name)
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file,
+                                     args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors,
+                                     split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
+                                     small_first_shard=args.no_tensor_first_split)

         logger.info("Set model parameters")
         model_instance.set_gguf_parameters()
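# Programmatic equivalent of the wiring above (illustrative; "LlamaModel" is just an example
# subclass and the paths are made up). Note that --split-max-size reaches the constructor
# already converted to a byte count, and --no-tensor-first-split maps onto small_first_shard.
from pathlib import Path

model = LlamaModel(
    Path("./My-7B"), gguf.LlamaFileType.MOSTLY_F16, Path("My-7B-f16.gguf"),
    is_big_endian=False, use_temp_file=False, eager=False, model_name=None,
    split_max_tensors=0,
    split_max_size=split_str_to_n_bytes("4G"),   # 4_000_000_000 bytes
    dry_run=True,                                # print the split plan only
    small_first_shard=True,                      # shard 1 carries only the metadata
)
model.write()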
@@ -3035,13 +3080,13 @@ def main() -> None:
         model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)

         if args.vocab_only:
-            logger.info(f"Exporting model vocab to '{model_instance.fname_out}'")
+            logger.info("Exporting model vocab...")
             model_instance.write_vocab()
+            logger.info("Model vocab successfully exported.")
         else:
-            logger.info(f"Exporting model to '{model_instance.fname_out}'")
+            logger.info("Exporting model...")
             model_instance.write()
-
-        logger.info(f"Model successfully exported to '{model_instance.fname_out}'")
+            logger.info("Model successfully exported.")


 if __name__ == '__main__':