@@ -17,7 +17,7 @@
 #print("add path", str(Path(__file__).parent.parent))
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
-from gguf import GGUFReader, GGUFWriter, ReaderField, GGUFEndian, GGUFValueType, Keys, NamedObject  # noqa: E402
+from gguf import GGUFReader, GGUFWriter, ReaderField, GGUFEndian, GGUFValueType, Keys  # noqa: E402
 
 logger = logging.getLogger("gguf-addfile")
 
@@ -49,10 +49,10 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
         if len(field.types) == 1:
             curr_type = field.types[0]
             if curr_type == GGUFValueType.STRING:
-                print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
-            elif curr_type == GGUFValueType.NAMEDOBJECT:
-                print(' = {0}'.format(repr(str(bytes(field.parts[4]), encoding='utf8')[:60])), end = '')
-                print(', {0}'.format(int(field.parts[5]))[:20], end = '')
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
+                else:
+                    print(' = binary data', end = '')
             elif field.types[0] in reader.gguf_scalar_to_np:
                 print(' = {0}'.format(field.parts[-1][0]), end = '')
         print()
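For readers following along: the dump path now tells embedded files apart from ordinary strings purely by key prefix. A minimal sketch of that check, assuming FILE_MARK is a single marker character such as '*' (its actual value comes from the companion gguf-py change, which is not part of this diff):

    # Hypothetical stand-in for Keys.General.FILE_MARK (assumed value).
    FILE_MARK = '*'

    def is_embedded_file(field_name: str) -> bool:
        # Embedded files are stored as STRING fields whose key starts with
        # the mark, so dumpers skip UTF-8 decoding and print a placeholder.
        return field_name[0] == FILE_MARK

    print(is_embedded_file('*notes.txt'))    # True
    print(is_embedded_file('general.name'))  # False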
@@ -88,16 +88,17 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
                 continue
             itype = field.types[-1]
             if itype == GGUFValueType.STRING:
-                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
-            elif itype == GGUFValueType.NAMEDOBJECT:
-                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
+                else:
+                    curr["value"] = [bytes(field.parts[idx]) for idx in field.data]
             else:
                 curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
         elif field.types[0] == GGUFValueType.STRING:
-            curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
-        elif field.types[0] == GGUFValueType.NAMEDOBJECT:
-            curr["value"] = str(bytes(field.parts[4]), encoding="utf-8")
-            curr["value"] = int(field.parts[5])
+            if not field.name[0] == Keys.General.FILE_MARK:
+                curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
+            else:
+                curr["value"] = bytes(field.parts[-1])
         else:
             curr["value"] = field.parts[-1].tolist()[0]
     if not args.no_tensors:
@@ -135,15 +136,17 @@ def decode_field(field: ReaderField) -> Any:
             sub_type = field.types[-1]
 
             if sub_type == GGUFValueType.STRING:
-                return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
-            elif sub_type == GGUFValueType.NAMEDOBJECT:
-                return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+                else:
+                    return [bytes(field.parts[idx]) for idx in field.data]
             else:
                 return [pv for idx in field.data for pv in field.parts[idx].tolist()]
         if main_type == GGUFValueType.STRING:
-            return str(bytes(field.parts[-1]), encoding='utf8')
-        elif main_type == GGUFValueType.NAMEDOBJECT:
-            return str(bytes(field.parts[4]), encoding='utf8')
+            if not field.name[0] == Keys.General.FILE_MARK:
+                return str(bytes(field.parts[-1]), encoding='utf8')
+            else:
+                return bytes(field.parts[-1])
         else:
             return field.parts[-1][0]
 
@@ -156,7 +159,7 @@ def get_field_data(reader: GGUFReader, key: str) -> Any:
     return decode_field(field)
 
 
-def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str], array: NamedObject[Any] | None = None) -> None:
+def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str]) -> None:
     for field in reader.fields.values():
         # Suppress virtual fields and fields written by GGUFWriter
         if field.name == Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
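Because decode_field now returns raw bytes for marked keys, the script's own get_field_data helper (shown in context above) can round-trip an embedded file. A minimal sketch, assuming the key layout built in main() below; the filenames are illustrative:

    # Recover an embedded file from a GGUF written by this script.
    reader = GGUFReader('output.gguf', 'r')
    data = get_field_data(reader, Keys.General.FILE_MARK + 'LICENSE')
    if data is not None:
        with open('LICENSE.restored', 'wb') as f:
            f.write(data)  # raw bytes, via the FILE_MARK branch of decode_field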
@@ -186,18 +189,11 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
         writer.add_chat_template(new_metadata[Keys.Tokenizer.CHAT_TEMPLATE])
         del new_metadata[Keys.Tokenizer.CHAT_TEMPLATE]
 
-    if array is None:
-        for key, name in new_metadata.items():
-            logger.debug(f'Adding {key}: {name}')
-            # named object
-            with open(name, "rb") as f:
-                val = f.read()
-            writer.add_namedobject(key, val, name)
-    else:
-        for key, name in new_metadata.items():
-            logger.debug(f'Adding array {key}: {name}')
-            # named object
-            writer.add_namedobject(key, 'val', name, array=array)
+    for key, name in new_metadata.items():
+        logger.debug(f'Adding {key}: {name}')
+        with open(name, "rb") as f:
+            val = f.read()
+        writer.add_object(key, val)
 
     for tensor in reader.tensors:
         # Dimensions are written in reverse order, so flip them first
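Note that writer.add_object is new API from this change's companion gguf-py edits, which are not shown in this diff. As a rough sketch of what it might amount to (an assumption, not the PR's actual implementation): GGUFWriter.add_val already writes bytes through unencoded for STRING values, so storing a file could reduce to writing the marked key plus the raw payload:

    # Hypothetical sketch only; the real add_object lives in the gguf-py
    # half of this change.
    def add_object(self, key: str, val: bytes) -> None:
        self.add_key(key)                        # key carries the FILE_MARK prefix
        self.add_val(val, GGUFValueType.STRING)  # length-prefixed raw bytes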
@@ -219,7 +215,6 @@ def main() -> None:
     parser.add_argument("input",        type=str, help="GGUF format model input filename")
     parser.add_argument("output",       type=str, help="GGUF format model output filename")
     parser.add_argument("addfiles",     type=str, nargs='+', help="add filenames ...")
-    parser.add_argument("--array",      action="store_true", help="add files to namedobject array")
     parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
     parser.add_argument("--json",       action="store_true", help="Produce JSON output")
     parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
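With --array gone, the command line reduces to a single form; a typical invocation per the argument list above (filenames illustrative):

    python3 gguf-addfile.py input.gguf output.gguf LICENSE tokenizer.json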
@@ -242,27 +237,12 @@ def main() -> None:
 
     logger.info(f'* Adding: {args.addfiles}')
     new_metadata = {}
-    count = 0
-    if args.array is False:
-        for path in args.addfiles:
-            count += 1
-            key = Keys.General.NAMEDOBJECT + Keys.General.CONNECT + str(count)
-            new_metadata[key] = path
-            logger.info(f'* Adding: {key} = {path}')
-        copy_with_new_metadata(reader, writer, new_metadata)
-    else:
-        key = Keys.General.NAMEDOBJECT
-        # array is dummy
-        new_metadata[key] = 'array'
-        files = []
-        for path in args.addfiles:
-            with open(path, "rb") as f:
-                val = f.read()
-            #print(f'files[{count}] = {path}')
-            files.append(NamedObject(path, val))
-            logger.info(f'* Adding: {key}[{count}] = {path}')
-            count += 1
-        copy_with_new_metadata(reader, writer, new_metadata, array=files)
+    for path in args.addfiles:
+        # add FILE_MARK to key
+        key = Keys.General.FILE_MARK + path
+        new_metadata[key] = path
+        logger.info(f'* Adding: {key} = {path}')
+    copy_with_new_metadata(reader, writer, new_metadata)
 
     if args.json:
         dump_metadata_json(reader, args)
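For the example invocation above, the loop in main() builds one marked key per file, keyed by the file's own path. Illustratively, again assuming FILE_MARK is '*' (the real mark character comes from gguf-py):

    new_metadata = {
        '*LICENSE': 'LICENSE',
        '*tokenizer.json': 'tokenizer.json',
    }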