1414
1515# Necessary to load the local gguf package
# Prefer the in-repo gguf package over any installed one when running from a
# source checkout (detected by the sibling 'gguf-py' directory). Setting the
# NO_LOCAL_GGUF environment variable skips this and uses the installed package.
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))
1918
2019from gguf import GGUFReader , GGUFWriter , ReaderField , GGUFEndian , GGUFValueType , Keys # noqa: E402
@@ -31,87 +30,6 @@ def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
3130 return (host_endian , file_endian )
3231
3332
# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    """Pretty-print every key/value pair in *reader*, then (unless
    ``args.no_tensors``) a one-line summary of every tensor."""
    host_endian, file_endian = get_file_host_endian(reader)
    print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.')
    print(f'\n* Dumping {len(reader.fields)} key/value pair(s)')
    for i, field in enumerate(reader.fields.values(), 1):
        # Render the field's type chain; arrays show their nesting depth as brackets.
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            depth = len(field.types) - 1
            pretty_type = '[' * depth + str(field.types[-1].name) + ']' * depth
        else:
            pretty_type = str(field.types[-1].name)
        print(f'  {i:5}: {pretty_type:11} | {len(field.data):8} | {field.name}', end = '')
        if len(field.types) == 1:
            scalar_type = field.types[0]
            if scalar_type == GGUFValueType.STRING:
                # Names starting with the FILE_MARK character hold raw binary payloads,
                # so only show a readable (truncated) value for ordinary string fields.
                if field.name[0] == Keys.General.FILE_MARK:
                    print(' = binary data', end = '')
                else:
                    print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
            elif scalar_type in reader.gguf_scalar_to_np:
                print(' = {0}'.format(field.parts[-1][0]), end = '')
        print()
    if args.no_tensors:
        return
    print(f'\n* Dumping {len(reader.tensors)} tensor(s)')
    for i, tensor in enumerate(reader.tensors, 1):
        # Pad the shape out to 4 dimensions so the columns line up.
        pretty_dims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
        print(f'  {i:5}: {tensor.n_elements:10} | {pretty_dims} | {tensor.tensor_type.name:7} | {tensor.name}')
66-
def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
    """Write the file's metadata (and, unless ``args.no_tensors``, its tensor
    descriptions) to stdout as a single JSON document."""
    import json
    host_endian, file_endian = get_file_host_endian(reader)
    metadata: dict[str, Any] = {}
    tensors: dict[str, Any] = {}
    result = {
        "filename": args.input,
        "endian": file_endian,
        "metadata": metadata,
        "tensors": tensors,
    }
    for i, field in enumerate(reader.fields.values()):
        entry: dict[str, Any] = {
            "index": i,
            "type": field.types[0].name if field.types else 'UNKNOWN',
            "offset": field.offset,
        }
        metadata[field.name] = entry
        if field.types[:1] == [GGUFValueType.ARRAY]:
            entry["array_types"] = [t.name for t in field.types][1:]
            # Full array contents are only materialized on request — they can be huge.
            if not args.json_array:
                continue
            item_type = field.types[-1]
            if item_type == GGUFValueType.STRING:
                if field.name[0] == Keys.General.FILE_MARK:
                    # NOTE(review): raw bytes are not JSON-serializable, so json.dump
                    # below would raise for such a field — confirm whether FILE_MARK
                    # string arrays actually occur in practice.
                    entry["value"] = [bytes(field.parts[j]) for j in field.data]
                else:
                    entry["value"] = [str(bytes(field.parts[j]), encoding="utf-8") for j in field.data]
            else:
                entry["value"] = [pv for j in field.data for pv in field.parts[j].tolist()]
        elif field.types[0] == GGUFValueType.STRING:
            if field.name[0] == Keys.General.FILE_MARK:
                entry["value"] = bytes(field.parts[-1])
            else:
                entry["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
        else:
            entry["value"] = field.parts[-1].tolist()[0]
    if not args.no_tensors:
        for i, tensor in enumerate(reader.tensors):
            tensors[tensor.name] = {
                "index": i,
                "shape": tensor.shape.tolist(),
                "type": tensor.tensor_type.name,
                "offset": tensor.field.offset,
            }
    json.dump(result, sys.stdout)
114-
11533def get_byteorder (reader : GGUFReader ) -> GGUFEndian :
11634 if np .uint32 (1 ) == np .uint32 (1 ).newbyteorder ("<" ):
11735 # Host is little endian
@@ -215,9 +133,6 @@ def main() -> None:
215133 parser .add_argument ("input" , type = str , help = "GGUF format model input filename" )
216134 parser .add_argument ("output" , type = str , help = "GGUF format model output filename" )
217135 parser .add_argument ("addfiles" , type = str , nargs = '+' , help = "add filenames ..." )
218- parser .add_argument ("--no-tensors" , action = "store_true" , help = "Don't dump tensor metadata" )
219- parser .add_argument ("--json" , action = "store_true" , help = "Produce JSON output" )
220- parser .add_argument ("--json-array" , action = "store_true" , help = "Include full array values in JSON output (long)" )
221136 parser .add_argument ("--verbose" , action = "store_true" , help = "Increase output verbosity" )
222137 args = parser .parse_args (None if len (sys .argv ) > 1 else ["--help" ])
223138 logging .basicConfig (level = logging .DEBUG if args .verbose else logging .INFO )
@@ -244,15 +159,6 @@ def main() -> None:
244159 logger .info (f'* Adding: { key } = { path } ' )
245160 copy_with_new_metadata (reader , writer , new_metadata )
246161
247- if args .json :
248- dump_metadata_json (reader , args )
249- else :
250- dump_metadata (reader , args )
251-
252- logger .info (f'* Reading: { args .output } ' )
253- reader = GGUFReader (args .output , 'r' )
254- dump_metadata (reader , args )
255-
256162
# Standard script entry-point guard: run only when executed directly, not on import.
if __name__ == '__main__':
    main()
0 commit comments