Commit 73a168b

Author: katsu560

remove NAMEDOBJECT, use key and STRING value

1 parent 33cf5b3
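
In short: instead of a dedicated NAMEDOBJECT value type, each added file is now stored as an ordinary key/value pair, where the key is the file path prefixed with a marker character and the STRING value carries the raw file bytes. A minimal sketch of the scheme, assuming Keys.General.FILE_MARK is the one-character prefix this branch defines; the '@' and the file name below are hypothetical placeholders, not the committed code:

    # Hedged sketch of the new key/value scheme (not the committed code).
    FILE_MARK = '@'            # hypothetical stand-in for Keys.General.FILE_MARK
    path = 'label.txt'         # hypothetical file to embed
    key = FILE_MARK + path     # key = marker + original path, e.g. '@label.txt'
    with open(path, 'rb') as f:
        value = f.read()       # raw bytes stored as the STRING value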

File tree

1 file changed: +33 additions, -53 deletions


examples/yolo/gguf-addfile.py

Lines changed: 33 additions & 53 deletions

@@ -17,7 +17,7 @@
 #print("add path", str(Path(__file__).parent.parent))
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
-from gguf import GGUFReader, GGUFWriter, ReaderField, GGUFEndian, GGUFValueType, Keys, NamedObject # noqa: E402
+from gguf import GGUFReader, GGUFWriter, ReaderField, GGUFEndian, GGUFValueType, Keys # noqa: E402
 
 logger = logging.getLogger("gguf-addfile")
 
@@ -49,10 +49,10 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
         if len(field.types) == 1:
             curr_type = field.types[0]
             if curr_type == GGUFValueType.STRING:
-                print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
-            elif curr_type == GGUFValueType.NAMEDOBJECT:
-                print(' = {0}'.format(repr(str(bytes(field.parts[4]), encoding='utf8')[:60])), end = '')
-                print(', {0}'.format(int(field.parts[5]))[:20], end = '')
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
+                else:
+                    print(' = binary data', end = '')
             elif field.types[0] in reader.gguf_scalar_to_np:
                 print(' = {0}'.format(field.parts[-1][0]), end = '')
         print()
@@ -88,16 +88,17 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
                 continue
             itype = field.types[-1]
             if itype == GGUFValueType.STRING:
-                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
-            elif itype == GGUFValueType.NAMEDOBJECT:
-                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
+                else:
+                    curr["value"] = [bytes(field.parts[idx]) for idx in field.data]
             else:
                 curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
         elif field.types[0] == GGUFValueType.STRING:
-            curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
-        elif field.types[0] == GGUFValueType.NAMEDOBJECT:
-            curr["value"] = str(bytes(field.parts[4]), encoding="utf-8")
-            curr["value"] = int(field.parts[5])
+            if not field.name[0] == Keys.General.FILE_MARK:
+                curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
+            else:
+                curr["value"] = bytes(field.parts[-1])
         else:
             curr["value"] = field.parts[-1].tolist()[0]
     if not args.no_tensors:
@@ -135,15 +136,17 @@ def decode_field(field: ReaderField) -> Any:
             sub_type = field.types[-1]
 
             if sub_type == GGUFValueType.STRING:
-                return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
-            elif sub_type == GGUFValueType.NAMEDOBJECT:
-                return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+                else:
+                    return [bytes(field.parts[idx]) for idx in field.data]
             else:
                 return [pv for idx in field.data for pv in field.parts[idx].tolist()]
         if main_type == GGUFValueType.STRING:
-            return str(bytes(field.parts[-1]), encoding='utf8')
-        elif main_type == GGUFValueType.NAMEDOBJECT:
-            return str(bytes(field.parts[4]), encoding='utf8')
+            if not field.name[0] == Keys.General.FILE_MARK:
+                return str(bytes(field.parts[-1]), encoding='utf8')
+            else:
+                return bytes(field.parts[-1])
         else:
             return field.parts[-1][0]
 
@@ -156,7 +159,7 @@ def get_field_data(reader: GGUFReader, key: str) -> Any:
     return decode_field(field)
 
 
-def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str], array: NamedObject[Any] | None = None) -> None:
+def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str]) -> None:
     for field in reader.fields.values():
         # Suppress virtual fields and fields written by GGUFWriter
         if field.name == Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
@@ -186,18 +189,11 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
         writer.add_chat_template(new_metadata[Keys.Tokenizer.CHAT_TEMPLATE])
         del new_metadata[Keys.Tokenizer.CHAT_TEMPLATE]
 
-    if array is None:
-        for key, name in new_metadata.items():
-            logger.debug(f'Adding {key}: {name}')
-            # named object
-            with open(name, "rb") as f:
-                val = f.read()
-            writer.add_namedobject(key, val, name)
-    else:
-        for key, name in new_metadata.items():
-            logger.debug(f'Adding array {key}: {name}')
-            # named object
-            writer.add_namedobject(key, 'val', name, array=array)
+    for key, name in new_metadata.items():
+        logger.debug(f'Adding {key}: {name}')
+        with open(name, "rb") as f:
+            val = f.read()
+        writer.add_object(key, val)
 
     for tensor in reader.tensors:
         # Dimensions are written in reverse order, so flip them first
@@ -219,7 +215,6 @@ def main() -> None:
     parser.add_argument("input", type=str, help="GGUF format model input filename")
     parser.add_argument("output", type=str, help="GGUF format model output filename")
     parser.add_argument("addfiles", type=str, nargs='+', help="add filenames ...")
-    parser.add_argument("--array", action="store_true", help="add files to namedobject array")
     parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
     parser.add_argument("--json", action="store_true", help="Produce JSON output")
     parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
@@ -242,27 +237,12 @@ def main() -> None:
 
     logger.info(f'* Adding: {args.addfiles}')
     new_metadata = {}
-    count = 0
-    if args.array is False:
-        for path in args.addfiles:
-            count += 1
-            key = Keys.General.NAMEDOBJECT + Keys.General.CONNECT + str(count)
-            new_metadata[key] = path
-            logger.info(f'* Adding: {key} = {path}')
-        copy_with_new_metadata(reader, writer, new_metadata)
-    else:
-        key = Keys.General.NAMEDOBJECT
-        # array is dummy
-        new_metadata[key] = 'array'
-        files = []
-        for path in args.addfiles:
-            with open(path, "rb") as f:
-                val = f.read()
-            #print(f'files[{count}] = {path}')
-            files.append(NamedObject(path, val))
-            logger.info(f'* Adding: {key}[{count}] = {path}')
-            count += 1
-        copy_with_new_metadata(reader, writer, new_metadata, array=files)
+    for path in args.addfiles:
+        # add FILE_MARK to key
+        key = Keys.General.FILE_MARK + path
+        new_metadata[key] = path
+        logger.info(f'* Adding: {key} = {path}')
+    copy_with_new_metadata(reader, writer, new_metadata)
 
     if args.json:
         dump_metadata_json(reader, args)
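
Because the payload now travels in a plain STRING field, embedded files can be recovered with the stock GGUFReader: keys beginning with the marker are read back as raw bytes, mirroring the "else: return bytes(field.parts[-1])" branch of decode_field above. A minimal read-back sketch, assuming a model written by this version of gguf-addfile.py; FILE_MARK and the input filename are hypothetical placeholders:

    # Hedged sketch: list files embedded by gguf-addfile.py.
    from gguf import GGUFReader

    FILE_MARK = '@'                                # hypothetical stand-in for Keys.General.FILE_MARK
    reader = GGUFReader('model-with-files.gguf')   # hypothetical input path

    for field in reader.fields.values():
        if not field.name.startswith(FILE_MARK):
            continue                               # ordinary metadata, not an embedded file
        data = bytes(field.parts[-1])              # raw bytes: skip the UTF-8 decode
        name = field.name[len(FILE_MARK):]         # key is FILE_MARK + original path
        print(f'{name}: {len(data)} bytes')

Taking the last part as bytes is the point of the commit: decoding it as UTF-8, as the STRING path normally does, would fail or mangle arbitrary binary content.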
