Skip to content

Commit 289e8c3

Browse files
committed
also handle positional instances
1 parent a517997 commit 289e8c3

File tree

2 files changed

+188
-27
lines changed

2 files changed

+188
-27
lines changed

serialize_py/codegen_result.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,8 @@
77
def get_root(_io=None, check=True):
88
if not _io:
99
_io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size)))
10-
root = kaitaistruct_sqlite3.Sqlite3(_io)
11-
# try to fix root._write
12-
# https://github.com/kaitai-io/kaitai_struct/issues/1245
13-
root.pages__to_write = False
14-
root.header = kaitaistruct_sqlite3.Sqlite3.DatabaseHeader(root._io, root, root._root)
10+
root = kaitaistruct_sqlite3.Sqlite3(_io=_io, _parent=None, _root=None)
11+
root.header = kaitaistruct_sqlite3.Sqlite3.DatabaseHeader(_io=root._io, _parent=root, _root=root._root)
1512
def init_header(header):
1613
header.magic = b'SQLite format 3\x00'
1714
header.page_size_raw = 4096 # 0x1000
@@ -23,9 +20,10 @@ def init_header(header):
2320
header.leaf_payload_fraction = 32 # 0x20
2421
header.file_change_counter = 1
2522
header.num_pages = 2
26-
header.first_freelist_trunk_page = kaitaistruct_sqlite3.Sqlite3.FreelistTrunkPagePointer(root._io, header, header._root)
23+
header.first_freelist_trunk_page = kaitaistruct_sqlite3.Sqlite3.FreelistTrunkPagePointer(_io=root._io, _parent=header, _root=header._root)
2724
def init_first_freelist_trunk_page(first_freelist_trunk_page):
2825
first_freelist_trunk_page.page_number = 0
26+
first_freelist_trunk_page.page = None
2927
init_first_freelist_trunk_page(header.first_freelist_trunk_page)
3028
header.num_freelist_pages = 0
3129
header.schema_cookie = 1
@@ -40,6 +38,33 @@ def init_first_freelist_trunk_page(first_freelist_trunk_page):
4038
header.version_valid_for = 1
4139
header.sqlite_version_number = 3050001 # 0x2e8a11
4240
init_header(root.header)
41+
root.pages = []
42+
root.pages.append(kaitaistruct_sqlite3.Sqlite3.BtreePage(page_number=1, _io=root._io, _parent=root, _root=root._root))
43+
def init_page(page):
44+
page.page_type = kaitaistruct_sqlite3.Sqlite3.BtreePageType.table_leaf_page # 13 = 0xd
45+
page.first_freeblock = 0
46+
page.num_cells = 1
47+
page.ofs_cell_content_area_raw = 4044 # 0xfcc
48+
page.num_frag_free_bytes = 0
49+
page.cell_pointers = []
50+
page.cell_pointers.append(kaitaistruct_sqlite3.Sqlite3.CellPointer(_io=root._io, _parent=page, _root=page._root))
51+
def init_cell_pointer(cell_pointer):
52+
cell_pointer.ofs_content = 4044 # 0xfcc
53+
init_cell_pointer(page.cell_pointers[0])
54+
page.cell_content_area = b'2\x01\x06\x17\x15\x15\x01Itabletesttest\x02CREATE TABLE test (id INTEGER)'
55+
page.reserved_space = None
56+
init_page(root.pages[0])
57+
root.pages.append(kaitaistruct_sqlite3.Sqlite3.BtreePage(page_number=2, _io=root._io, _parent=root, _root=root._root))
58+
def init_page(page):
59+
page.page_type = kaitaistruct_sqlite3.Sqlite3.BtreePageType.table_leaf_page # 13 = 0xd
60+
page.first_freeblock = 0
61+
page.num_cells = 0
62+
page.ofs_cell_content_area_raw = 4096 # 0x1000
63+
page.num_frag_free_bytes = 0
64+
page.cell_pointers = []
65+
page.cell_content_area = b''
66+
page.reserved_space = None
67+
init_page(root.pages[1])
4368
if check:
4469
root._check()
4570
return root

serialize_py/kaitai_serialize_codegen.py

Lines changed: 157 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,9 @@ def get_keys(obj):
8888
def f(k):
8989
if k[0] == "_": return False
9090
if "A" <= k[0] <= "Z": return False
91-
if k in ("close", "from_bytes", "from_file", "from_io", "pages__to_write"): return False
91+
if k in ("close", "from_bytes", "from_file", "from_io"): return False
92+
# https://doc.kaitai.io/user_guide.html#_instances_data_beyond_the_sequence
93+
if k.endswith("__to_write"): return False
9294
return True
9395
keys = list(filter(f, keys))
9496
return keys
@@ -133,6 +135,24 @@ def get_seq(obj):
133135
continue
134136
return seq
135137

138+
def get_instances(obj):
    """Return the instance names referenced by ``obj._fetch_instances``.

    The names are scraped from the method's source text: every line of the
    form ``_ = self.<name>`` contributes ``<name>``, in source order.

    Returns an empty list when ``obj`` has no ``_fetch_instances`` attribute,
    or when the source of that attribute cannot be retrieved (for example a
    built-in callable, or code that was created without a source file).
    """
    # TODO upstream: this should be simpler
    fetch_instances = getattr(obj, "_fetch_instances", None)
    if fetch_instances is None:
        return []
    try:
        lines, _ = inspect.getsourcelines(fetch_instances)
    except (OSError, TypeError):
        # no source text available to scan
        return []
    # example line: "        _ = self.pages"
    # the "def _fetch_instances(self):" header never matches this pattern,
    # so it does not need to be stripped first
    pattern = re.compile(r"\s+_ = self\.(\w+)")
    matches = (pattern.match(line.rstrip()) for line in lines)
    return [m[1] for m in matches if m]
155+
136156
def parse_enum_map(lines):
137157
enum_map = dict()
138158
line0 = lines.pop(0)
@@ -150,6 +170,8 @@ def parse_enum_map(lines):
150170
return enum_map
151171

152172
def get_local_key(key, global_names):
173+
# # handle array item keys like "some_array[123]"
174+
# key = key.replace("[", "_").replace("]", "_")
153175
num = 1
154176
local_key = key
155177
while local_key in global_names:
@@ -221,6 +243,9 @@ class E # 0 # E
221243
# class FormatVersion(IntEnum):
222244

223245

246+
debug_init_types = False
247+
248+
224249
def codegen(
225250
obj,
226251
out,
@@ -236,6 +261,7 @@ def codegen(
236261
module_map={},
237262
global_names=[],
238263
):
264+
print("codegen obj", obj)
239265
global val # fix print_value
240266
mod = obj.__class__.__module__
241267
# member = obj.__class__.__name__ # DatabaseHeader
@@ -272,18 +298,53 @@ def codegen(
272298
print(f"{ind}def get_{root_name}(_io=None, check=True):", file=out)
273299
print(f"{ind}{ids}if not _io:", file=out)
274300
print(f"{ind}{ids}{ids}_io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size)))", file=out)
275-
print(f"{ind}{ids}{on} = {mod}.{member}(_io)", file=out)
276-
# TODO remove. this works only for sqlite3.ksy
277-
print(f"{ind}{ids}# try to fix root._write", file=out)
278-
print(f"{ind}{ids}# https://github.com/kaitai-io/kaitai_struct/issues/1245", file=out)
279-
print(f"{ind}{ids}{on}.pages__to_write = False", file=out)
280-
# root.pages__to_write = True
301+
# TODO also pass parameters to root.__init__
302+
"""
303+
val_params = []
304+
if hasattr(val, "__init__"):
305+
val_init_sig = inspect.signature(val.__init__)
306+
# ...
307+
"""
308+
309+
# print(f"{ind}{ids}{on} = {mod}.{member}(_io=_io)", file=out)
310+
on_parent_root = f"{on_parent}._root" if on_parent else "None"
311+
print(f"{ind}{ids}{on} = {mod}.{member}(_io=_io, _parent={on_parent}, _root={on_parent_root})", file=out)
312+
313+
# print(f"{ind}{ids}assert {on}._root == {on}", file=out) # debug
314+
315+
if parse_page_by_page:
316+
# TODO remove. this works only for sqlite3.ksy
317+
print(f"{ind}{ids}# try to fix root._write", file=out)
318+
print(f"{ind}{ids}# https://github.com/kaitai-io/kaitai_struct/issues/1245", file=out)
319+
print(f"{ind}{ids}{on}.pages__to_write = False", file=out)
320+
# root.pages__to_write = True
281321
# else:
282322
# print(f"{ind}{ids}# non-root init", file=out)
283323
# print(f"{ind}{ids}{on} = {mod}.{member}(_io, {on_parent}, {on_parent}._root)", file=out)
284-
for key in get_seq(obj):
324+
# TODO? interleave "seq" and "instance" keys
325+
# TODO rename to seq_key?
326+
# for key in get_seq(obj):
327+
key_stack = get_seq(obj) + get_instances(obj)
328+
while key_stack:
329+
key = key_stack.pop(0)
285330
# print(f"{ind}{ids}# key {key}", file=out)
286-
val = getattr(obj, key)
331+
print("key", key) # debug
332+
val_is_list_item = False
333+
if key.endswith("]"):
334+
# val is a list item
335+
val_is_list_item = True
336+
m = re.fullmatch(r"(\w+)\[(\d+)\]", key)
337+
val_arr_name, val_arr_idx = m.groups()
338+
val_arr_idx = int(val_arr_idx)
339+
val_arr = getattr(obj, val_arr_name)
340+
val = val_arr[val_arr_idx]
341+
else:
342+
# FIXME get_seq also returns items where the "if" condition is false
343+
# val = getattr(obj, key)
344+
try:
345+
val = getattr(obj, key)
346+
except AttributeError:
347+
continue
287348
"""
288349
print("key", repr(key))
289350
print("val", repr(val), dir(val))
@@ -298,15 +359,30 @@ def codegen(
298359

299360
# builtin types: int, bytes, ...
300361
if mod == "builtins":
362+
if debug_init_types:
363+
print(f"{ind}{ids}# builtin type {type(val).__name__}", file=out)
301364
if isinstance(val, int) and val > 10:
302365
print(f"{ind}{ids}{on}.{key} = {val!r} # {hex(val)}", file=out)
303366
continue
304367
if isinstance(val, bytes) and val == len(val) * b"\x00":
305368
# compress null bytes
306369
# TODO partial compression of bytestrings
307-
print(f"{ind}{ids}{on}.{key} = {len(val)} * b'\\x00'", file=out)
370+
if len(val) == 0:
371+
print(f"{ind}{ids}{on}.{key} = b''", file=out)
372+
else:
373+
print(f"{ind}{ids}{on}.{key} = {len(val)} * b'\\x00'", file=out)
374+
continue
375+
if isinstance(val, list):
376+
print(f"{ind}{ids}{on}.{key} = []", file=out)
377+
new_keys = []
378+
for item_idx in range(len(val)):
379+
new_keys.append(f"{key}[{item_idx}]")
380+
# recursion via stack
381+
key_stack = new_keys + key_stack
382+
# TODO
383+
# print(f"{ind}{ids}{on}.{key}.append({xxxxxxx})", file=out)
308384
continue
309-
# bytes, ...
385+
# bytes, str, ...
310386
print(f"{ind}{ids}{on}.{key} = {val!r}", file=out)
311387
continue
312388

@@ -327,6 +403,8 @@ def codegen(
327403
m = re.match(r"\s*class (\w+)\(([A-Z][A-Za-z0-9]*Enum)\):", lines[0].rstrip())
328404
if m:
329405
enum_name, enum_type = m.groups()
406+
if debug_init_types:
407+
print(f"{ind}{ids}# enum type {enum_name}", file=out)
330408
enum_map = enum_map_map.get(enum_name) # read cache
331409
if not enum_map:
332410
enum_map = parse_enum_map(lines)
@@ -346,27 +424,58 @@ def codegen(
346424
print(f"{ind}{ids}{on}.{key} = {mod}.{enum_qualname}.{enum_key} # {val_str}", file=out)
347425
continue
348426

349-
# TODO handle list types
350-
# m = ...
351-
# if m:
352-
# ...
353-
# continue
354-
355427
# user-defined types
428+
if debug_init_types:
429+
print(f"{ind}{ids}# user-defined type {member}", file=out)
356430
# https://doc.kaitai.io/serialization.html#_user_defined_types
357431
# print(f"{ind}{ids}{on}.{key} = root.{member}(root._io, {on}, {on}._root)", file=out) # short
358432
# print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{member}(root._io, {on}, {on}._root)", file=out) # long
359-
print(f"{ind}{ids}{on}.{key} = {mod}.{member}(root._io, {on}, {on}._root)", file=out) # long
433+
# print(f"{ind}{ids}{on}.{key} = {mod}.{member}(root._io, {on}, {on}._root)", file=out) # long
434+
val_params = []
435+
if hasattr(val, "__init__"):
436+
val_init_sig = inspect.signature(val.__init__)
437+
if str(val_init_sig) != "(_io=None, _parent=None, _root=None)":
438+
# print("val_init_sig", repr(val_init_sig))
439+
# val.__init__ has extra args
440+
# example: page_number in "(page_number, _io=None, _parent=None, _root=None)"
441+
for param_name in val_init_sig.parameters.keys():
442+
# print(f"param_name {param_name}")
443+
if param_name in ("_io", "_parent", "_root"):
444+
continue
445+
# FIXME handle user-defined types via recursion
446+
# example:
447+
"""
448+
def get_page_number():
449+
# ...
450+
pages.append(BtreePage(page_number=get_page_number(), _io=root._io, _parent=root, _root=root._root))
451+
"""
452+
param_val = getattr(val, param_name)
453+
val_params.append(f"{param_name}={param_val}")
454+
val_params = "".join(map(lambda arg: arg + ", ", val_params))
455+
if val_is_list_item:
456+
print(f"{ind}{ids}{on}.{val_arr_name}.append({mod}.{member}({val_params}_io=root._io, _parent={on}, _root={on}._root))", file=out) # long
457+
else:
458+
print(f"{ind}{ids}{on}.{key} = {mod}.{member}({val_params}_io=root._io, _parent={on}, _root={on}._root)", file=out) # long
459+
def get_singular_name(plural_name):
    """Derive a singular variable name from a plural/collection name.

    Examples: ``vals`` -> ``val``, ``val_list`` -> ``val``,
    ``val_array`` -> ``val``.

    The name is returned unchanged when no known suffix matches, or when
    stripping the suffix would leave an empty string: an empty name cannot
    be used to build the generated ``init_<name>(<name>)`` function.
    """
    # suffixes are tried in this order; the first match wins
    for suffix in ("_list", "_array", "s"):
        if plural_name.endswith(suffix):
            # fall back to the original name rather than returning ""
            return plural_name[: -len(suffix)] or plural_name
    return plural_name
360466
# avoid shadowing global variables
361-
local_key = get_local_key(key, global_names)
467+
if val_is_list_item:
468+
local_key = get_local_key(get_singular_name(val_arr_name), global_names)
469+
else:
470+
local_key = get_local_key(key, global_names)
362471
# print(f"{ind}{ids}if 1:", file=out) # no block scope
363472
# print(f"{ind}{ids}if {local_key} := {on}.{key}:", file=out) # no block scope
364473
# TypeError: 'int' object does not support the context manager protocol
365474
# print(f"{ind}{ids}with {on}.{key} as {local_key}:", file=out) # context # no block scope?
366475
# create block scope
367476
# this is required to avoid name collisions between scopes
368477
# https://stackoverflow.com/a/45210833/10440128
369-
print(f"{ind}{ids}def init_{key}({local_key}):", file=out) # "init_" prefix
478+
print(f"{ind}{ids}def init_{local_key}({local_key}):", file=out) # "init_" prefix
370479
# print(f"{ind}{ids}def {key}_init({local_key}):", file=out) # "_init" suffix
371480
# recursion
372481
codegen(
@@ -382,9 +491,36 @@ def codegen(
382491
module_map,
383492
global_names,
384493
)
385-
print(f"{ind}{ids}init_{key}({on}.{key})", file=out) # "init_" prefix
494+
495+
if val_is_list_item:
496+
print(f"{ind}{ids}init_{local_key}({on}.{val_arr_name}[{val_arr_idx}])", file=out) # "init_" prefix
497+
else:
498+
print(f"{ind}{ids}init_{local_key}({on}.{key})", file=out) # "init_" prefix
499+
386500
# print(f"{ind}{ids}{key}_init({local_key})", file=out) # "_init" suffix
387501

502+
# for instance_key in get_instances(obj):
503+
if 0:
504+
# print(f"{ind}{ids}# instance_key {instance_key}", file=out)
505+
val = getattr(obj, instance_key)
506+
"""
507+
print("instance_key", repr(instance_key))
508+
print("val", repr(val), dir(val))
509+
print_value("val.__class__.__module__")
510+
print_value("val.__class__.__qualname__")
511+
"""
512+
# obj.__class__.__module__ == 'builtins'
513+
# TODO rename to "mod_name"
514+
mod = val.__class__.__module__
515+
# TODO rename to "member_name"
516+
member = val.__class__.__qualname__
517+
518+
print("obj", obj)
519+
print("FIXME instance_key", instance_key, val, mod, member)
520+
# FIXME instance_key page 0 builtins int
521+
# FIXME instance_key page None builtins NoneType
522+
raise 123
523+
388524
# some user-defined types need this
389525
# example: AttributeError: 'VlqBase128Be' object has no attribute 'groups'
390526
# but this breaks other cases...

0 commit comments

Comments
 (0)