@@ -88,7 +88,9 @@ def get_keys(obj):
def f(k):
    """Return True when attribute name ``k`` should be kept as a data key.

    Rejected names:
    - the empty string (nothing to inspect),
    - names starting with "_" (private/internal attributes),
    - names starting with an ASCII uppercase letter,
    - the helper names "close", "from_bytes", "from_file", "from_io",
    - names ending in "__to_write" (e.g. "pages__to_write").
    """
    if not k:
        # guard: k[0] below would raise IndexError on an empty name
        return False
    first = k[0]
    if first == "_" or "A" <= first <= "Z":
        return False
    if k in ("close", "from_bytes", "from_file", "from_io"):
        return False
    # https://doc.kaitai.io/user_guide.html#_instances_data_beyond_the_sequence
    # NOTE(review): presumably these are per-instance write flags - confirm
    if k.endswith("__to_write"):
        return False
    return True
9395 keys = list (filter (f , keys ))
9496 return keys
@@ -133,6 +135,24 @@ def get_seq(obj):
133135 continue
134136 return seq
135137
def get_instances(obj):
    """Return the instance names referenced by ``obj._fetch_instances``.

    The source code of ``obj._fetch_instances`` is scanned for indented
    lines of the form ``_ = self.<name>``; every ``<name>`` found is
    collected in source order.

    Returns an empty list when ``obj`` has no ``_fetch_instances``
    attribute. Errors raised by ``inspect.getsourcelines`` (for example
    when the source is unavailable) are propagated to the caller.
    """
    # TODO upstream: this should be simpler
    fetch_instances = getattr(obj, "_fetch_instances", None)
    if fetch_instances is None:
        return []
    source_lines, _ = inspect.getsourcelines(fetch_instances)
    # example line: "        _ = self.pages"
    pattern = re.compile(r"\s+_ = self\.(\w+)")
    instances = []
    # skip the first line: "def _fetch_instances(self):"
    for line in source_lines[1:]:
        m = pattern.match(line)
        if m:
            instances.append(m[1])
    return instances
155+
136156def parse_enum_map (lines ):
137157 enum_map = dict ()
138158 line0 = lines .pop (0 )
@@ -150,6 +170,8 @@ def parse_enum_map(lines):
150170 return enum_map
151171
152172def get_local_key (key , global_names ):
173+ # # handle array item keys like "some_array[123]"
174+ # key = key.replace("[", "_").replace("]", "_")
153175 num = 1
154176 local_key = key
155177 while local_key in global_names :
@@ -221,6 +243,9 @@ class E # 0 # E
221243 # class FormatVersion(IntEnum):
222244
223245
246+ debug_init_types = False
247+
248+
224249def codegen (
225250 obj ,
226251 out ,
@@ -236,6 +261,7 @@ def codegen(
236261 module_map = {},
237262 global_names = [],
238263):
264+ print ("codegen obj" , obj )
239265 global val # fix print_value
240266 mod = obj .__class__ .__module__
241267 # member = obj.__class__.__name__ # DatabaseHeader
@@ -272,18 +298,53 @@ def codegen(
272298 print (f"{ ind } def get_{ root_name } (_io=None, check=True):" , file = out )
273299 print (f"{ ind } { ids } if not _io:" , file = out )
274300 print (f"{ ind } { ids } { ids } _io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size)))" , file = out )
275- print (f"{ ind } { ids } { on } = { mod } .{ member } (_io)" , file = out )
276- # TODO remove. this works only for sqlite3.ksy
277- print (f"{ ind } { ids } # try to fix root._write" , file = out )
278- print (f"{ ind } { ids } # https://github.com/kaitai-io/kaitai_struct/issues/1245" , file = out )
279- print (f"{ ind } { ids } { on } .pages__to_write = False" , file = out )
280- # root.pages__to_write = True
301+ # TODO also pass parameters to root.__init__
302+ """
303+ val_params = []
304+ if hasattr(val, "__init__"):
305+ val_init_sig = inspect.signature(val.__init__)
306+ # ...
307+ """
308+
309+ # print(f"{ind}{ids}{on} = {mod}.{member}(_io=_io)", file=out)
310+ on_parent_root = f"{ on_parent } ._root" if on_parent else "None"
311+ print (f"{ ind } { ids } { on } = { mod } .{ member } (_io=_io, _parent={ on_parent } , _root={ on_parent_root } )" , file = out )
312+
313+ # print(f"{ind}{ids}assert {on}._root == {on}", file=out) # debug
314+
315+ if parse_page_by_page :
316+ # TODO remove. this works only for sqlite3.ksy
317+ print (f"{ ind } { ids } # try to fix root._write" , file = out )
318+ print (f"{ ind } { ids } # https://github.com/kaitai-io/kaitai_struct/issues/1245" , file = out )
319+ print (f"{ ind } { ids } { on } .pages__to_write = False" , file = out )
320+ # root.pages__to_write = True
281321 # else:
282322 # print(f"{ind}{ids}# non-root init", file=out)
283323 # print(f"{ind}{ids}{on} = {mod}.{member}(_io, {on_parent}, {on_parent}._root)", file=out)
284- for key in get_seq (obj ):
324+ # TODO? interleave "seq" and "instance" keys
325+ # TODO rename to seq_key?
326+ # for key in get_seq(obj):
327+ key_stack = get_seq (obj ) + get_instances (obj )
328+ while key_stack :
329+ key = key_stack .pop (0 )
285330 # print(f"{ind}{ids}# key {key}", file=out)
286- val = getattr (obj , key )
331+ print ("key" , key ) # debug
332+ val_is_list_item = False
333+ if key .endswith ("]" ):
334+ # val is a list item
335+ val_is_list_item = True
336+ m = re .fullmatch (r"(\w+)\[(\d+)\]" , key )
337+ val_arr_name , val_arr_idx = m .groups ()
338+ val_arr_idx = int (val_arr_idx )
339+ val_arr = getattr (obj , val_arr_name )
340+ val = val_arr [val_arr_idx ]
341+ else :
342+ # FIXME get_seq also returns items where the "if" condition is false
343+ # val = getattr(obj, key)
344+ try :
345+ val = getattr (obj , key )
346+ except AttributeError :
347+ continue
287348 """
288349 print("key", repr(key))
289350 print("val", repr(val), dir(val))
@@ -298,15 +359,30 @@ def codegen(
298359
299360 # builtin types: int, bytes, ...
300361 if mod == "builtins" :
362+ if debug_init_types :
363+ print (f"{ ind } { ids } # builtin type { type (val ).__name__ } " , file = out )
301364 if isinstance (val , int ) and val > 10 :
302365 print (f"{ ind } { ids } { on } .{ key } = { val !r} # { hex (val )} " , file = out )
303366 continue
304367 if isinstance (val , bytes ) and val == len (val ) * b"\x00 " :
305368 # compress null bytes
306369 # TODO partial compression of bytestrings
307- print (f"{ ind } { ids } { on } .{ key } = { len (val )} * b'\\ x00'" , file = out )
370+ if len (val ) == 0 :
371+ print (f"{ ind } { ids } { on } .{ key } = b''" , file = out )
372+ else :
373+ print (f"{ ind } { ids } { on } .{ key } = { len (val )} * b'\\ x00'" , file = out )
374+ continue
375+ if isinstance (val , list ):
376+ print (f"{ ind } { ids } { on } .{ key } = []" , file = out )
377+ new_keys = []
378+ for item_idx in range (len (val )):
379+ new_keys .append (f"{ key } [{ item_idx } ]" )
380+ # recursion via stack
381+ key_stack = new_keys + key_stack
382+ # TODO
383+ # print(f"{ind}{ids}{on}.{key}.append({xxxxxxx})", file=out)
308384 continue
309- # bytes, ...
385+ # bytes, str, ...
310386 print (f"{ ind } { ids } { on } .{ key } = { val !r} " , file = out )
311387 continue
312388
@@ -327,6 +403,8 @@ def codegen(
327403 m = re .match (r"\s*class (\w+)\(([A-Z][A-Za-z0-9]*Enum)\):" , lines [0 ].rstrip ())
328404 if m :
329405 enum_name , enum_type = m .groups ()
406+ if debug_init_types :
407+ print (f"{ ind } { ids } # enum type { enum_name } " , file = out )
330408 enum_map = enum_map_map .get (enum_name ) # read cache
331409 if not enum_map :
332410 enum_map = parse_enum_map (lines )
@@ -346,27 +424,58 @@ def codegen(
346424 print (f"{ ind } { ids } { on } .{ key } = { mod } .{ enum_qualname } .{ enum_key } # { val_str } " , file = out )
347425 continue
348426
349- # TODO handle list types
350- # m = ...
351- # if m:
352- # ...
353- # continue
354-
355427 # user-defined types
428+ if debug_init_types :
429+ print (f"{ ind } { ids } # user-defined type { member } " , file = out )
356430 # https://doc.kaitai.io/serialization.html#_user_defined_types
357431 # print(f"{ind}{ids}{on}.{key} = root.{member}(root._io, {on}, {on}._root)", file=out) # short
358432 # print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{member}(root._io, {on}, {on}._root)", file=out) # long
359- print (f"{ ind } { ids } { on } .{ key } = { mod } .{ member } (root._io, { on } , { on } ._root)" , file = out ) # long
433+ # print(f"{ind}{ids}{on}.{key} = {mod}.{member}(root._io, {on}, {on}._root)", file=out) # long
434+ val_params = []
435+ if hasattr (val , "__init__" ):
436+ val_init_sig = inspect .signature (val .__init__ )
437+ if str (val_init_sig ) != "(_io=None, _parent=None, _root=None)" :
438+ # print("val_init_sig", repr(val_init_sig))
439+ # val.__init__ has extra args
440+ # example: page_number in "(page_number, _io=None, _parent=None, _root=None)"
441+ for param_name in val_init_sig .parameters .keys ():
442+ # print(f"param_name {param_name}")
443+ if param_name in ("_io" , "_parent" , "_root" ):
444+ continue
445+ # FIXME handle user-defined types via recursion
446+ # example:
447+ """
448+ def get_page_number():
449+ # ...
450+ pages.append(BtreePage(page_number=get_page_number(), _io=root._io, _parent=root, _root=root._root))
451+ """
452+ param_val = getattr (val , param_name )
453+ val_params .append (f"{ param_name } ={ param_val } " )
454+ val_params = "" .join (map (lambda arg : arg + ", " , val_params ))
455+ if val_is_list_item :
456+ print (f"{ ind } { ids } { on } .{ val_arr_name } .append({ mod } .{ member } ({ val_params } _io=root._io, _parent={ on } , _root={ on } ._root))" , file = out ) # long
457+ else :
458+ print (f"{ ind } { ids } { on } .{ key } = { mod } .{ member } ({ val_params } _io=root._io, _parent={ on } , _root={ on } ._root)" , file = out ) # long
def get_singular_name(plural_name):
    """Derive a singular-looking name from a list attribute name.

    The first matching suffix of "_list", "_array", "s" is stripped:
    vals -> val, val_list -> val, val_array -> val.

    The name is returned unchanged when no suffix matches, or when
    stripping would leave an empty string (e.g. "s"), because the
    result is used to build identifiers in the generated code.
    """
    for suffix in ("_list", "_array", "s"):
        if plural_name.endswith(suffix):
            stem = plural_name[:-len(suffix)]
            # never return an empty identifier
            return stem or plural_name
    return plural_name
360466 # avoid shadowing global variables
361- local_key = get_local_key (key , global_names )
467+ if val_is_list_item :
468+ local_key = get_local_key (get_singular_name (val_arr_name ), global_names )
469+ else :
470+ local_key = get_local_key (key , global_names )
362471 # print(f"{ind}{ids}if 1:", file=out) # no block scope
363472 # print(f"{ind}{ids}if {local_key} := {on}.{key}:", file=out) # no block scope
364473 # TypeError: 'int' object does not support the context manager protocol
365474 # print(f"{ind}{ids}with {on}.{key} as {local_key}:", file=out) # context # no block scope?
366475 # create block scope
367476 # this is required to avoid name collisions between scopes
368477 # https://stackoverflow.com/a/45210833/10440128
369- print (f"{ ind } { ids } def init_{ key } ({ local_key } ):" , file = out ) # "init_" prefix
478+ print (f"{ ind } { ids } def init_{ local_key } ({ local_key } ):" , file = out ) # "init_" prefix
370479 # print(f"{ind}{ids}def {key}_init({local_key}):", file=out) # "_init" suffix
371480 # recursion
372481 codegen (
@@ -382,9 +491,36 @@ def codegen(
382491 module_map ,
383492 global_names ,
384493 )
385- print (f"{ ind } { ids } init_{ key } ({ on } .{ key } )" , file = out ) # "init_" prefix
494+
495+ if val_is_list_item :
496+ print (f"{ ind } { ids } init_{ local_key } ({ on } .{ val_arr_name } [{ val_arr_idx } ])" , file = out ) # "init_" prefix
497+ else :
498+ print (f"{ ind } { ids } init_{ local_key } ({ on } .{ key } )" , file = out ) # "init_" prefix
499+
386500 # print(f"{ind}{ids}{key}_init({local_key})", file=out) # "_init" suffix
387501
502+ # for instance_key in get_instances(obj):
503+ if 0 :
504+ # print(f"{ind}{ids}# instance_key {instance_key}", file=out)
505+ val = getattr (obj , instance_key )
506+ """
507+ print("instance_key", repr(instance_key))
508+ print("val", repr(val), dir(val))
509+ print_value("val.__class__.__module__")
510+ print_value("val.__class__.__qualname__")
511+ """
512+ # obj.__class__.__module__ == 'builtins'
513+ # TODO rename to "mod_name"
514+ mod = val .__class__ .__module__
515+ # TODO rename to "member_name"
516+ member = val .__class__ .__qualname__
517+
518+ print ("obj" , obj )
519+ print ("FIXME instance_key" , instance_key , val , mod , member )
520+ # FIXME instance_key page 0 builtins int
521+ # FIXME instance_key page None builtins NoneType
522+ raise 123
523+
388524 # some user-defined types need this
389525 # example: AttributeError: 'VlqBase128Be' object has no attribute 'groups'
390526 # but this breaks other cases...
0 commit comments