Skip to content

Commit b9d5530

Browse files
author
rocky
committed
Towards getting 1.5 unmarshaling correct
1 parent 8f73703 commit b9d5530

File tree

6 files changed

+79
-28
lines changed

6 files changed

+79
-28
lines changed

xdis/bytecode.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -612,8 +612,13 @@ def dis(
612612
if isinstance(filename, UnicodeForPython3):
613613
filename = str(filename)
614614

615+
if isinstance(co.co_code, str):
616+
co_code = co.co_code.encode('latin-1')
617+
else:
618+
co_code = co.co_code
619+
615620
self.disassemble_bytes(
616-
co.co_code,
621+
co_code,
617622
varnames=co.co_varnames,
618623
names=co.co_names,
619624
constants=co.co_consts,

xdis/codetype/code15.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# (C) Copyright 2020, 2023 by Rocky Bernstein
1+
# (C) Copyright 2020, 2023, 2025 by Rocky Bernstein
22
#
33
# This program is free software; you can redistribute it and/or
44
# modify it under the terms of the GNU General Public License
@@ -72,7 +72,7 @@ def __init__(
7272
# This messes up decompilers somehow.
7373
# self.decode_lineno_tab()
7474
self.fieldtypes = Code15FieldTypes
75-
if type(self) == Code15:
75+
if type(self) is Code15:
7676
self.check()
7777
return
7878

xdis/load.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,15 @@ def write_bytecode_file(
410410
if isinstance(code_obj, types.CodeType):
411411
fp.write(marshal.dumps(code_obj))
412412
else:
413-
fp.write(xdis.marsh.dumps(code_obj, python_version=version_tuple))
413+
code_sequence = xdis.marsh.dumps(code_obj, python_version=version_tuple)
414+
if isinstance(code_sequence, str):
415+
# Python 1.x uses code strings, not bytes. To get this into bytes needed by
416+
# fp.write, encode the string using 'latin-1' and 'unicode_escape' to convert escape sequences
417+
# into the raw byte values. 'latin-1' is a single-byte encoding that works well for this.
418+
code_bytes = code_sequence.encode('latin-1').decode('unicode_escape').encode('latin-1')
419+
else:
420+
code_bytes = code_sequence
421+
fp.write(code_bytes)
414422
fp.close()
415423

416424

xdis/marsh.py

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from types import CodeType, EllipsisType
3131
from typing import Optional
3232

33-
from xdis.codetype import Code2, Code3
33+
from xdis.codetype import Code2, Code3, Code15
3434
from xdis.unmarshal import long
3535
from xdis.version_info import PYTHON3, PYTHON_VERSION_TRIPLE, version_tuple_to_str
3636

@@ -92,7 +92,7 @@ def Ord(c):
9292

9393
class _Marshaller:
9494
"""Python marshalling routine that runs in Python 2 and Python 3.
95-
We also extend to allow for xdis Code2 and Code3 types and instances.
95+
We also extend to allow for xdis Code15, Code2, and Code3 types and instances.
9696
"""
9797

9898
dispatch = {}
@@ -122,6 +122,9 @@ def dump(self, x) -> None:
122122
elif isinstance(x, Code2):
123123
self.dispatch[Code2](self, x)
124124
return
125+
elif isinstance(x, Code15):
126+
self.dispatch[Code15](self, x)
127+
return
125128
else:
126129
for tp in type(x).mro():
127130
func = self.dispatch.get(tp)
@@ -260,16 +263,12 @@ def dump_string(self, x) -> None:
260263
self.w_long(len(x))
261264
self._write(x)
262265

263-
if PYTHON_VERSION_TRIPLE > (2, 5):
264-
dispatch[bytes] = dump_string
265-
dispatch[bytearray] = dump_string
266+
dispatch[bytes] = dump_string
267+
dispatch[bytearray] = dump_string
266268

267-
def dump_unicode(self, x) -> None:
268-
self._write(TYPE_UNICODE)
269-
if not PYTHON3 and self.python_version < (3, 0):
270-
s = x.encode("utf8")
271-
else:
272-
s = x
269+
def dump_unicode(self, s) -> None:
270+
type_code = TYPE_STRING if self.python_version < (2, 0) else TYPE_UNICODE
271+
self._write(type_code)
273272
self.w_long(len(s))
274273
self._write(s)
275274

@@ -315,6 +314,43 @@ def dump_dict(self, x) -> None:
315314

316315
dispatch[dict] = dump_dict
317316

317+
def dump_code15(self, x) -> None:
318+
# Careful here: many Python 1.x code objects are strings,
319+
# but Python 3 marshaling, by default, will dump strings as
320+
# unicode. Force marshaling this type as string.
321+
322+
self._write(TYPE_CODE)
323+
self.w_short(x.co_argcount)
324+
self.w_short(x.co_nlocals)
325+
self.w_short(x.co_stacksize)
326+
self.w_short(x.co_flags)
327+
self.dump_string(x.co_code)
328+
329+
# If running in a Python3 interpreter, some constants will get
330+
# converted from string to unicode. For now, let's see if
331+
# that's okay.
332+
self.dump(x.co_consts)
333+
334+
# The tuple "names" in Python 1.x must have string entries
335+
self._write(TYPE_TUPLE)
336+
self.w_long(len(x.co_names))
337+
for name in x.co_names:
338+
self.dump_string(name)
339+
340+
# The tuple "varnames" in Python 1.x also must have string entries
341+
self._write(TYPE_TUPLE)
342+
self.w_long(len(x.co_varnames))
343+
for name in x.co_varnames:
344+
self.dump_string(name)
345+
346+
self.dump_string(x.co_filename)
347+
self.dump_string(x.co_name)
348+
self.w_long(x.co_firstlineno)
349+
self.dump_string(x.co_lnotab)
350+
return
351+
352+
dispatch[Code15] = dump_code15
353+
318354
def dump_code2(self, x) -> None:
319355
# Careful here: many Python 2 code objects are strings,
320356
# but Python 3 marshaling, by default, will dump strings as
@@ -1103,11 +1139,15 @@ def dumps(
11031139
buf = []
11041140
for b in buffer:
11051141
if isinstance(b, str) and PYTHON3:
1106-
try:
1107-
s2b = bytes(ord(b[j]) for j in range(len(b)))
1108-
except ValueError:
1109-
s2b = b.encode("utf-8")
1110-
buf.append(s2b)
1142+
if python_version < (2, 0):
1143+
# Python 1.x has no notion of Unicode. It uses strings.
1144+
buf.append(b)
1145+
else:
1146+
try:
1147+
s2b = bytes(ord(b[j]) for j in range(len(b)))
1148+
except ValueError:
1149+
s2b = b.encode("utf-8")
1150+
buf.append(s2b)
11111151
elif isinstance(b, bytearray):
11121152
buf.append(str(b))
11131153
else:

xdis/opcodes/opcode_14.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# (C) Copyright 2018-2021, 2023 by Rocky Bernstein
1+
# (C) Copyright 2018-2021, 2023, 2025 by Rocky Bernstein
22
#
33
# This program is free software; you can redistribute it and/or
44
# modify it under the terms of the GNU General Public License
@@ -68,6 +68,7 @@ def findlinestarts(co, dup_lines: bool=False):
6868
lineno = code[offset] + code[offset + 1] * 256
6969
yield (offset + 2, lineno)
7070
pass
71+
7172
if op >= loc["HAVE_ARGUMENT"]:
7273
offset += 2
7374
pass

xdis/unmarshal.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,7 @@ def compat_str(s: Union[str, bytes]) -> Union[str, bytes]:
9999
This handles working with strings between Python2 and Python3.
100100
"""
101101
if isinstance(s, bytes):
102-
try:
103-
return s.decode("utf-8")
104-
except UnicodeDecodeError:
105-
# If not Unicode, return bytes,
106-
# and it will get converted to str when needed.
107-
return s
102+
return s.decode("utf-8", errors="ignore")
108103
elif not isinstance(s, str):
109104
return str(s)
110105
else:
@@ -509,7 +504,9 @@ def t_code(self, save_ref, bytes_for_s: bool = False):
509504
# of the string.
510505
co_code_offset_in_file = self.fp.tell() + 5
511506

512-
co_code = self.r_object(bytes_for_s=True)
507+
# bytes_for_code = self.version_tuple >= (2, 0)
508+
bytes_for_code = True
509+
co_code = self.r_object(bytes_for_s=bytes_for_code)
513510

514511
# FIXME: Check/verify that is true:
515512
bytes_for_s = self.version_tuple > (3, 0)

0 commit comments

Comments
 (0)