Skip to content

Commit 634bbb3

Browse files
author
rocky
committed
Handle Python2 intern better
1 parent cbb9f07 commit 634bbb3

File tree

7 files changed

+52
-35
lines changed

7 files changed

+52
-35
lines changed
462 Bytes
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../01_extended_arg.pyc

xdis/codetype/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE):
142142
co_lnotab=line_table,
143143
co_freevars=code.co_freevars, # not in 1.x
144144
co_cellvars=code.co_cellvars, # not in 1.x
145+
146+
# THINK ABOUT: If collection_order isn't defined, i.e. native code
147+
# type, should we try to extract it?
148+
collection_order=code.collection_order if hasattr(code, "collection_order") else {},
149+
reference_objects=code.reference_objects if hasattr(code, "reference_objects") else set(),
150+
145151
)
146152
else:
147153
# 1.0 .. 1.5

xdis/codetype/code20.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import types
1818
from copy import deepcopy
1919
from types import CodeType
20+
from typing import Any, Dict, Set, Tuple, Union
2021

2122
from xdis.codetype.code15 import Code15, Code15FieldTypes
2223
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
@@ -55,20 +56,22 @@ class Code2(Code15):
5556

5657
def __init__(
5758
self,
58-
co_argcount,
59-
co_nlocals,
60-
co_stacksize,
59+
co_argcount: int,
60+
co_nlocals: int,
61+
co_stacksize: int,
6162
co_flags,
6263
co_code,
6364
co_consts,
6465
co_names,
6566
co_varnames,
6667
co_filename,
67-
co_name,
68-
co_firstlineno,
68+
co_name: str,
69+
co_firstlineno: int,
6970
co_lnotab,
7071
co_freevars,
7172
co_cellvars,
73+
collection_order: Dict[Union[set, frozenset, dict], Tuple[Any]] = {},
74+
reference_objects: Set[Any] = set(),
7275
) -> None:
7376
# Keyword argument parameters in the call below is more robust.
7477
# Since things change around, robustness is good.
@@ -89,6 +92,19 @@ def __init__(
8992
self.co_freevars = co_freevars
9093
self.co_cellvars = co_cellvars
9194
self.fieldtypes = Code2FieldTypes
95+
96+
# The following fields are mostly useful in marshaling a code object.
97+
# Keeping marshal order exactly the same is useful in round-trip marshal
98+
# testing; but it may also have other benefits.
99+
100+
# By saving the order in sets, frozensets, and dictionary keys,
101+
# these collections can be written in the same order that appeared
102+
# in unmarshalling (if that's how the code object was created).
103+
self.collection_order = collection_order
104+
105+
# Keeping track of which objects were referenced allows the marshal writer to emit the same interned/reference type codes when dumping.
106+
self.reference_objects = reference_objects
107+
92108
if type(self) is Code2:
93109
self.check()
94110
return

xdis/codetype/code30.py

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ class Code3(Code2):
4646

4747
def __init__(
4848
self,
49-
co_argcount,
50-
co_kwonlyargcount,
51-
co_nlocals,
52-
co_stacksize,
49+
co_argcount: int,
50+
co_kwonlyargcount: int,
51+
co_nlocals: int,
52+
co_stacksize: int,
5353
co_flags,
5454
co_code,
5555
co_consts,
@@ -81,23 +81,12 @@ def __init__(
8181
co_lnotab=co_lnotab,
8282
co_freevars=co_freevars,
8383
co_cellvars=co_cellvars,
84+
collection_order=collection_order,
85+
reference_objects=reference_objects,
8486
)
8587
self.co_kwonlyargcount = co_kwonlyargcount
8688
self.fieldtypes = Code3FieldTypes
8789

88-
# The following fields are mostly useful in marshaling a code object.
89-
# Keeping marshal order exactly the same is useful in round-trip marshal
90-
# testing; but it may also have other benefits.
91-
92-
# By saving the order in sets, frozensets, and dictionary keys,
93-
# these collections can be written in the same order that appeared
94-
# in unmarshalling (if that's how the code object was created).
95-
self.collection_order = collection_order
96-
97-
# Keeping track of which objects were referenced, allows
98-
self.reference_objects = reference_objects
99-
100-
10190
if type(self) is Code3:
10291
self.check()
10392
return

xdis/marsh.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ def dump_collection(self, type_code: str, bag: Union[frozenset, set, dict]) -> N
299299
for each in collection:
300300
self.dump(each)
301301

302-
def dump_complex(self, x) -> None:
302+
def dump_complex(self, x, _) -> None:
303303
write = self._write
304304
write(TYPE_COMPLEX)
305305
s = repr(x.real)
@@ -551,16 +551,20 @@ def dump_stopiter(self, x) -> None:
551551

552552
dispatch[type(StopIteration)] = dump_stopiter
553553

554-
def dump_string(self, x, flag_ref: int = 0) -> None:
554+
def dump_string(self, s, flag_ref: int = 0) -> None:
555555
# Python 3.11 seems to add the object ref flag bit for strings.
556-
type_string = (
557-
TYPE_STRING
558-
if self.python_version < (3, 11)
559-
else chr(ord(TYPE_STRING) | flag_ref)
560-
)
561-
self._write(type_string)
562-
self.w_long(len(x))
563-
self._write(x)
556+
if self.python_version >= (3, 11):
557+
type_code = chr(ord(TYPE_STRING) | flag_ref)
558+
elif (3, 0) <= self.python_version < (3, 11):
559+
type_code = TYPE_STRING
560+
else:
561+
# Python 2.x.
562+
# FIXME: save string somewhere if it isn't in string table.
563+
type_code = TYPE_INTERNED if s in self.reference_objects else TYPE_STRING
564+
565+
self._write(type_code)
566+
self.w_long(len(s))
567+
self._write(s)
564568

565569
dispatch[bytes] = dump_string
566570
dispatch[bytearray] = dump_string
@@ -585,8 +589,10 @@ def dump_tuple(self, tuple_object: tuple, flag_ref: int = 0) -> None:
585589
def dump_unicode(self, s, flag_ref: int = 0) -> None:
586590
if self.python_version < (2, 0):
587591
type_code = TYPE_STRING
588-
elif (2, 0) <= self.python_version <= (3, 0):
589-
type_code = TYPE_INTERNED
592+
elif (2, 0) <= self.python_version < (3, 0):
593+
# FIXME: probably need to save string somewhere
594+
# if it isn't in string table.
595+
type_code = TYPE_INTERNED if s in self.reference_objects else TYPE_STRING
590596
else:
591597
type_code = TYPE_UNICODE
592598

xdis/unmarshal.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,6 @@ def t_short_ASCII_interned(self, save_ref, bytes_for_s: bool = False):
398398
self.intern_strings.append(interned)
399399
return self.r_ref(interned, save_ref)
400400

401-
# Since Python 3.4
402401
def t_interned(self, save_ref, bytes_for_s: bool = False):
403402
strsize = unpack("<i", self.fp.read(4))[0]
404403
interned = compat_str(self.fp.read(strsize))

0 commit comments

Comments
 (0)