Skip to content

Commit 7f13cb2

Browse files
author
rocky
committed
Match collection ordering in unmarshal to marshal
1 parent c4034f2 commit 7f13cb2

File tree

8 files changed

+72
-28
lines changed

8 files changed

+72
-28
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../06_frozenset.pyc

xdis/codetype/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import types
1818
from collections import namedtuple
19-
from typing import Optional
19+
from typing import Any, Dict, Optional, Tuple, Union
2020

2121
from xdis.codetype.base import CodeBase
2222
from xdis.codetype.code13 import Code13
@@ -60,6 +60,10 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE):
6060
line_table,
6161
code.co_freevars,
6262
code.co_cellvars,
63+
64+
# THINK ABOUT: If collection_order isn't defined, i.e. native code
65+
# type, should we try to extract it?
66+
code.collection_order if hasattr(code, "collection_order") else {}
6367
)
6468
elif version_tuple < (3, 10):
6569
return Code38(
@@ -203,7 +207,7 @@ def portableCodeType(version_tuple=PYTHON_VERSION_TRIPLE):
203207
# In contrast to Code3, Code2, etc. you can use CodeTypeUnion for building
204208
# an incomplete code type, which might be converted to another code type
205209
# later.
206-
CodeTypeUnionFields = tuple(Code311FieldNames.split())
210+
CodeTypeUnionFields = tuple(Code311FieldNames.split() + ["collection_order"])
207211
CodeTypeUnion = namedtuple("CodeTypeUnion", CodeTypeUnionFields)
208212

209213

@@ -230,6 +234,7 @@ def to_portable(
230234
co_cellvars: tuple[None]=(None,), # 2.0+
231235
co_exceptiontable=None, # 3.11+
232236
version_triple=PYTHON_VERSION_TRIPLE,
237+
collection_order: Dict[Union[set, frozenset, dict], Tuple[Any]] = {},
233238
):
234239
code = CodeTypeUnion(
235240
co_argcount=co_argcount,
@@ -250,6 +255,7 @@ def to_portable(
250255
co_freevars=co_freevars,
251256
co_cellvars=co_cellvars,
252257
co_exceptiontable=co_exceptiontable,
258+
collection_order=collection_order,
253259
)
254260
return codeType2Portable(code, version_triple)
255261

xdis/codetype/code20.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def __init__(
7272
) -> None:
7373
# Using keyword arguments in the call below is more robust.
7474
# Since things change around, robustness is good.
75-
super(Code2, self).__init__(
75+
super().__init__(
7676
co_argcount=co_argcount,
7777
co_nlocals=co_nlocals,
7878
co_stacksize=co_stacksize,
@@ -89,7 +89,7 @@ def __init__(
8989
self.co_freevars = co_freevars
9090
self.co_cellvars = co_cellvars
9191
self.fieldtypes = Code2FieldTypes
92-
if type(self) == Code2:
92+
if type(self) is Code2:
9393
self.check()
9494
return
9595

xdis/codetype/code30.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import types
1818
from copy import deepcopy
1919
from types import CodeType
20-
from typing import Dict, List, Union
20+
from typing import Any, Dict, Tuple, Union
2121

2222
from xdis.codetype.code20 import Code2, Code2FieldTypes
2323
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
@@ -61,7 +61,7 @@ def __init__(
6161
co_lnotab,
6262
co_freevars,
6363
co_cellvars,
64-
collection_order: Dict[Union[set, frozenset, dict], List[str]] = {}
64+
collection_order: Dict[Union[set, frozenset, dict], Tuple[Any]] = {}
6565
) -> None:
6666
# Using keyword arguments in the call below is more robust.
6767
# Since things change around, robustness is good.

xdis/codetype/code310.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import struct
1818
import types
1919
from copy import deepcopy
20+
from typing import Any, Dict, Tuple, Union
2021

2122
from xdis.codetype.code38 import Code38
2223
from xdis.cross_types import UnicodeForPython3
@@ -96,6 +97,7 @@ def __init__(
9697
co_linetable,
9798
co_freevars,
9899
co_cellvars,
100+
collection_order: Dict[Union[set, frozenset, dict], Tuple[Any]] = {}
99101
) -> None:
100102
# Using keyword arguments in the call below is more robust.
101103
# Since things change around, robustness is good.
@@ -116,6 +118,11 @@ def __init__(
116118
self.co_stacksize = co_stacksize
117119
self.co_varnames = co_varnames
118120
self.fieldtypes = Code310FieldTypes
121+
122+
# It is helpful to save the order in sets, frozensets and dictionary keys,
123+
# so that on writing a bytecode file we can duplicate this order.
124+
self.collection_order = collection_order
125+
119126
if type(self) is Code310:
120127
self.check()
121128

xdis/codetype/code38.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import types
1818
from copy import deepcopy
19-
from typing import Dict, List, Union
19+
from typing import Any, Dict, Tuple, Union
2020

2121
from xdis.codetype.code30 import Code3, Code3FieldTypes
2222
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
@@ -84,7 +84,7 @@ def __init__(
8484
co_lnotab,
8585
co_freevars,
8686
co_cellvars,
87-
collection_order: Dict[Union[set, frozenset, dict], List[str]] = {}
87+
collection_order: Dict[Union[set, frozenset, dict], Tuple[Any]] = {}
8888
) -> None:
8989
# Using keyword arguments in the call below is more robust.
9090
# Since things change around, robustness is good.

xdis/marsh.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import types
2929
from sys import intern
3030
from types import CodeType, EllipsisType
31-
from typing import Optional
31+
from typing import Any, Optional, Set, Union
3232

3333
from xdis.codetype import Code2, Code3, Code15
3434
from xdis.unmarshal import (
@@ -88,11 +88,16 @@ class _Marshaller:
8888
dispatch = {}
8989

9090
def __init__(
    self,
    writefunc,
    python_version: tuple,
    is_pypy: Optional[bool] = None,
    collection_order=None,
) -> None:
    """
    Set up a marshaller that emits its output through ``writefunc``.

    :param writefunc: callable invoked with each piece of marshalled
        output as it is produced.
    :param python_version: version tuple of the Python whose marshal
        format is being written.
    :param is_pypy: stored as-is on the instance; ``None`` when unknown.
    :param collection_order: optional mapping from a collection to a
        tuple giving the order its elements should be written in.
        When omitted, a new empty mapping is created for this instance.
    """
    self._write = writefunc
    self.python_version = python_version
    self.is_pypy = is_pypy
    # A "{}" default would be created once and shared by every instance
    # constructed without this argument; build a fresh dict instead.
    # Test against None (not truthiness) so that a caller-supplied empty
    # dict is still stored by identity.
    self.collection_order = {} if collection_order is None else collection_order
96101

97102
def dump(self, x) -> None:
98103
if (
@@ -417,6 +422,7 @@ def dump_code3(self, x) -> None:
417422

418423
dispatch[Code3] = dump_code3
419424

425+
# FIXME: this is wrong.
420426
try:
421427
if PYTHON3:
422428
dispatch[types.CodeType] = dump_code3
@@ -425,22 +431,32 @@ def dump_code3(self, x) -> None:
425431
except NameError:
426432
pass
427433

428-
def dump_set(self, x) -> None:
429-
self._write(TYPE_SET)
430-
self.w_long(len(x))
431-
for each in x:
434+
def dump_collection(self, type_code: str, bag: Union[frozenset, set, dict]) -> None:
    """
    Write the marshalled form of the collection ``bag``.

    The type code is written first, then the number of elements, then
    each element in turn.  If ``self.collection_order`` holds an entry
    for ``bag``, the elements are written in that recorded order instead
    of ``bag``'s own iteration order, so that a collection which came
    from unmarshalling is written back the same way.  This helps
    round-trip checking, among possibly other things.

    :param type_code: marshal type code to emit before the collection.
    :param bag: the collection whose elements are to be written.
    """
    self._write(type_code)
    self.w_long(len(bag))
    try:
        collection = self.collection_order.get(bag, bag)
    except TypeError:
        # set and dict objects are unhashable, so looking them up in a
        # dict raises TypeError.  They cannot have a recorded order;
        # fall back to the collection's own iteration order.
        collection = bag
    for each in collection:
        self.dump(each)
433446

434-
try:
435-
dispatch[set] = dump_set
436-
except NameError:
437-
pass
447+
def dump_set(self, s: Set[Any]) -> None:
    """
    Marshal the set ``s``.

    Delegates to dump_collection(), tagging the output with TYPE_SET.
    """
    self.dump_collection(type_code=TYPE_SET, bag=s)
438452

439-
def dump_frozenset(self, x) -> None:
440-
self._write(TYPE_FROZENSET)
441-
self.w_long(len(x))
442-
for each in x:
443-
self.dump(each)
453+
dispatch[set] = dump_set
454+
455+
def dump_frozenset(self, fs: frozenset) -> None:
    """
    Marshal the frozenset ``fs``.

    Delegates to dump_collection(), tagging the output with
    TYPE_FROZENSET.
    """
    self.dump_collection(type_code=TYPE_FROZENSET, bag=fs)
444460

445461
try:
446462
dispatch[frozenset] = dump_frozenset
@@ -1103,7 +1119,13 @@ def dumps(
11031119
is_pypy: Optional[bool] = None,
11041120
) -> bytes | str:
11051121
buffer = []
1106-
m = _Marshaller(buffer.append, python_version=python_version, is_pypy=is_pypy)
1122+
collection_order = x.collection_order if hasattr(x, "collection_order") else {}
1123+
m = _Marshaller(
1124+
buffer.append,
1125+
python_version=python_version,
1126+
is_pypy=is_pypy,
1127+
collection_order=collection_order,
1128+
)
11071129
m.dump(x)
11081130
if python_version:
11091131
is_python3 = python_version >= (3, 0)

xdis/unmarshal.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
import sys
3131
from struct import unpack
3232
from types import EllipsisType
33-
from typing import Union
33+
from typing import Any, Dict, Tuple, Union
3434

3535
from xdis.codetype import to_portable
3636
from xdis.cross_types import LongTypeForPython3, UnicodeForPython3
@@ -162,6 +162,10 @@ def __init__(self, fp, magic_int, bytes_for_s, code_objects={}) -> None:
162162
# objects starts.
163163
self.code_to_file_offsets = {}
164164

165+
# It is helpful to save the order in sets, frozensets and dictionary keys,
166+
# so that on writing a bytecode file we can duplicate this order.
167+
self.collection_order: Dict[Union[set, frozenset, dict], Tuple[Any]] = {}
168+
165169
self.bytes_for_s = bytes_for_s
166170
version = magic_int2tuple(self.magic_int)
167171
if version >= (3, 4):
@@ -441,11 +445,14 @@ def t_list(self, save_ref, bytes_for_s: bool = False):
441445

442446
def t_frozenset(self, save_ref, bytes_for_s: bool = False):
    """
    Read a frozenset from the input stream.

    A 4-byte little-endian signed element count is read first, followed
    by that many objects.  The order in which the elements were read is
    recorded in ``self.collection_order`` under the resulting frozenset,
    so that the same order can be reproduced when writing.
    """
    (count,) = unpack("<i", self.fp.read(4))
    elements, ref_index = self.r_ref_reserve([], save_ref)
    for _ in range(count):
        elements.append(self.r_object(bytes_for_s=bytes_for_s))
    result = frozenset(elements)
    # Remember the order in which the elements appeared in the stream.
    self.collection_order[result] = tuple(elements)
    return self.r_ref_insert(result, ref_index)
449456

450457
def t_set(self, save_ref, bytes_for_s: bool = False):
451458
setsize = unpack("<i", self.fp.read(4))[0]
@@ -658,6 +665,7 @@ def t_code(self, save_ref, bytes_for_s: bool = False):
658665
co_cellvars=co_cellvars,
659666
co_exceptiontable=co_exceptiontable,
660667
version_triple=self.version_tuple,
668+
collection_order=self.collection_order,
661669
)
662670

663671
self.code_to_file_offsets[code] = (code_offset_in_file, co_code_offset_in_file)

0 commit comments

Comments
 (0)