Skip to content

Commit 872a45a

Browse files
committed
Switched back from OrderlySet to StableSet because OrderlySet returned
unordered sets after any operation other than add or remove.
1 parent 429b348 commit 872a45a

21 files changed

+145
-179
lines changed

deepdiff/anyset.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
from ordered_set import OrderedSet
21
from deepdiff.deephash import DeepHash
3-
from deepdiff.helper import dict_
2+
from deepdiff.helper import dict_, SortedSet
43

54

65
class AnySet:
@@ -11,7 +10,7 @@ class AnySet:
1110
However once the AnySet object is deleted, all those traces will be gone too.
1211
"""
1312
def __init__(self, items=None):
14-
self._set = OrderedSet()
13+
self._set = SortedSet()
1514
self._hashes = dict_()
1615
self._hash_to_objects = dict_()
1716
if items:

deepdiff/base.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from ordered_set import OrderedSet
2-
from deepdiff.helper import strings, numbers
1+
from deepdiff.helper import strings, numbers, SortedSet
32

43

54
DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12
@@ -31,18 +30,18 @@ def get_ignore_types_in_groups(self, ignore_type_in_groups,
3130

3231
result = []
3332
for item_group in ignore_type_in_groups:
34-
new_item_group = OrderedSet()
33+
new_item_group = SortedSet()
3534
for item in item_group:
3635
item = type(item) if item is None or not isinstance(item, type) else item
3736
new_item_group.add(item)
3837
result.append(new_item_group)
3938
ignore_type_in_groups = result
4039

4140
if ignore_string_type_changes and self.strings not in ignore_type_in_groups:
42-
ignore_type_in_groups.append(OrderedSet(self.strings))
41+
ignore_type_in_groups.append(SortedSet(self.strings))
4342

4443
if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups:
45-
ignore_type_in_groups.append(OrderedSet(self.numbers))
44+
ignore_type_in_groups.append(SortedSet(self.numbers))
4645

4746
if not ignore_type_subclasses:
4847
# is_instance method needs tuples. When we look for subclasses, we need them to be tuples

deepdiff/delta.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from functools import partial, cmp_to_key
55
from collections.abc import Mapping
66
from copy import deepcopy
7-
from ordered_set import OrderedSet
87
from deepdiff import DeepDiff
98
from deepdiff.serialization import pickle_load, pickle_dump
109
from deepdiff.helper import (
@@ -14,6 +13,7 @@
1413
Opcode, FlatDeltaRow, UnkownValueCode, FlatDataAction,
1514
OPCODE_TAG_TO_FLAT_DATA_ACTION,
1615
FLAT_DATA_ACTION_TO_OPCODE_TAG,
16+
SortedSet,
1717
)
1818
from deepdiff.path import (
1919
_path_to_elements, _get_nested_obj, _get_nested_obj_and_force,
@@ -744,7 +744,7 @@ def _do_ignore_order(self):
744744
"""
745745
fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
746746
remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
747-
paths = OrderedSet(fixed_indexes.keys()) | OrderedSet(remove_indexes.keys())
747+
paths = SortedSet(fixed_indexes.keys()) | SortedSet(remove_indexes.keys())
748748
for path in paths:
749749
# In the case of ignore_order reports, we are pointing to the container object.
750750
# Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.

deepdiff/diff.py

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,23 @@
1616
from collections import defaultdict
1717
from inspect import getmembers
1818
from itertools import zip_longest
19-
from ordered_set import OrderedSet
2019
from deepdiff.helper import (strings, bytes_type, numbers, uuids, datetimes, ListItemRemovedOrAdded, notpresent,
2120
IndexedHash, unprocessed, add_to_frozen_set, basic_types,
2221
convert_item_or_items_into_set_else_none, get_type,
2322
convert_item_or_items_into_compiled_regexes_else_none,
2423
type_is_subclass_of_type_group, type_in_type_group, get_doc,
2524
number_to_string, datetime_normalize, KEY_TO_VAL_STR, booleans,
26-
np_ndarray, np_floating, get_numpy_ndarray_rows, OrderedSetPlus, RepeatedTimer,
25+
np_ndarray, np_floating, get_numpy_ndarray_rows, RepeatedTimer,
2726
TEXT_VIEW, TREE_VIEW, DELTA_VIEW, detailed__dict__, add_root_to_paths,
2827
np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS,
29-
PydanticBaseModel, Opcode,)
28+
PydanticBaseModel, Opcode, SortedSet)
3029
from deepdiff.serialization import SerializationMixin
3130
from deepdiff.distance import DistanceMixin
3231
from deepdiff.model import (
3332
RemapDict, ResultDict, TextResult, TreeResult, DiffLevel,
3433
DictRelationship, AttributeRelationship, REPORT_KEYS,
3534
SubscriptableIterableRelationship, NonSubscriptableIterableRelationship,
36-
SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD, PrettyOrderedSet,
35+
SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD,
3736
FORCE_DEFAULT,
3837
)
3938
from deepdiff.deephash import DeepHash, combine_hashes_lists
@@ -567,27 +566,26 @@ def _diff_dict(
567566
rel_class = DictRelationship
568567

569568
if self.ignore_private_variables:
570-
t1_keys = OrderedSet([key for key in t1 if not(isinstance(key, str) and key.startswith('__'))])
571-
t2_keys = OrderedSet([key for key in t2 if not(isinstance(key, str) and key.startswith('__'))])
569+
t1_keys = SortedSet([key for key in t1 if not(isinstance(key, str) and key.startswith('__'))])
570+
t2_keys = SortedSet([key for key in t2 if not(isinstance(key, str) and key.startswith('__'))])
572571
else:
573-
t1_keys = OrderedSet(t1.keys())
574-
t2_keys = OrderedSet(t2.keys())
572+
t1_keys = SortedSet(t1.keys())
573+
t2_keys = SortedSet(t2.keys())
575574
if self.ignore_string_type_changes or self.ignore_numeric_type_changes or self.ignore_string_case:
576575
t1_clean_to_keys = self._get_clean_to_keys_mapping(keys=t1_keys, level=level)
577576
t2_clean_to_keys = self._get_clean_to_keys_mapping(keys=t2_keys, level=level)
578-
t1_keys = OrderedSet(t1_clean_to_keys.keys())
579-
t2_keys = OrderedSet(t2_clean_to_keys.keys())
577+
t1_keys = SortedSet(t1_clean_to_keys.keys())
578+
t2_keys = SortedSet(t2_clean_to_keys.keys())
580579
else:
581580
t1_clean_to_keys = t2_clean_to_keys = None
582581

583-
t_keys_intersect = t2_keys.intersection(t1_keys)
584-
582+
t_keys_intersect = t2_keys & t1_keys
583+
t_keys_union = t2_keys | t1_keys
585584
t_keys_added = t2_keys - t_keys_intersect
586585
t_keys_removed = t1_keys - t_keys_intersect
587586

588587
if self.threshold_to_diff_deeper:
589-
len_keys_changed = (len(t_keys_added) + len(t_keys_removed))
590-
if len_keys_changed and len(t_keys_intersect) / len_keys_changed < self.threshold_to_diff_deeper:
588+
if len(t_keys_union) and len(t_keys_intersect) / len(t_keys_union) < self.threshold_to_diff_deeper:
591589
self._report_result('values_changed', level, local_tree=local_tree)
592590
return
593591

@@ -1142,7 +1140,7 @@ def _get_most_in_common_pairs_in_iterables(
11421140
# It also includes a "max" key that is just the value of the biggest current distance in the
11431141
# most_in_common_pairs dictionary.
11441142
def defaultdict_orderedset():
1145-
return defaultdict(OrderedSetPlus)
1143+
return defaultdict(SortedSet)
11461144
most_in_common_pairs = defaultdict(defaultdict_orderedset)
11471145
pairs = dict_()
11481146

@@ -1185,7 +1183,7 @@ def defaultdict_orderedset():
11851183
pairs_of_item[_distance].add(removed_hash)
11861184
used_to_hashes = set()
11871185

1188-
distances_to_from_hashes = defaultdict(OrderedSetPlus)
1186+
distances_to_from_hashes = defaultdict(SortedSet)
11891187
for from_hash, distances_to_to_hashes in most_in_common_pairs.items():
11901188
# del distances_to_to_hashes['max']
11911189
for dist in distances_to_to_hashes:
@@ -1194,11 +1192,11 @@ def defaultdict_orderedset():
11941192
for dist in sorted(distances_to_from_hashes.keys()):
11951193
from_hashes = distances_to_from_hashes[dist]
11961194
while from_hashes:
1197-
from_hash = from_hashes.lpop()
1195+
from_hash = from_hashes.pop()
11981196
if from_hash not in used_to_hashes:
11991197
to_hashes = most_in_common_pairs[from_hash][dist]
12001198
while to_hashes:
1201-
to_hash = to_hashes.lpop()
1199+
to_hash = to_hashes.pop()
12021200
if to_hash not in used_to_hashes:
12031201
used_to_hashes.add(from_hash)
12041202
used_to_hashes.add(to_hash)
@@ -1217,8 +1215,8 @@ def _diff_iterable_with_deephash(self, level, parents_ids, _original_type=None,
12171215

12181216
full_t1_hashtable = self._create_hashtable(level, 't1')
12191217
full_t2_hashtable = self._create_hashtable(level, 't2')
1220-
t1_hashes = OrderedSetPlus(full_t1_hashtable.keys())
1221-
t2_hashes = OrderedSetPlus(full_t2_hashtable.keys())
1218+
t1_hashes = SortedSet(full_t1_hashtable.keys())
1219+
t2_hashes = SortedSet(full_t2_hashtable.keys())
12221220
hashes_added = t2_hashes - t1_hashes
12231221
hashes_removed = t1_hashes - t2_hashes
12241222

@@ -1630,7 +1628,7 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=
16301628
elif isinstance(level.t1, tuple):
16311629
self._diff_tuple(level, parents_ids, local_tree=local_tree)
16321630

1633-
elif isinstance(level.t1, (set, frozenset, OrderedSet)):
1631+
elif isinstance(level.t1, (set, frozenset, SortedSet)):
16341632
self._diff_set(level, local_tree=local_tree)
16351633

16361634
elif isinstance(level.t1, np_ndarray):
@@ -1752,19 +1750,19 @@ def affected_paths(self):
17521750
'iterable_item_added': {'root[3][1]': 4},
17531751
'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}}
17541752
>>> ddiff.affected_paths
1755-
OrderedSet(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
1753+
SortedSet(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
17561754
>>> ddiff.affected_root_keys
1757-
OrderedSet([3, 4, 5, 6, 2])
1755+
SortedSet([3, 4, 5, 6, 2])
17581756
17591757
"""
1760-
result = OrderedSet()
1758+
result = SortedSet()
17611759
for key in REPORT_KEYS:
17621760
value = self.get(key)
17631761
if value:
1764-
if isinstance(value, PrettyOrderedSet):
1762+
if isinstance(value, SortedSet):
17651763
result |= value
17661764
else:
1767-
result |= OrderedSet(value.keys())
1765+
result |= SortedSet(value.keys())
17681766
return result
17691767

17701768
@property
@@ -1784,18 +1782,18 @@ def affected_root_keys(self):
17841782
'iterable_item_added': {'root[3][1]': 4},
17851783
'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}}
17861784
>>> ddiff.affected_paths
1787-
OrderedSet(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
1785+
SortedSet(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
17881786
>>> ddiff.affected_root_keys
1789-
OrderedSet([3, 4, 5, 6, 2])
1787+
SortedSet([3, 4, 5, 6, 2])
17901788
"""
1791-
result = OrderedSet()
1789+
result = SortedSet()
17921790
for key in REPORT_KEYS:
17931791
value = self.tree.get(key)
17941792
if value:
1795-
if isinstance(value, PrettyOrderedSet):
1796-
result |= OrderedSet([i.get_root_key() for i in value])
1793+
if isinstance(value, SortedSet):
1794+
result |= SortedSet([i.get_root_key() for i in value])
17971795
else:
1798-
result |= OrderedSet([i.get_root_key() for i in value.keys()])
1796+
result |= SortedSet([i.get_root_key() for i in value.keys()])
17991797
return result
18001798

18011799

deepdiff/distance.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def _precalculate_numpy_arrays_distance(
9898
self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type):
9999

100100
# We only want to deal with 1D arrays.
101-
if isinstance(t2_hashtable[hashes_added[0]].item, (np_ndarray, list)):
101+
if isinstance(t2_hashtable[next(iter(hashes_added))].item, (np_ndarray, list)):
102102
return
103103

104104
pre_calced_distances = dict_()

deepdiff/helper.py

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
from ast import literal_eval
1313
from decimal import Decimal, localcontext, InvalidOperation as InvalidDecimalOperation
1414
from itertools import repeat
15-
from ordered_set import OrderedSet
15+
# from orderly_set import OrderlySet as OrderedSetModule # median: 0.806 s, some tests are failing
16+
# from orderly_set import SortedSet as OrderedSetModule # median 1.011 s, didn't work for tests
17+
from orderly_set import StableSetEq as OrderedSetModule # median: 1.0867 s for cache test, 5.63s for all tests
18+
# from orderly_set import OrderedSet as OrderedSetModule # median 1.1256 s for cache test, 5.63s for all tests
1619
from threading import Timer
1720

1821

@@ -24,6 +27,11 @@ class pydantic_base_model_type:
2427
pass
2528

2629

30+
class SortedSet(OrderedSetModule):
31+
def __repr__(self):
32+
return str(list(self))
33+
34+
2735
try:
2836
import numpy as np
2937
except ImportError: # pragma: no cover. The case without Numpy is tested locally only.
@@ -318,7 +326,7 @@ def add_root_to_paths(paths):
318326
"""
319327
if paths is None:
320328
return
321-
result = OrderedSet()
329+
result = SortedSet()
322330
for path in paths:
323331
if path.startswith('root'):
324332
result.add(path)
@@ -524,31 +532,6 @@ def __repr__(self):
524532
warnings.simplefilter('once', DeepDiffDeprecationWarning)
525533

526534

527-
class OrderedSetPlus(OrderedSet):
528-
529-
def lpop(self):
530-
"""
531-
Remove and return the first element from the set.
532-
Raises KeyError if the set is empty.
533-
Example:
534-
>>> oset = OrderedSet([1, 2, 3])
535-
>>> oset.lpop()
536-
1
537-
"""
538-
if not self.items:
539-
raise KeyError('lpop from an empty set')
540-
541-
elem = self.items[0]
542-
del self.items[0]
543-
del self.map[elem]
544-
return elem
545-
546-
def __repr__(self):
547-
return str(list(self))
548-
549-
__str__ = __repr__
550-
551-
552535
class RepeatedTimer:
553536
"""
554537
Threaded Repeated Timer by MestreLion

deepdiff/lfucache.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,16 @@
55
Modified by Sep Dehpour
66
"""
77
from collections import defaultdict
8-
from ordered_set import OrderedSet
98
from threading import Lock
109
from statistics import mean
11-
from deepdiff.helper import not_found, dict_
10+
from deepdiff.helper import not_found, dict_, SortedSet
1211

1312

1413
class CacheNode:
1514
def __init__(self, key, report_type, value, freq_node, pre, nxt):
1615
self.key = key
1716
if report_type:
18-
self.content = defaultdict(OrderedSet)
17+
self.content = defaultdict(SortedSet)
1918
self.content[report_type].add(value)
2019
else:
2120
self.content = value

0 commit comments

Comments
 (0)