Skip to content

Commit 43acb92

Browse files
committed
Fix the case where using group_by on a numeric key would leak type info
(e.g. 'int:33') into the reported group path
1 parent f8397c7 commit 43acb92

File tree

2 files changed

+38
-8
lines changed

2 files changed

+38
-8
lines changed

deepdiff/diff.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -332,10 +332,12 @@ def _group_by_sort_key(x):
332332
}
333333
self.hashes = dict_() if hashes is None else hashes
334334
self._numpy_paths = dict_() # if _numpy_paths is None else _numpy_paths
335+
self.group_by_keys = set() # Track keys that originated from group_by operations
335336
self._shared_parameters = {
336337
'hashes': self.hashes,
337338
'_stats': self._stats,
338339
'_distance_cache': self._distance_cache,
340+
'group_by_keys': self.group_by_keys,
339341
'_numpy_paths': self._numpy_paths,
340342
_ENABLE_CACHE_EVERY_X_DIFF: self.cache_tuning_sample_size * 10,
341343
}
@@ -599,13 +601,21 @@ def _get_clean_to_keys_mapping(self, keys, level):
599601
elif self.use_enum_value and isinstance(key, Enum):
600602
clean_key = key.value
601603
elif isinstance(key, numbers):
602-
type_ = "number" if self.ignore_numeric_type_changes else key.__class__.__name__
603-
if self.significant_digits is None:
604-
clean_key = key
604+
# Skip type prefixing for keys that originated from group_by operations
605+
if hasattr(self, 'group_by_keys') and key in self.group_by_keys:
606+
if self.significant_digits is None:
607+
clean_key = key
608+
else:
609+
clean_key = self.number_to_string(key, significant_digits=self.significant_digits,
610+
number_format_notation=self.number_format_notation)
605611
else:
606-
clean_key = self.number_to_string(key, significant_digits=self.significant_digits,
607-
number_format_notation=self.number_format_notation)
608-
clean_key = KEY_TO_VAL_STR.format(type_, clean_key)
612+
type_ = "number" if self.ignore_numeric_type_changes else key.__class__.__name__
613+
if self.significant_digits is None:
614+
clean_key = key
615+
else:
616+
clean_key = self.number_to_string(key, significant_digits=self.significant_digits,
617+
number_format_notation=self.number_format_notation)
618+
clean_key = KEY_TO_VAL_STR.format(type_, clean_key)
609619
else:
610620
clean_key = key
611621
if self.ignore_string_case and isinstance(clean_key, str):
@@ -1845,8 +1855,14 @@ def _group_iterable_to_dict(self, item, group_by, item_name):
18451855
for row in item_copy:
18461856
if isinstance(row, Mapping):
18471857
key1 = self._get_key_for_group_by(row, group_by_level1, item_name)
1858+
# Track keys created by group_by to avoid type prefixing later
1859+
if hasattr(self, 'group_by_keys'):
1860+
self.group_by_keys.add(key1)
18481861
if group_by_level2:
18491862
key2 = self._get_key_for_group_by(row, group_by_level2, item_name)
1863+
# Track level 2 keys as well
1864+
if hasattr(self, 'group_by_keys'):
1865+
self.group_by_keys.add(key2)
18501866
if key1 not in result:
18511867
result[key1] = {}
18521868
if self.group_by_sort_key:

tests/test_diff_text.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@
99
from typing import List
1010
from decimal import Decimal
1111
from deepdiff import DeepDiff
12-
from deepdiff.helper import pypy3, PydanticBaseModel
12+
from deepdiff.helper import pypy3, PydanticBaseModel, SetOrdered, np_float64
1313
from tests import CustomClass
14-
from deepdiff.helper import np_float64
1514

1615

1716
logging.disable(logging.CRITICAL)
@@ -2258,3 +2257,18 @@ def test_range1(self):
22582257
range2 = range(0, 8)
22592258
diff = DeepDiff(range1, range2)
22602259
assert {'iterable_item_removed': {'root[8]': 8, 'root[9]': 9}} == diff
2260+
2261+
2262+
def test_group_by_that_has_integers(self):
2263+
"""Test that group_by with integer keys doesn't add type prefixes like 'int:33'"""
2264+
t1 = [{'row_num_in_file': 33, 'value': 'old'}]
2265+
t2 = [{'row_num_in_file': 33, 'value': 'new'}]
2266+
2267+
diff = DeepDiff(t1, t2, group_by='row_num_in_file', ignore_string_type_changes=True)
2268+
2269+
# Verify that the diff key contains the integer 33 without type prefix
2270+
changes = diff.get('values_changed', {})
2271+
assert len(changes) == 1
2272+
key = list(changes.keys())[0]
2273+
assert "int:" not in key
2274+
assert "[33]" in key or "['33']" in key

0 commit comments

Comments
 (0)