Skip to content
Merged

8.4.0 #525

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ Tested on Python 3.8+ and PyPy3.

Please check the [ChangeLog](CHANGELOG.md) file for the detailed information.

DeepDiff 8-4-0

- Added BaseOperatorPlus base class for custom operators
- default_timezone can now be passed to set your default timezone to something other than UTC.
- New summarization algorithm that produces valid JSON
- Better type hint support

DeepDiff 8-3-0

- Fixed some static typing issues
Expand Down
12 changes: 11 additions & 1 deletion deepdiff/base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
from typing import Protocol, Any
from deepdiff.helper import strings, numbers, SetOrdered


DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12
TYPE_STABILIZATION_MSG = 'Unable to stabilize the Numpy array {} due to {}. Please set ignore_order=False.'


class Base:
class BaseProtocol(Protocol):
t1: Any
t2: Any
cutoff_distance_for_pairs: float
use_log_scale: bool
log_scale_similarity_threshold: float
view: str


class Base(BaseProtocol):
numbers = numbers
strings = strings

Expand Down
54 changes: 33 additions & 21 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env python
import inspect
import pytz
import logging
import datetime
from typing import Union, Optional, Any, List
from collections.abc import Iterable, MutableMapping
from collections import defaultdict
from hashlib import sha1, sha256
Expand All @@ -14,7 +15,6 @@
number_to_string, datetime_normalize, KEY_TO_VAL_STR,
get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel)

from deepdiff.summarize import summarize
from deepdiff.base import Base

try:
Expand Down Expand Up @@ -141,30 +141,32 @@ class DeepHash(Base):
def __init__(self,
obj,
*,
hashes=None,
exclude_types=None,
apply_hash=True,
custom_operators: Optional[List[Any]] =None,
default_timezone:Union[datetime.timezone, datetime.timezone, pytz.tzinfo.BaseTzInfo]=datetime.timezone.utc,
encodings=None,
exclude_obj_callback=None,
exclude_paths=None,
include_paths=None,
exclude_regex_paths=None,
exclude_types=None,
hasher=None,
hashes=None,
ignore_encoding_errors=False,
ignore_iterable_order=True,
ignore_numeric_type_changes=False,
ignore_private_variables=True,
ignore_repetition=True,
significant_digits=None,
truncate_datetime=None,
number_format_notation="f",
apply_hash=True,
ignore_type_in_groups=None,
ignore_string_case=False,
ignore_string_type_changes=False,
ignore_numeric_type_changes=False,
ignore_type_in_groups=None,
ignore_type_subclasses=False,
ignore_string_case=False,
use_enum_value=False,
exclude_obj_callback=None,
include_paths=None,
number_format_notation="f",
number_to_string_func=None,
ignore_private_variables=True,
parent="root",
encodings=None,
ignore_encoding_errors=False,
ignore_iterable_order=True,
significant_digits=None,
truncate_datetime=None,
use_enum_value=False,
**kwargs):
if kwargs:
raise ValueError(
Expand All @@ -173,7 +175,7 @@ def __init__(self,
"exclude_paths, include_paths, exclude_regex_paths, hasher, ignore_repetition, "
"number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
"ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
"number_to_string_func, ignore_private_variables, parent, use_enum_value "
"number_to_string_func, ignore_private_variables, parent, use_enum_value, default_timezone "
"encodings, ignore_encoding_errors") % ', '.join(kwargs.keys()))
if isinstance(hashes, MutableMapping):
self.hashes = hashes
Expand All @@ -190,7 +192,7 @@ def __init__(self,
self.hasher = default_hasher if hasher is None else hasher
self.hashes[UNPROCESSED_KEY] = []
self.use_enum_value = use_enum_value

self.default_timezone = default_timezone
self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
self.truncate_datetime = get_truncate_datetime(truncate_datetime)
self.number_format_notation = number_format_notation
Expand All @@ -214,6 +216,7 @@ def __init__(self,
self.encodings = encodings
self.ignore_encoding_errors = ignore_encoding_errors
self.ignore_iterable_order = ignore_iterable_order
self.custom_operators = custom_operators

self._hash(obj, parent=parent, parents_ids=frozenset({get_id(obj)}))

Expand Down Expand Up @@ -317,6 +320,7 @@ def __repr__(self):
"""
Hide the counts since it will be confusing to see them when they are hidden everywhere else.
"""
from deepdiff.summarize import summarize
return summarize(self._get_objects_to_hashes_dict(extract_index=0), max_length=500)

def __str__(self):
Expand Down Expand Up @@ -349,6 +353,7 @@ def _prep_obj(self, obj, parent, parents_ids=EMPTY_FROZENSET, is_namedtuple=Fals
if hasattr(obj, "__slots__"):
obj_to_dict_strategies.append(lambda o: {i: getattr(o, i) for i in o.__slots__})
else:
import inspect
obj_to_dict_strategies.append(lambda o: dict(inspect.getmembers(o, lambda m: not inspect.isroutine(m))))

for get_dict in obj_to_dict_strategies:
Expand Down Expand Up @@ -478,7 +483,7 @@ def _prep_number(self, obj):

def _prep_datetime(self, obj):
type_ = 'datetime'
obj = datetime_normalize(self.truncate_datetime, obj)
obj = datetime_normalize(self.truncate_datetime, obj, default_timezone=self.default_timezone)
return KEY_TO_VAL_STR.format(type_, obj)

def _prep_date(self, obj):
Expand All @@ -501,6 +506,13 @@ def _prep_tuple(self, obj, parent, parents_ids):
def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
"""The main hash method"""
counts = 1
if self.custom_operators is not None:
for operator in self.custom_operators:
func = getattr(operator, 'normalize_value_for_hashing', None)
if func is None:
raise NotImplementedError(f"{operator.__class__.__name__} needs to define a normalize_value_for_hashing method to be compatible with ignore_order=True or iterable_compare_func.".format(operator))
else:
obj = func(parent, obj)

if isinstance(obj, booleanTypes):
obj = self._prep_bool(obj)
Expand Down
78 changes: 44 additions & 34 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
# You might need to run it many times since dictionaries come in different orders
# every time you run the docstrings.
# However the docstring expects it in a specific order in order to pass!
import pytz
import difflib
import logging
import types
import datetime
from enum import Enum
from copy import deepcopy
from math import isclose as is_close
from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional
from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet
from collections.abc import Mapping, Iterable, Sequence
from collections import defaultdict
from inspect import getmembers
Expand Down Expand Up @@ -110,6 +111,8 @@ def _report_progress(_stats, progress_logger, duration):
'ignore_private_variables',
'encodings',
'ignore_encoding_errors',
'default_timezone',
'custom_operators',
)


Expand All @@ -128,10 +131,11 @@ def __init__(self,
custom_operators: Optional[List[Any]] =None,
cutoff_distance_for_pairs: float=CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT,
cutoff_intersection_for_pairs: float=CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT,
default_timezone:Union[datetime.timezone, datetime.timezone, pytz.tzinfo.BaseTzInfo]=datetime.timezone.utc,
encodings: Optional[List[str]]=None,
exclude_obj_callback: Optional[Callable]=None,
exclude_obj_callback_strict: Optional[Callable]=None,
exclude_paths: Union[str, List[str], None]=None,
exclude_paths: Union[str, List[str], Set[str], FrozenSet[str], None]=None,
exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None,
exclude_types: Optional[List[Any]]=None,
get_deep_distance: bool=False,
Expand All @@ -154,6 +158,8 @@ def __init__(self,
include_paths: Union[str, List[str], None]=None,
iterable_compare_func: Optional[Callable]=None,
log_frequency_in_sec: int=0,
log_scale_similarity_threshold: float=0.1,
log_stacktrace: bool=False,
math_epsilon: Optional[float]=None,
max_diffs: Optional[int]=None,
max_passes: int=10000000,
Expand All @@ -162,11 +168,10 @@ def __init__(self,
progress_logger: Callable=logger.info,
report_repetition: bool=False,
significant_digits: Optional[int]=None,
use_log_scale: bool=False,
log_scale_similarity_threshold: float=0.1,
threshold_to_diff_deeper: float = 0.33,
truncate_datetime: Optional[str]=None,
use_enum_value: bool=False,
use_log_scale: bool=False,
verbose_level: int=1,
view: str=TEXT_VIEW,
zip_ordered_iterables: bool=False,
Expand All @@ -183,8 +188,8 @@ def __init__(self,
"ignore_private_variables, ignore_nan_inequality, number_to_string_func, verbose_level, "
"view, hasher, hashes, max_passes, max_diffs, zip_ordered_iterables, "
"cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, "
"cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, "
"math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, "
"cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, log_stacktrace,"
"math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, default_timezone "
"ignore_order_func, custom_operators, encodings, ignore_encoding_errors, use_log_scale, log_scale_similarity_threshold "
"_parameters and _shared_parameters.") % ', '.join(kwargs.keys()))

Expand All @@ -205,6 +210,8 @@ def __init__(self,
self.use_enum_value = use_enum_value
self.log_scale_similarity_threshold = log_scale_similarity_threshold
self.use_log_scale = use_log_scale
self.default_timezone = default_timezone
self.log_stacktrace = log_stacktrace
self.threshold_to_diff_deeper = threshold_to_diff_deeper
self.ignore_string_type_changes = ignore_string_type_changes
self.ignore_type_in_groups = self.get_ignore_types_in_groups(
Expand Down Expand Up @@ -272,6 +279,10 @@ def _group_by_sort_key(x):
self.cache_size = cache_size
_parameters = self.__dict__.copy()
_parameters['group_by'] = None # overwriting since these parameters will be passed on to other passes.
if log_stacktrace:
self.log_err = logger.exception
else:
self.log_err = logger.error

# Non-Root
if _shared_parameters:
Expand Down Expand Up @@ -732,7 +743,7 @@ def _compare_in_order(
self, level,
t1_from_index=None, t1_to_index=None,
t2_from_index=None, t2_to_index=None
):
) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]:
"""
Default compare if `iterable_compare_func` is not provided.
This will compare in sequence order.
Expand All @@ -752,7 +763,7 @@ def _get_matching_pairs(
self, level,
t1_from_index=None, t1_to_index=None,
t2_from_index=None, t2_to_index=None
):
) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]:
"""
Given a level get matching pairs. This returns list of two tuples in the form:
[
Expand Down Expand Up @@ -1084,44 +1095,43 @@ def _create_hashtable(self, level, t):
# It only includes the ones needed when comparing iterables.
# The self.hashes dictionary gets shared between different runs of DeepHash
# So that any object that is already calculated to have a hash is not re-calculated.
deep_hash = DeepHash(item,
hashes=self.hashes,
parent=parent,
apply_hash=True,
**self.deephash_parameters,
)
deep_hash = DeepHash(
item,
hashes=self.hashes,
parent=parent,
apply_hash=True,
**self.deephash_parameters,
)
except UnicodeDecodeError as err:
err.reason = f"Can not produce a hash for {level.path()}: {err.reason}"
raise
except Exception as e: # pragma: no cover
logger.error("Can not produce a hash for %s."
"Not counting this object.\n %s" %
(level.path(), e))
except NotImplementedError:
raise
# except Exception as e: # pragma: no cover
# logger.error("Can not produce a hash for %s."
# "Not counting this object.\n %s" %
# (level.path(), e))
else:
try:
item_hash = deep_hash[item]
except KeyError:
pass
else:
if item_hash is unprocessed: # pragma: no cover
logger.warning("Item %s was not processed while hashing "
self.log_err("Item %s was not processed while hashing "
"thus not counting this object." %
level.path())
else:
self._add_hash(hashes=local_hashes, item_hash=item_hash, item=item, i=i)

# Also we hash the iterables themselves too so that we can later create cache keys from those hashes.
try:
DeepHash(
obj,
hashes=self.hashes,
parent=level.path(),
apply_hash=True,
**self.deephash_parameters,
)
except Exception as e: # pragma: no cover
logger.error("Can not produce a hash for iterable %s. %s" %
(level.path(), e))
DeepHash(
obj,
hashes=self.hashes,
parent=level.path(),
apply_hash=True,
**self.deephash_parameters,
)
return local_hashes

@staticmethod
Expand Down Expand Up @@ -1490,17 +1500,17 @@ def _diff_numbers(self, level, local_tree=None, report_type_change=True):

def _diff_datetime(self, level, local_tree=None):
"""Diff DateTimes"""
level.t1 = datetime_normalize(self.truncate_datetime, level.t1)
level.t2 = datetime_normalize(self.truncate_datetime, level.t2)
level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone)
level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone)

if level.t1 != level.t2:
self._report_result('values_changed', level, local_tree=local_tree)

def _diff_time(self, level, local_tree=None):
"""Diff DateTimes"""
if self.truncate_datetime:
level.t1 = datetime_normalize(self.truncate_datetime, level.t1)
level.t2 = datetime_normalize(self.truncate_datetime, level.t2)
level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone)
level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone)

if level.t1 != level.t2:
self._report_result('values_changed', level, local_tree=local_tree)
Expand Down
5 changes: 4 additions & 1 deletion deepdiff/distance.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import math
import datetime
from deepdiff.base import BaseProtocol
from deepdiff.deephash import DeepHash
from deepdiff.helper import (
DELTA_VIEW, numbers, strings, add_to_frozen_set, not_found, only_numbers, np, np_float64, time_to_seconds,
Expand All @@ -11,7 +12,9 @@
DISTANCE_CALCS_NEEDS_CACHE = "Distance calculation can not happen once the cache is purged. Try with _cache='keep'"


class DistanceMixin:


class DistanceMixin(BaseProtocol):

def _get_rough_distance(self):
"""
Expand Down
Loading