Skip to content

Commit f1325e3

Browse files
committed
adding more docs. Almost done.
1 parent ea833d7 commit f1325e3

File tree

10 files changed

+298
-61
lines changed

10 files changed

+298
-61
lines changed

deepdiff/delta.py

Lines changed: 19 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from deepdiff.serialization import pickle_load, pickle_dump
66
from deepdiff.helper import (
77
strings, short_repr, numbers,
8-
np_ndarray, np_array_factory, numpy_dtypes,
8+
np_ndarray, np_array_factory, numpy_dtypes, get_doc,
99
not_found, numpy_dtype_string_to_type, dict_)
1010
from deepdiff.path import _path_to_elements, _get_nested_obj, GET, GETATTR
1111
from deepdiff.anyset import AnySet
@@ -35,6 +35,8 @@
3535
NUMPY_TO_LIST = 'NUMPY_TO_LIST'
3636
NOT_VALID_NUMPY_TYPE = "{} is not a valid numpy type."
3737

38+
doc = get_doc('delta_doc.rst')
39+
3840

3941
class DeltaError(ValueError):
4042
"""
@@ -51,58 +53,22 @@ class DeltaNumpyOperatorOverrideError(ValueError):
5153

5254

5355
class Delta:
54-
r"""
55-
**Delta**
56-
57-
DeepDiff Delta is a directed delta that when applied to t1 can yield t2 where delta is the difference of t1 and t2.
58-
59-
NOTE: THIS FEATURE IS IN BETA
60-
61-
**Parameters**
62-
63-
diff : Delta dictionary, Delta dump payload or a DeepDiff object, default=None.
64-
Content to be loaded.
65-
66-
delta_path : String, default=None.
67-
local path to the delta dump file to be loaded
68-
69-
You need to pass either diff or delta_path but not both.
70-
71-
safe_to_import : Set, default=None.
72-
A set of modules that needs to be explicitly white listed to be loaded
73-
Example: {'mymodule.MyClass', 'decimal.Decimal'}
74-
Note that this set will be added to the basic set of modules that are already white listed.
75-
The set of what is already white listed can be found in deepdiff.serialization.SAFE_TO_IMPORT
76-
77-
mutate : Boolean, default=False.
78-
Whether to mutate the original object when adding the delta to it or not.
79-
Note that this parameter is not always successful in mutating. For example if your original object
80-
is an immutable type such as a frozenset or a tuple, mutation will not succeed.
81-
Hence it is recommended to keep this parameter as the default value of False unless you are sure
82-
that you do not have immutable objects. There is a small overhead of doing deepcopy on the original
83-
object when mutate=False. If performance is a concern and modifying the original object is not a big deal,
84-
set the mutate=True but always reassign the output back to the original object.
8556

86-
Example:
87-
88-
delta = Delta(diff, mutate=True)
89-
90-
**Returns**
91-
92-
A delta object that can be added to t1 to recreate t2.
93-
94-
**Examples**
95-
96-
Importing
97-
>>> from deepdiff import DeepDiff, Delta
98-
>>> from pprint import pprint
99-
100-
101-
Note: Delta objects can not fully reproduce objects if the diff reports Numpy array shape changes.
102-
"""
103-
def __init__(self, diff=None, delta_path=None, delta_file=None, mutate=False, verify_symmetry=False,
104-
raise_errors=False, log_errors=True, safe_to_import=None,
105-
serializer=pickle_dump, deserializer=pickle_load):
57+
__doc__ = doc
58+
59+
def __init__(
60+
self,
61+
diff=None,
62+
delta_path=None,
63+
delta_file=None,
64+
deserializer=pickle_load,
65+
log_errors=True,
66+
mutate=False,
67+
raise_errors=False,
68+
safe_to_import=None,
69+
serializer=pickle_dump,
70+
verify_symmetry=False,
71+
):
10672

10773
if diff is not None:
10874
if isinstance(diff, DeepDiff):
@@ -542,7 +508,7 @@ def _do_ignore_order(self):
542508
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
543509
value=new_obj, action=parent_to_obj_action)
544510

545-
def dump(self, file, delta_path=None):
511+
def dump(self, file):
546512
"""
547513
Dump into file object
548514
"""

deepdiff/helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def type_is_subclass_of_type_group(item, type_group):
314314

315315
def get_doc(doc_filename):
316316
try:
317-
with open(os.path.join(current_dir, doc_filename), 'r') as doc_file:
317+
with open(os.path.join(current_dir, '../docs/', doc_filename), 'r') as doc_file:
318318
doc = doc_file.read()
319319
except Exception: # pragma: no cover
320320
doc = 'Failed to load the docstrings. Please visit: https://github.com/seperman/deepdiff' # pragma: no cover

docs/deep_distance.rst

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Deep Distance
66
=============
77

88
The distance between 2 objects. A number between 0 and 1.
9-
Deep Distance in concept is inspired by Levenshtein Edit distance. At its core, the Deep Distance is the number of operations needed to convert one object to the other divided by the sum of the sizes of the 2 objects. Then the number is capped at 1.
9+
Deep Distance in concept is inspired by Levenshtein Edit distance. At its core, the Deep Distance is the number of operations needed to convert one object to the other, divided by the sum of the sizes of the 2 objects, and capped at 1. Note that it is the number of operations and NOT the "minimum" number of operations to convert one object to the other. The number is highly dependent on the granularity of the diff results, which is controlled by the parameters passed to DeepDiff.
1010

1111
.. _get_deep_distance_label:
1212

@@ -34,5 +34,84 @@ get_deep_distance: Boolean, default = False
3434
>>> DeepDiff([[2, 1]], [[1, 2, 3]], ignore_order=True, get_deep_distance=True)
3535
{'iterable_item_added': {'root[0][2]': 3}, 'deep_distance': 0.1111111111111111}
3636

37+
.. _distance_and_diff_granularity_label:
38+
39+
Distance And Diff Granularity
40+
-----------------------------
41+
42+
.. note::
43+
The Deep Distance of objects is highly dependent on the diff object that is produced. A more granular diff object will give a more accurate Deep Distance value too.
44+
45+
Let's use the following 2 deeply nested objects as an example. If you ignore the order of items, they are very similar and only differ in a few elements.
46+
47+
We will run 2 diffs and ask for the deep distance. The only difference between the 2 diffs below is that in the first one the :ref:`cutoff_intersection_for_pairs_label` is not passed, so the default value of 0.3 is used, while in the second one cutoff_intersection_for_pairs=1 is used, which forces extra pass calculations.
48+
49+
>>> from pprint import pprint
50+
>>> t1 = [
51+
... {
52+
... "key3": [[[[[[[[[[1, 2, 4, 5]]], [[[8, 7, 3, 5]]]]]]]]]],
53+
... "key4": [7, 8]
54+
... },
55+
... {
56+
... "key5": "val5",
57+
... "key6": "val6"
58+
... }
59+
... ]
60+
>>>
61+
>>> t2 = [
62+
... {
63+
... "key5": "CHANGE",
64+
... "key6": "val6"
65+
... },
66+
... {
67+
... "key3": [[[[[[[[[[1, 3, 5, 4]]], [[[8, 8, 1, 5]]]]]]]]]],
68+
... "key4": [7, 8]
69+
... }
70+
... ]
71+
72+
We don't pass cutoff_intersection_for_pairs in the first diff.
73+
74+
>>> diff1=DeepDiff(t1, t2, ignore_order=True, cache_size=5000, get_deep_distance=True)
75+
>>> pprint(diff1)
76+
{'deep_distance': 0.36363636363636365,
77+
'values_changed': {'root[0]': {'new_value': {'key5': 'CHANGE', 'key6': 'val6'},
78+
'old_value': {'key3': [[[[[[[[[[1, 2, 4, 5]]],
79+
[[[8,
80+
7,
81+
3,
82+
5]]]]]]]]]],
83+
'key4': [7, 8]}},
84+
'root[1]': {'new_value': {'key3': [[[[[[[[[[1, 3, 5, 4]]],
85+
[[[8,
86+
8,
87+
1,
88+
5]]]]]]]]]],
89+
'key4': [7, 8]},
90+
'old_value': {'key5': 'val5', 'key6': 'val6'}}}}
91+
92+
Note that the stats show that only 5 sets of objects were compared with each other according to the DIFF COUNT:
93+
94+
>>> diff1.get_stats()
95+
{'PASSES COUNT': 0, 'DIFF COUNT': 5, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False}
96+
97+
Let's pass cutoff_intersection_for_pairs=1 to enforce pass calculations. As you can see the results are way more granular and the deep distance value is way more accurate now.
98+
99+
>>> diff2=DeepDiff(t1, t2, ignore_order=True, cache_size=5000, cutoff_intersection_for_pairs=1, get_deep_distance=True)
100+
>>> from pprint import pprint
101+
>>> pprint(diff2)
102+
{'deep_distance': 0.06060606060606061,
103+
'iterable_item_removed': {"root[0]['key3'][0][0][0][0][0][0][1][0][0][1]": 7},
104+
'values_changed': {"root[0]['key3'][0][0][0][0][0][0][0][0][0][1]": {'new_value': 3,
105+
'old_value': 2},
106+
"root[0]['key3'][0][0][0][0][0][0][1][0][0][2]": {'new_value': 1,
107+
'old_value': 3},
108+
"root[1]['key5']": {'new_value': 'CHANGE',
109+
'old_value': 'val5'}}}
110+
111+
As you can see, way more calculations have now happened behind the scenes. Instead of only 5 sets of items being compared with each other, we have 306 items that are compared with each other in 110 passes.
112+
113+
>>> diff2.get_stats()
114+
{'PASSES COUNT': 110, 'DIFF COUNT': 306, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False}
115+
37116

38117
Back to :doc:`/index`
File renamed without changes.

docs/delta.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22

33
.. _delta_label:
44

5-
65
Delta
76
=====
87

9-
Delta objects are like git commits but for structured data.
10-
You can convert the diff results into Delta objects, store the deltas and later apply to other objects.
8+
.. toctree::
9+
:maxdepth: 3
1110

12-
.. note::
13-
If you plan to generate Delta objects from the DeepDiff result, and ignore_order=True, you need to also set the report_repetition=True.
11+
.. automodule:: deepdiff.delta
1412

13+
.. autoclass:: Delta
14+
:members:
1515

1616
Back to :doc:`/index`

0 commit comments

Comments
 (0)