Skip to content

Commit 62d3ec4

Browse files
authored
Merge pull request #1238 from compas-dev/hashtree
Hashtree
2 parents e5bfea6 + e887548 commit 62d3ec4

File tree

6 files changed

+377
-0
lines changed

6 files changed

+377
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3232
* Added `compas.geometry.curves.curve.Curve.from_native`.
3333
* Added `compas_rhino.geometry.curves.curve.Curve.from_native`.
3434
* Added `compas_rhino.geometry.curves.nurbs.NurbsCurve.from_native`.
35+
* Added `compas.datastructures.HashTree` and `compas.datastructures.HashNode`.
3536

3637
### Changed
3738

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
********************************************************************************
2+
Hash Tree
3+
********************************************************************************
4+
5+
A hash tree (or Merkle tree) is a tree data structure in which every leaf node is labelled with the hash of a data block and every non-leaf node is labelled with the cryptographic hash of the labels of its child nodes.
6+
Hash trees are useful because they allow efficient and secure verification of the contents of large data structures. They are widely used in modern distributed version control systems like Git as well as in peer-to-peer systems like blockchains.
7+
COMPAS provides a simple implementation of a hash tree that can be used for detecting and locating changes in a complex data structure. In the context of AEC, this feature can also be useful for many real-world applications,
8+
such as detecting changes in a complicated Building Information Model, tracking minor deformations in structural assessments, or even detecting robot joint movements in a digital fabrication process, and many more.
9+
10+
Hash Tree From Dict
11+
===================
12+
13+
A COMPAS hash tree can be created from any plain Python dictionary using the `HashTree.from_dict` method.
14+
15+
>>> from compas.datastructures import HashTree
16+
>>> data = {'a': 1, 'b': 2, 'c': {'d': 3, 'e': 4}}
17+
>>> tree = HashTree.from_dict(data)
18+
19+
The structure of the hash tree and the cryptographic hash of each node can be visualised using the `print` function.
20+
21+
>>> print(tree)
22+
<Tree with 6 nodes>
23+
└── ROOT @ b2e1c
24+
├── .a:1 @ 4d9a8
25+
├── .b:2 @ 82b86
26+
└── .c @ 664a3
27+
├── .d:3 @ 76d82
28+
└── .e:4 @ ebe84
29+
30+
Once the original data is modified, a new hash tree can be created from the modified data and the changes can be detected by comparing the two hash trees.
31+
32+
>>> data['c']['d'] = 5
33+
>>> del data["b"]
34+
>>> data["f"] = True
35+
>>> new_tree = HashTree.from_dict(data)
36+
>>> print(new_tree)
37+
<Tree with 6 nodes>
38+
└── ROOT @ a8c1b
39+
├── .a:1 @ 4d9a8
40+
├── .c @ e1701
41+
│ ├── .d:5 @ 98b1e
42+
│ └── .e:4 @ ebe84
43+
└── .f:True @ 753e5
44+
45+
>>> new_tree.diff(tree)
46+
{'added': [{'path': '.f', 'value': True}], 'removed': [{'path': '.b', 'value': 2}], 'modified': [{'path': '.c.d', 'old': 3, 'new': 5}]}
47+
48+
Hash Tree From COMPAS Data
49+
==========================
50+
51+
A COMPAS hash tree can also be created from an instance of any class that inherits from the base `Data` class in COMPAS, such as `Mesh`, `Graph`, `Shape`, `Geometry`, etc.
52+
This is done by hashing the serialised data of the object.
53+
54+
>>> from compas.datastructures import Mesh
55+
>>> mesh = Mesh.from_polyhedron(6)
56+
>>> tree = HashTree.from_object(mesh)
57+
>>> print(tree)
58+
<Tree with 58 nodes>
59+
└── ROOT @ 44cc1
60+
├── .attributes @ 3370c
61+
├── .default_vertex_attributes @ 84700
62+
│ ├── .x:0.0 @ 5bc2d
63+
│ ├── .y:0.0 @ 1704b
64+
│ └── .z:0.0 @ 6199e
65+
├── .default_edge_attributes @ 5e834
66+
├── .default_face_attributes @ 5a8d9
67+
├── .vertex @ ff6d0
68+
│ ├── .0 @ 84ec1
69+
│ │ ├── .x:-1.1547005383792517 @ 874f4
70+
│ │ ├── .y:-1.1547005383792517 @ d2b16
71+
│ │ └── .z:-1.1547005383792517 @ bd9f0
72+
│ ├── .1 @ 316d3
73+
...
74+
75+
>>> mesh.vertex_attribute(0, "x", 1.0)
76+
>>> mesh.delete_face(3)
77+
>>> new_tree = HashTree.from_object(mesh)
78+
>>> new_tree.diff(tree)
79+
{'added': [], 'removed': [{'path': '.face.3', 'value': [4, 2, 3, 5]}, {'path': '.facedata.3', 'value': None}], 'modified': [{'path': '.vertex.0.x', 'old': -1.1547005383792517, 'new': 1.0}]}
80+

docs/userguide/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ User Guide
3232
advanced.tolerance
3333
advanced.serialisation
3434
advanced.rpc
35+
advanced.hashtree
3536

3637

3738
.. toctree::

src/compas/datastructures/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from .assembly.part import Feature, GeometricFeature, ParametricFeature, Part
5656
from .cell_network.cell_network import CellNetwork
5757
from .tree.tree import Tree, TreeNode
58+
from .tree.hashtree import HashTree, HashNode
5859

5960
Network = Graph
6061

@@ -72,4 +73,6 @@
7273
"ParametricFeature",
7374
"Tree",
7475
"TreeNode",
76+
"HashTree",
77+
"HashNode",
7578
]
Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
import hashlib
2+
3+
from compas.data import Data
4+
from compas.data import json_dumps
5+
from compas.datastructures import Tree
6+
from compas.datastructures import TreeNode
7+
8+
9+
class HashNode(TreeNode):
    """A node in a HashTree. This class is used internally by the HashTree class.

    Parameters
    ----------
    path : str
        The relative path of the node.
    value : str, int, float, list, bool, None
        The value of the node. Only leaf nodes can have a value.
    **kwargs : dict, optional
        Additional keyword arguments, passed on to the base class constructor.

    Attributes
    ----------
    path : str
        The relative path of the node.
    value : str, int, float, list, bool, None
        The value of the node. Only leaf nodes can have a value.
    absolute_path : str
        The absolute path of the node, i.e. the concatenation of the relative
        paths of all its ancestors and its own relative path.
    is_value : bool
        True if the node has a value (i.e. its value is not None).
    signature : str or None
        The SHA256 signature of the node.
        None as long as no signature has been assigned to the node.
    children_dict : dict
        A dictionary of the children of the node.
        The keys are the relative paths of the children, the values are the child nodes.
    children_paths : list[str]
        A list of the relative paths of the children of the node.

    """

    def __init__(self, path, value=None, **kwargs):
        super(HashNode, self).__init__(**kwargs)
        self.path = path
        self.value = value
        # Stays None until a signature is assigned from outside (see HashTree.node_signature).
        self._signature = None

    def __repr__(self):
        path = self.path or "ROOT"
        # A node that has not been hashed yet has no signature to abbreviate;
        # slicing None would raise a TypeError, so show None in that case.
        short_signature = self._signature[:5] if self._signature is not None else None
        if self.value is not None:
            return "{}:{} @ {}".format(path, self.value, short_signature)
        return "{} @ {}".format(path, short_signature)

    @property
    def absolute_path(self):
        if self.parent is None:
            return self.path
        return self.parent.absolute_path + self.path

    @property
    def is_value(self):
        return self.value is not None

    @property
    def signature(self):
        return self._signature

    @property
    def children_dict(self):
        # Rebuilt on every access; callers that need several lookups should keep a local reference.
        return {child.path: child for child in self.children}

    @property
    def children_paths(self):
        return [child.path for child in self.children]

    @classmethod
    def from_dict(cls, data_dict, path=""):
        """Construct a HashNode from a dictionary.

        Nested dictionaries are converted recursively into child nodes without a value.
        Every other item becomes a child node holding that item as its value.
        The relative path of each child is its key prefixed with a dot.

        Parameters
        ----------
        data_dict : dict
            A dictionary to construct the HashNode from.
        path : str
            The relative path of the node.

        Returns
        -------
        :class:`compas.datastructures.HashNode`
            A HashNode constructed from the dictionary.

        """
        node = cls(path)
        for key, value in data_dict.items():
            # Use a separate name so the `path` of the node itself is not shadowed.
            child_path = ".{}".format(key)
            if isinstance(value, dict):
                node.add(cls.from_dict(value, path=child_path))
            else:
                node.add(cls(child_path, value=value))

        return node
100+
101+
102+
class HashTree(Tree):
    """HashTree data structure to compare differences in hierarchical data.

    A Hash tree (or Merkle tree) is a tree in which every leaf node is labelled with the cryptographic hash
    of a data block and every non-leaf node is labelled with the hash of the labels of its child nodes.
    Hash trees allow efficient and secure verification of the contents of large data structures.
    They can also be used to compare different versions(states) of the same data structure for changes.

    Attributes
    ----------
    signatures : dict[str, str]
        The SHA256 signatures of the nodes in the tree. The keys are the absolute paths of the nodes, the values are the signatures.

    Examples
    --------
    >>> tree1 = HashTree.from_dict({"a": {"b": 1, "c": 3}, "d": [1, 2, 3], "e": 2})
    >>> tree2 = HashTree.from_dict({"a": {"b": 1, "c": 2}, "d": [1, 2, 3], "f": 2})
    >>> print(tree1)
    +-- ROOT @ 4cd56
        +-- .a @ c16fd
        |   +-- .b:1 @ c9b55
        |   +-- .c:3 @ 518d4
        +-- .d:[1, 2, 3] @ 9be3a
        +-- .e:2 @ 68355
    >>> print(tree2)
    +-- ROOT @ fbe39
        +-- .a @ c2022
        |   +-- .b:1 @ c9b55
        |   +-- .c:2 @ e3365
        +-- .d:[1, 2, 3] @ 9be3a
        +-- .f:2 @ 93861
    >>> tree2.print_diff(tree1)
    Added:
    {'path': '.f', 'value': 2}
    Removed:
    {'path': '.e', 'value': 2}
    Modified:
    {'path': '.a.c', 'old': 3, 'new': 2}

    """

    def __init__(self, **kwargs):
        super(HashTree, self).__init__(**kwargs)
        # Cache of computed signatures, keyed by the absolute path of the node.
        self.signatures = {}

    @classmethod
    def from_dict(cls, data_dict):
        """Construct a HashTree from a dictionary.

        Parameters
        ----------
        data_dict : dict
            A dictionary to construct the HashTree from.

        Returns
        -------
        :class:`compas.datastructures.HashTree`
            A HashTree constructed from the dictionary.

        """
        tree = cls()
        root = HashNode.from_dict(data_dict)
        tree.add(root)
        # Hashing the root recursively hashes (and caches) all of its descendants.
        tree.node_signature(tree.root)
        return tree

    @classmethod
    def from_object(cls, obj):
        """Construct a HashTree from a COMPAS data object.

        The tree is built from the ``__data__`` dictionary of the object.

        Parameters
        ----------
        obj : :class:`compas.data.Data`
            The data object to construct the HashTree from.

        Returns
        -------
        :class:`compas.datastructures.HashTree`
            A HashTree constructed from the data of the object.

        Raises
        ------
        TypeError
            If the object is not an instance of :class:`compas.data.Data`.

        """
        if not isinstance(obj, Data):
            raise TypeError("The object must be a COMPAS data object.")
        return cls.from_dict(obj.__data__)

    def node_signature(self, node, parent_path=""):
        """Compute the SHA256 signature of a node. The computed nodes are cached in `self.signatures` dictionary.

        The signature is the hash of the serialised relative path, value and
        child signatures of the node. If a signature is already cached for the
        absolute path of the node, the cached signature is returned as is,
        without hashing the node again.

        Parameters
        ----------
        node : :class:`compas.datastructures.HashNode`
            The node to compute the signature of.
        parent_path : str
            The absolute path of the parent node.

        Returns
        -------
        str
            The SHA256 signature of the node.

        """
        absolute_path = parent_path + node.path
        if absolute_path in self.signatures:
            return self.signatures[absolute_path]

        content = {
            "path": node.path,
            "value": node.value,
            # Including the child signatures makes a change in any descendant change this signature as well.
            "children": [self.node_signature(child, absolute_path) for child in node.children],
        }

        signature = hashlib.sha256(json_dumps(content).encode()).hexdigest()

        self.signatures[absolute_path] = signature
        node._signature = signature

        return signature

    def diff(self, other):
        """Compute the difference between two HashTrees.

        The comparison is made from the point of view of this tree:
        `added` lists nodes that exist in this tree but not in `other`,
        `removed` lists nodes that exist in `other` but not in this tree,
        and for `modified` nodes `old` is the value in `other` and `new` is the value in this tree.
        Subtrees with identical signatures are skipped without being traversed.

        Parameters
        ----------
        other : :class:`compas.datastructures.HashTree`
            The HashTree to compare with.

        Returns
        -------
        dict
            A dictionary containing the differences between the two HashTrees. The keys are `added`, `removed` and `modified`.
            The values are lists of dictionaries containing the paths and values of the nodes that were added, removed or modified.

        """
        added = []
        removed = []
        modified = []

        def _diff(node1, node2):
            # Equal signatures mean equal content for the whole subtree.
            if node1.signature == node2.signature:
                return

            if node1.is_value or node2.is_value:
                modified.append({"path": node1.absolute_path, "old": node2.value, "new": node1.value})

            # `children_dict` builds a new dictionary on every access,
            # so build both mappings once per node pair instead of once per lookup.
            children1 = node1.children_dict
            children2 = node2.children_dict

            for path in node1.children_paths:
                child1 = children1[path]
                if path in children2:
                    _diff(child1, children2[path])
                else:
                    added.append({"path": child1.absolute_path, "value": child1.value})

            for path in node2.children_paths:
                if path not in children1:
                    child2 = children2[path]
                    removed.append({"path": child2.absolute_path, "value": child2.value})

        _diff(self.root, other.root)

        return {"added": added, "removed": removed, "modified": modified}

    def print_diff(self, other):
        """Print the difference between two HashTrees.

        The added, removed and modified items computed by :meth:`diff`
        are printed one per line, each group under its own heading.

        Parameters
        ----------
        other : :class:`compas.datastructures.HashTree`
            The HashTree to compare with.

        """

        diff = self.diff(other)
        print("Added:")
        for item in diff["added"]:
            print(item)
        print("Removed:")
        for item in diff["removed"]:
            print(item)
        print("Modified:")
        for item in diff["modified"]:
            print(item)

0 commit comments

Comments
 (0)