Skip to content
This repository was archived by the owner on Jul 16, 2024. It is now read-only.

Commit 2085e2a

Browse files
author
James Bell
authored
Merge pull request #98 from CameronJHall/master
Guaranteed order in deduplication dictionary hashes
2 parents 396f9ff + abdffe6 commit 2085e2a

File tree

4 files changed

+134
-84
lines changed

4 files changed

+134
-84
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
setup(
66
name='tgt_grease',
7-
version='2.4.1',
7+
version='2.5.0',
88
license="MIT",
99
description='Modern distributed automation engine built with love by Target',
1010
long_description="""

tgt_grease/enterprise/Model/DeDuplication.py

Lines changed: 71 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -234,39 +234,37 @@ def deduplicate_object(ioc, obj, expiry, expiry_max, threshold, source_name, con
234234
}
235235
)
236236
return
237-
else:
238-
# T1 Not Found Protocol: We have a possibly unique object
239-
ioc.getLogger().debug("Type1 Match not found; Beginning type 2 processing")
240-
# Create a T1
241-
T1ObjectId = DeDupCollection.insert_one({
242-
'expiry': Deduplication.generate_expiry_time(int(expiry)),
243-
'grease_internal_configuration': configuration_name,
244-
'max_expiry': Deduplication.generate_max_expiry_time(int(expiry_max)),
245-
'type': 1,
246-
'score': 1,
247-
'source': str(source_name),
248-
'hash': Deduplication.generate_hash_from_obj(t1test)
249-
}).inserted_id
250-
# Begin T2 Deduplication
251-
compositeScore = Deduplication.object_field_score(
252-
collection, ioc, source_name, configuration_name, obj, str(T1ObjectId), expiry, expiry_max, field_set
237+
# T1 Not Found Protocol: We have a possibly unique object
238+
ioc.getLogger().debug("Type1 Match not found; Beginning type 2 processing")
239+
# Create a T1
240+
T1ObjectId = DeDupCollection.insert_one({
241+
'expiry': Deduplication.generate_expiry_time(int(expiry)),
242+
'grease_internal_configuration': configuration_name,
243+
'max_expiry': Deduplication.generate_max_expiry_time(int(expiry_max)),
244+
'type': 1,
245+
'score': 1,
246+
'source': str(source_name),
247+
'hash': Deduplication.generate_hash_from_obj(t1test)
248+
}).inserted_id
249+
# Begin T2 Deduplication
250+
compositeScore = Deduplication.object_field_score(
251+
collection, ioc, source_name, configuration_name, obj, str(T1ObjectId), expiry, expiry_max, field_set
252+
)
253+
if compositeScore < threshold:
254+
# unique obj
255+
ioc.getLogger().trace(
256+
"Unique object! Composite score was: [{0}] threashold: [{1}]".format(compositeScore, threshold),
257+
verbose=True
253258
)
254-
if compositeScore < threshold:
255-
# unique obj
256-
ioc.getLogger().trace(
257-
"Unique object! Composite score was: [{0}] threashold: [{1}]".format(compositeScore, threshold),
258-
verbose=True
259-
)
260-
final.append(obj)
261-
return
262-
else:
263-
# likely duplicate value
264-
ioc.getLogger().trace(
265-
"Object surpassed threshold, suspected to be duplicate! "
266-
"Composite score was: [{0}] threashold: [{1}]".format(compositeScore, threshold),
267-
verbose=True
268-
)
269-
return
259+
final.append(obj)
260+
return
261+
# likely duplicate value
262+
ioc.getLogger().trace(
263+
"Object surpassed threshold, suspected to be duplicate! "
264+
"Composite score was: [{0}] threashold: [{1}]".format(compositeScore, threshold),
265+
verbose=True
266+
)
267+
return
270268

271269
@staticmethod
272270
def object_field_score(collection, ioc, source_name, configuration_name, obj, objectId, expiry, max_expiry, field_set=None):
@@ -361,8 +359,46 @@ def object_field_score(collection, ioc, source_name, configuration_name, obj, ob
361359
continue
362360
if len(field_scores) is 0:
363361
return 0.0
364-
else:
365-
return float(sum(field_scores) / float(len(field_scores)))
362+
return float(sum(field_scores) / float(len(field_scores)))
363+
364+
@staticmethod
def make_hashable(obj):
    """Flatten a dictionary into a deterministically ordered tuple of key/value pairs

    The work is delegated to `Deduplication.make_hashable_helper`, which recursively
    replaces nested dictionaries, lists, tuples and sets with sorted tuples. Two
    dictionaries holding the same data therefore produce the same result regardless
    of the order their keys were inserted in.

    Args:
        obj (dict): A dictionary

    Returns:
        tuple: the sorted (key, value) pairs of the dictionary; container values are
            converted to sorted tuples, every other value is kept as it is

    Example:
        {
            "a": ["test1", "test2"],
            "b": [{"test2": 21}, {"test1": 1}, {"test7": 3}],
            "c": "test"
        }
        becomes...
        (('a', ('test1', 'test2')),
         ('b', ((('test1', 1),), (('test2', 21),), (('test7', 3),))),
         ('c', 'test'))

    """
    # tuple() copies the helper's result directly; no intermediate list is needed
    return tuple(Deduplication.make_hashable_helper(obj))
388+
389+
@staticmethod
def make_hashable_helper(obj):
    """Recursively turn containers into sorted tuples

    Dictionaries become a sorted tuple of (key, converted value) pairs; lists, tuples,
    sets and frozensets become a sorted tuple of their converted elements. Any other
    value is returned unchanged.

    Args:
        obj (object): The value to convert

    Returns:
        object: a sorted tuple when `obj` is a dict, list, tuple, set or frozenset,
            otherwise `obj` itself

    """
    def _ordered(items):
        # Materialise first: a generator would be exhausted by a failed sort attempt
        items = list(items)
        try:
            return tuple(sorted(items))
        except TypeError:
            # Python 3 refuses to order unrelated types (e.g. int vs str, or None).
            # Ordering by repr keeps the result independent of the input order
            # for values whose repr is stable, such as the builtin scalar types.
            return tuple(sorted(items, key=repr))

    def _freeze(value):
        if isinstance(value, dict):
            return _ordered((key, _freeze(item)) for key, item in value.items())
        if isinstance(value, (tuple, list, set, frozenset)):
            return _ordered(_freeze(item) for item in value)
        return value

    return _freeze(obj)
366402

367403
@staticmethod
368404
def generate_hash_from_obj(obj):
@@ -375,7 +411,7 @@ def generate_hash_from_obj(obj):
375411
str: Object Hash
376412
377413
"""
378-
return hashlib.sha256(str(obj).encode('utf-8')).hexdigest()
414+
return hashlib.sha256(repr(Deduplication.make_hashable(obj)).encode('utf-8')).hexdigest()
379415

380416
@staticmethod
381417
def generate_expiry_time(hours):

tgt_grease/enterprise/Model/tests/test_deduplication.py

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,51 +18,70 @@ def test_generate_expiry_time(self):
1818
)
1919

2020
def test_generate_max_expiry_time(self):
21-
print(str(Deduplication.generate_max_expiry_time(7)))
22-
print(str(datetime.datetime.utcnow() + datetime.timedelta(days=7)))
23-
self.assertTrue(
24-
Deduplication.generate_max_expiry_time(7).day == (datetime.datetime.utcnow() + datetime.timedelta(days=7)).day
25-
)
21+
print(str(Deduplication.generate_max_expiry_time(7)))
22+
print(str(datetime.datetime.utcnow() + datetime.timedelta(days=7)))
23+
self.assertTrue(
24+
Deduplication.generate_max_expiry_time(7).day == (datetime.datetime.utcnow() + datetime.timedelta(days=7)).day
25+
)
26+
27+
def test_make_hashable_helper(self):
    """The helper's output must not depend on the key order of the input dictionary"""
    flatten = Deduplication.make_hashable_helper
    ordered = {'test1': 1, 'test2': 2, 'test3': 3}
    shuffled = {'test3': 3, 'test1': 1, 'test2': 2}
    expected = (('test1', 1), ('test2', 2), ('test3', 3))
    # A flat dictionary becomes its (key, value) pairs sorted by key
    self.assertEqual(flatten(ordered), expected)
    # The same data supplied in a different key order gives the same tuple
    from_ordered = flatten(ordered)
    from_shuffled = flatten(shuffled)
    self.assertEqual(from_ordered, from_shuffled)
38+
39+
def test_make_hashable(self):
    """make_hashable must return the sorted key/value pairs with nested lists sorted"""
    obj = {'test1': 'var', 'test2': 5, 'test3': ['1', '3', '2']}
    # Call make_hashable itself; the helper has its own dedicated test
    self.assertEqual(
        Deduplication.make_hashable(obj),
        (('test1', 'var'), ('test2', 5), ('test3', ('1', '2', '3')))
    )
)
2645

2746
def test_generate_hash(self):
2847
obj = {'test': 'var', 'test1': 5, 'test2': 7.89}
2948
self.assertEqual(
3049
Deduplication.generate_hash_from_obj(obj),
31-
hashlib.sha256(str(obj).encode('utf-8')).hexdigest()
50+
hashlib.sha256(repr(Deduplication.make_hashable(obj)).encode('utf-8')).hexdigest()
3251
)
3352

3453
def test_generate_hash_multi_str_type(self):
3554
obj = {'test': u'var', 'test1': 5, 'test2': 7.89, 'test3': 'ver'}
3655
self.assertEqual(
3756
Deduplication.generate_hash_from_obj(obj),
38-
hashlib.sha256(str(obj).encode('utf-8')).hexdigest()
57+
hashlib.sha256(repr(Deduplication.make_hashable(obj)).encode('utf-8')).hexdigest()
3958
)
4059

4160
def test_generate_hash_other_type(self):
42-
obj = 7
61+
obj = {'test': 7}
4362
self.assertEqual(
4463
Deduplication.generate_hash_from_obj(obj),
45-
hashlib.sha256(str(obj).encode('utf-8')).hexdigest()
64+
hashlib.sha256(repr(Deduplication.make_hashable(obj)).encode('utf-8')).hexdigest()
4665
)
47-
obj = 'test'
66+
obj = {'test': 'test'}
4867
self.assertEqual(
4968
Deduplication.generate_hash_from_obj(obj),
50-
hashlib.sha256(str(obj).encode('utf-8')).hexdigest()
69+
hashlib.sha256(repr(Deduplication.make_hashable(obj)).encode('utf-8')).hexdigest()
5170
)
52-
obj = u'test'
71+
obj = {'test': u'test'}
5372
self.assertEqual(
5473
Deduplication.generate_hash_from_obj(obj),
55-
hashlib.sha256(str(obj).encode('utf-8')).hexdigest()
74+
hashlib.sha256(repr(Deduplication.make_hashable(obj)).encode('utf-8')).hexdigest()
5675
)
57-
obj = 7.8
76+
obj = {'test': 7.8}
5877
self.assertEqual(
5978
Deduplication.generate_hash_from_obj(obj),
60-
hashlib.sha256(str(obj).encode('utf-8')).hexdigest()
79+
hashlib.sha256(repr(Deduplication.make_hashable(obj)).encode('utf-8')).hexdigest()
6180
)
62-
obj = ['test', 'var', 8, 8.43]
81+
obj = {'test': ['test', 'var', '8', '8.43']}
6382
self.assertEqual(
6483
Deduplication.generate_hash_from_obj(obj),
65-
hashlib.sha256(str(obj).encode('utf-8')).hexdigest()
84+
hashlib.sha256(repr(Deduplication.make_hashable(obj)).encode('utf-8')).hexdigest()
6685
)
6786

6887
def test_object_score_low_duplication(self):

tgt_grease/management/Model/bridge.py

Lines changed: 26 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,9 @@ def action_register(self):
3636
print("Registration Complete!")
3737
self.ioc.getLogger().info("Registration Completed Successfully")
3838
return True
39-
else:
40-
print("Registration Failed!")
41-
self.ioc.getLogger().info("Registration Failed")
42-
return False
39+
print("Registration Failed!")
40+
self.ioc.getLogger().info("Registration Failed")
41+
return False
4342

4443
def action_info(self, node=None, jobs=None, prototypeJobs=None):
4544
"""Gets Node Information
@@ -66,8 +65,8 @@ def action_info(self, node=None, jobs=None, prototypeJobs=None):
6665
return False
6766
valid, serverId = self.valid_server(node)
6867
if not valid:
69-
print("Invalid ObjectID")
70-
return False
68+
print("Invalid ObjectID")
69+
return False
7170
server = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(str(serverId))})
7271
if server:
7372
server = dict(server)
@@ -148,10 +147,9 @@ def action_info(self, node=None, jobs=None, prototypeJobs=None):
148147
job['grease_data']['execution']['returnData'])
149148
)
150149
return True
151-
else:
152-
print("Unable to locate server")
153-
self.ioc.getLogger().error("Unable to load [{0}] server for information".format(serverId))
154-
return False
150+
print("Unable to locate server")
151+
self.ioc.getLogger().error("Unable to load [{0}] server for information".format(serverId))
152+
return False
155153

156154
def action_assign(self, prototype=None, role=None, node=None):
157155
"""Assign prototypes/roles to a node either local or remote
@@ -177,12 +175,12 @@ def action_assign(self, prototype=None, role=None, node=None):
177175
del job
178176
valid, serverId = self.valid_server(node)
179177
if not valid:
180-
print("Invalid ObjectID")
181-
return False
178+
print("Invalid ObjectID")
179+
return False
182180
updated = self.ioc.getCollection('JobServer').update_one(
183181
{'_id': ObjectId(serverId)},
184182
{
185-
'$push': {
183+
'$addToSet': {
186184
'prototypes': prototype
187185
}
188186
}
@@ -198,8 +196,8 @@ def action_assign(self, prototype=None, role=None, node=None):
198196
if role:
199197
valid, serverId = self.valid_server(node)
200198
if not valid:
201-
print("Invalid ObjectID")
202-
return False
199+
print("Invalid ObjectID")
200+
return False
203201
updated = self.ioc.getCollection('JobServer').update_one(
204202
{'_id': ObjectId(serverId)},
205203
{
@@ -245,8 +243,8 @@ def action_unassign(self, prototype=None, role=None, node=None):
245243
del job
246244
valid, serverId = self.valid_server(node)
247245
if not valid:
248-
print("Invalid ObjectID")
249-
return False
246+
print("Invalid ObjectID")
247+
return False
250248
updated = self.ioc.getCollection('JobServer').update_one(
251249
{'_id': ObjectId(serverId)},
252250
{
@@ -266,8 +264,8 @@ def action_unassign(self, prototype=None, role=None, node=None):
266264
if role:
267265
valid, serverId = self.valid_server(node)
268266
if not valid:
269-
print("Invalid ObjectID")
270-
return False
267+
print("Invalid ObjectID")
268+
return False
271269
updated = self.ioc.getCollection('JobServer').update_one(
272270
{'_id': ObjectId(serverId)},
273271
{
@@ -302,8 +300,8 @@ def action_cull(self, node=None):
302300
return False
303301
valid, serverId = self.valid_server(node)
304302
if not valid:
305-
print("Invalid ObjectID")
306-
return False
303+
print("Invalid ObjectID")
304+
return False
307305
if not self.monitor.deactivateServer(serverId):
308306
self.ioc.getLogger().error(
309307
"Failed deactivating server [{0}]".format(serverId)
@@ -356,8 +354,8 @@ def action_activate(self, node=None):
356354
return False
357355
valid, serverId = self.valid_server(node)
358356
if not valid:
359-
print("Invalid ObjectID")
360-
return False
357+
print("Invalid ObjectID")
358+
return False
361359
if self.ioc.getCollection('JobServer').update_one(
362360
{'_id': ObjectId(serverId)},
363361
{
@@ -369,9 +367,8 @@ def action_activate(self, node=None):
369367
).modified_count < 1:
370368
self.ioc.getLogger().warning("Server [{0}] failed to be activated".format(serverId))
371369
return False
372-
else:
373-
self.ioc.getLogger().warning("Server [{0}] activated".format(serverId))
374-
return True
370+
self.ioc.getLogger().warning("Server [{0}] activated".format(serverId))
371+
return True
375372

376373
def valid_server(self, node=None):
377374
"""Validates node is in the MongoDB instance connected to
@@ -391,8 +388,6 @@ def valid_server(self, node=None):
391388
return False, ""
392389
if server:
393390
return True, dict(server).get('_id')
394-
else:
395-
self.ioc.getLogger().error("Failed to find server [{0}] in the database".format(node))
396-
return False, ""
397-
else:
398-
return True, self.ioc.getConfig().NodeIdentity
391+
self.ioc.getLogger().error("Failed to find server [{0}] in the database".format(node))
392+
return False, ""
393+
return True, self.ioc.getConfig().NodeIdentity

0 commit comments

Comments
 (0)