55
# Fact collection helpers
77
8+ import warnings
9+ import uuid
810import hashlib
911import ipaddress
1012
def init_pseudonymization(enforce, rdb):
    """Initialize the module-level pseudonymization state.

    Looks up the seed stored under the ``cluster/anon_seed`` key of ``rdb``
    and records whether pseudonymization is enforced.

    Args:
        enforce: Truthy to enable pseudonymization. Stored as-is in the
            ``PSEUDO_ENFORCING`` global.
        rdb: Object exposing ``get(key)``. Presumably a Redis client;
            confirm against the callers.

    Side effects:
        Always sets ``PSEUDO_ENFORCING``. Sets ``PSEUDO_SEED`` to the stored
        seed when one exists; otherwise, only when ``enforce`` is truthy,
        to a freshly generated random UUID string (a warning is emitted
        because pseudonyms derived from it are not stable across runs).
        When no seed is stored and ``enforce`` is falsy, ``PSEUDO_SEED`` is
        left untouched.
    """
    global PSEUDO_ENFORCING, PSEUDO_SEED
    seed = rdb.get('cluster/anon_seed')
    # The seed is later concatenated with str values before hashing; a
    # client that returns raw bytes would make that concatenation raise
    # TypeError, so normalize to str here.
    if isinstance(seed, bytes):
        seed = seed.decode('utf-8')
    if seed:
        PSEUDO_SEED = seed
    elif enforce:
        warnings.warn("Generating an unstable, temporary seed for pseudonymization")
        PSEUDO_SEED = str(uuid.uuid4())
    PSEUDO_ENFORCING = enforce
2125
2226def has_subscription (rdb ):
2327 provider = rdb .hget ('cluster/subscription' , 'provider' )
@@ -26,13 +30,16 @@ def has_subscription(rdb):
def pseudo_string(val, maxlen=12):
    """Calculate a stable pseudonym of the given string.

    Args:
        val: The string to pseudonymize. Falsy values (``None``, ``""``)
            are returned unchanged.
        maxlen: Number of leading hex characters of the digest to keep.

    Returns:
        ``val`` itself when it is falsy or when ``PSEUDO_ENFORCING`` is
        falsy; otherwise the first ``maxlen`` characters of the SHA-256 hex
        digest of ``PSEUDO_SEED + val`` (UTF-8 encoded).
    """
    if not val or not PSEUDO_ENFORCING:
        return val
    # Prefixing the seed keeps the pseudonym stable for a given seed while
    # making it differ between installations with different seeds.
    salted = (PSEUDO_SEED + val).encode('utf-8')
    return hashlib.sha256(salted).hexdigest()[:maxlen]
3337
3438def pseudo_domain (val ):
3539 """Calculate a stable pseudonym of the given domain, keeping the TLD in clear text"""
40+ if not val or not PSEUDO_ENFORCING :
41+ return val
42+
3643 try :
3744 domain , suffix = val .rsplit ("." , 1 )
3845 return pseudo_string (domain , 8 ) + '.' + suffix
@@ -51,7 +58,7 @@ def pseudo_ip(val):
5158 except ValueError :
5259 return val
5360
54- digest = hashlib .md5 ((PSEUDO_SEED + ip .exploded ).encode ('utf-8' )).digest ()
61+ digest = hashlib .sha256 ((PSEUDO_SEED + ip .exploded ).encode ('utf-8' )).digest ()
5562
5663 if isinstance (ip , ipaddress .IPv4Address ):
5764 if ip .is_private :
0 commit comments