Skip to content

Commit 32c6bee

Browse files
committed
feature: deanon option
1 parent 5072394 commit 32c6bee

File tree

2 files changed

+21
-4
lines changed

2 files changed

+21
-4
lines changed

src/json2rdf/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
__version__ = "78" # should equal `git rev-list --count master`
1+
__version__ = "79" # should equal `git rev-list --count master`
22
# can roll back to 0 if errors
33
from .json2rdf import json2rdf, j2r

src/json2rdf/json2rdf.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class anonID(ID): ...
4242
terminals = tuple(terminals)
4343

4444
subject_keys = ('id',)
45+
deanon = False
4546
# can't do
4647
# subject_key = subject_keys[0]
4748
# @classproperty 'deprecated'
@@ -51,6 +52,14 @@ class anonID(ID): ...
5152
class list:
5253
key = '__rdftype__'
5354
value = '__rdfseq__'
55+
56+
@classmethod
57+
def maybeanon(cls, id):
58+
if cls.deanon:
59+
return cls.ID(id)
60+
else:
61+
assert(cls.deanon is False)
62+
return cls.anonID(id)
5463

5564
@classmethod
5665
def enter(cls, p, k, v):
@@ -59,19 +68,20 @@ def dicthasid(v):
5968
for id in cls.subject_keys:
6069
if id in v:
6170
yield id
71+
6272
if type(v) is dict:
6373
dids = dicthasid(v)
6474
dids = tuple(dids)
6575
return (
6676
# wrap in ID
6777
{sk: cls.ID(v[sk]) for sk in dids}
68-
or {subject_key: cls.anonID(id(v))},
78+
or {subject_key: cls.maybeanon(id(v))},
6979
# ..the rest of the data
7080
((k,v) for k,v in v.items() if k not in dids ) )
7181
elif type(v) is list:
7282
# id(lst) is not deterministic. don't think it's a 'problem'
7383
return ({
74-
subject_key: cls.anonID(id(v)),
84+
subject_key: cls.maybeanon(id(v)),
7585
cls.list.key: cls.list.value
7686
},
7787
enumerate(v))
@@ -117,6 +127,7 @@ class list(list): #ordered set? TODO
117127
def __str__(self) -> str:
118128
_ = '\n'.join([str(i) for i in self])
119129
return _
130+
_exclude_keys = {}
120131

121132
@classmethod
122133
def enter(cls, p, k, v):
@@ -133,7 +144,8 @@ def _(v, subject_key):
133144
else:
134145
assert(isinstance(iv, Identification.terminals ))
135146
if not ((ik in Identification.subject_keys) and (type(iv) is Identification.anonID)):
136-
yield cls.Triple(v[subject_key], ik, iv)
147+
if ik not in cls._exclude_keys:
148+
yield cls.Triple(v[subject_key], ik, iv)
137149
def __(v):
138150
for sk in Identification.subject_keys:
139151
if sk in v: yield from _(v, sk)
@@ -310,6 +322,7 @@ def json2rdf(
310322
sort = True, # (attempt to) make conversion deterministic
311323
# id interpretation
312324
subject_id_keys = defaults.Identification.subject_keys,
325+
deanon:bool = defaults.Identification.deanon,
313326
object_id_keys = defaults.Identification.object_keys,
314327
# # uri construction
315328
id_prefix = (defaults.RDFing.list.id_prefix,
@@ -330,11 +343,15 @@ def json2rdf(
330343
object_keys: set of keys whose values are interpreted as a uri in the *object* position.
331344
example: {"id": 1, "refid": 2,} ->
332345
prefix:1 prefix:refid prefix:2.
346+
deanon: set to True to use id_prefix when no id key is present.
347+
otherwise, a blank/anon node will be used.
333348
"""
334349
f = classes()
335350
if not subject_id_keys: # hack for the case when no identifier is desired from the input
336351
import uuid # todo: ya right.
337352
subject_id_keys = {str(uuid.uuid4())} # impossible key in data
353+
f.Tripling._exclude_keys = subject_id_keys
354+
f.Identification.deanon = deanon
338355
f.Identification.subject_keys = [k for k in subject_id_keys if k in frozenset(subject_id_keys)]
339356
f.Identification.object_keys = frozenset(object_id_keys)
340357

0 commit comments

Comments
 (0)