11"""
2- process_json returns a generator that yields Entities)
2+ read_json returns a generator that yields Entities.
33
44usage:
55with open("file.json", "r") as f:
6- for entity in process_json (f):
6+ for entity in read_json (f):
77 do_things()
88
99"""
1515 print "ujson not found"
1616 import json as json
1717
18- def process_json (input_file ):
18+ def read_json (input_file ):
1919 count = 0
2020 for jsonline in input_file :
2121 count += 1
@@ -27,33 +27,34 @@ def process_json(input_file):
2727 except ValueError :
2828 continue
2929 if data ["type" ] == "item" :
30- title = data ["id" ]
31- if not "claims" in data :
32- yield Entity (title , [])
33- continue
34- claims = []
35- for prop , statements in data ["claims" ].iteritems ():
36- for statement in statements :
37- references = []
38- if "references" in statement :
39- for prop , snaks in statement ["references" ][0 ]["snaks" ].iteritems ():
40- for snak in snaks :
41- ref = _parse_json_snak (snak )
42- if ref :
43- references .append (ref )
44- qualifiers = []
45- if "qualifiers" in statement :
46- for prop , snaks in statement ["qualifiers" ].iteritems ():
47- for snak in snaks :
48- qualifier = _parse_json_snak (snak )
49- if qualifier :
50- qualifiers .append (qualifier )
51- claim = _parse_json_snak (statement ["mainsnak" ])
52- if claim :
53- claims .append (Claim (claim , qualifiers , references ))
30+ yield _process_json (data )
5431
55- yield Entity (title , claims )
def _process_json(data):
    """Build an Entity from one decoded JSON entity record.

    ``data`` is indexed by "id" and, when present, "claims" (a mapping
    whose values are lists of statements). Every statement contributes one
    Claim made of its parsed "mainsnak", its parsed "qualifiers" and the
    parsed snaks of its first "references" entry. A statement whose main
    snak parses to a falsy value is skipped.

    Returns ``Entity(data["id"], claims)``; ``claims`` is an empty list
    when ``data`` has no "claims" key.
    """
    title = data["id"]
    if "claims" not in data:
        return Entity(title, [])

    claims = []
    # The property keys are not needed here: each parsed snak carries its
    # own property, so only the statement lists are iterated.
    for statements in data["claims"].values():
        for statement in statements:
            references = []
            if "references" in statement:
                # NOTE(review): only the first reference record is read, as
                # in the previous implementation -- confirm that any
                # further records are meant to be ignored.
                references = _parse_snak_groups(
                    statement["references"][0]["snaks"])

            qualifiers = []
            if "qualifiers" in statement:
                qualifiers = _parse_snak_groups(statement["qualifiers"])

            main_snak = _parse_json_snak(statement["mainsnak"])
            if main_snak:
                claims.append(Claim(main_snak, qualifiers, references))

    return Entity(title, claims)


def _parse_snak_groups(snak_groups):
    """Parse every snak in a ``{key: [snak, ...]}`` mapping.

    Returns the results of ``_parse_json_snak`` in iteration order,
    leaving out any result that is falsy.
    """
    parsed = []
    for snaks in snak_groups.values():
        for snak in snaks:
            result = _parse_json_snak(snak)
            if result:
                parsed.append(result)
    return parsed
5758
5859def _parse_json_snak (claim_json ):
5960 if claim_json ["snaktype" ] == "value" :
@@ -81,5 +82,5 @@ def _parse_json_snak(claim_json):
8182 else : # novalue, somevalue, ...
8283 datatype = "unknown"
8384 value = claim_json ["snaktype" ]
84- property_id = claim_json ["property" ][1 :]
85+ property_id = int ( claim_json ["property" ][1 :])
8586 return Snak (property_id , datatype , value )
0 commit comments