
Commit deccf9b

add tests + refactoring
1 parent e3f3dbc, commit deccf9b

9 files changed: +73, -41 lines

dumpconverter.py

Lines changed: 1 addition & 1 deletion
@@ -13,5 +13,5 @@
 #parser.add_argument("-p", "--processes", help="Number of processors to use (default 4)", type=int, default=4)
 args = parser.parse_args()
 start = time.time()
-CsvWriter.write_csv(newJsonReader.process_json(args.input), args.output)
+CsvWriter.write_csv(JsonReader.read_json(args.input), args.output)
 print "total time: %.2fs" % (time.time() - start)
Lines changed: 30 additions & 29 deletions
@@ -1,9 +1,9 @@
 """
-process_json returns a generator that yields Entities)
+read_json returns a generator that yields Entities)
 
 usage:
 with open("file.csv", "r") as f:
-    for entity in process_json(f):
+    for entity in read_json(f):
         do_things()
 
 """
@@ -15,7 +15,7 @@
     print "ujson not found"
     import json as json
 
-def process_json(input_file):
+def read_json(input_file):
     count = 0
     for jsonline in input_file:
         count += 1
@@ -27,33 +27,34 @@ def process_json(input_file):
         except ValueError:
             continue
         if data["type"] == "item":
-            title = data["id"]
-            if not "claims" in data:
-                yield Entity(title, [])
-                continue
-            claims = []
-            for prop, statements in data["claims"].iteritems():
-                for statement in statements:
-                    references = []
-                    if "references" in statement:
-                        for prop, snaks in statement["references"][0]["snaks"].iteritems():
-                            for snak in snaks:
-                                ref = _parse_json_snak(snak)
-                                if ref:
-                                    references.append(ref)
-                    qualifiers = []
-                    if "qualifiers" in statement:
-                        for prop, snaks in statement["qualifiers"].iteritems():
-                            for snak in snaks:
-                                qualifier = _parse_json_snak(snak)
-                                if qualifier:
-                                    qualifiers.append(qualifier)
-                    claim = _parse_json_snak(statement["mainsnak"])
-                    if claim:
-                        claims.append(Claim(claim, qualifiers, references))
+            yield _process_json(data)
 
-            yield Entity(title, claims)
+def _process_json(data):
+    title = data["id"]
+    if not "claims" in data:
+        return Entity(title, [])
+    claims = []
+    for prop, statements in data["claims"].iteritems():
+        for statement in statements:
+            references = []
+            if "references" in statement:
+                for prop, snaks in statement["references"][0]["snaks"].iteritems():
+                    for snak in snaks:
+                        ref = _parse_json_snak(snak)
+                        if ref:
+                            references.append(ref)
+            qualifiers = []
+            if "qualifiers" in statement:
+                for prop, snaks in statement["qualifiers"].iteritems():
+                    for snak in snaks:
+                        qualifier = _parse_json_snak(snak)
+                        if qualifier:
+                            qualifiers.append(qualifier)
+            claim = _parse_json_snak(statement["mainsnak"])
+            if claim:
+                claims.append(Claim(claim, qualifiers, references))
 
+    return Entity(title, claims)
 
 def _parse_json_snak(claim_json):
     if claim_json["snaktype"] == "value":
@@ -81,5 +82,5 @@ def _parse_json_snak(claim_json):
     else:  # novalue, somevalue, ...
         datatype = "unknown"
         value = claim_json["snaktype"]
-    property_id = claim_json["property"][1:]
+    property_id = int(claim_json["property"][1:])
     return Snak(property_id, datatype, value)

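Extracting _process_json is what makes the new unit test further down possible: both helpers can now be fed small hand-built dicts. A minimal sketch, using only the keys the code above actually reads (the input dicts are illustrative):

# Hedged sketch; the input dicts are hand-built for illustration and only use keys the
# functions above read (id, type, claims / snaktype, property).
from propertysuggester.parser import JsonReader
from propertysuggester.utils.datamodel import Entity, Snak

# an item without "claims" maps to an empty Entity (mirrors test_special_cases below)
print JsonReader._process_json({"id": "Q1", "type": "item"}) == Entity("Q1", [])  # True

# a "novalue" snak: the property string "P31" is now converted to the integer 31
snak = JsonReader._parse_json_snak({"snaktype": "novalue", "property": "P31"})
print snak == Snak(31, "unknown", "novalue")  # True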
propertysuggester/parser/XmlReader.py

Lines changed: 1 addition & 0 deletions
@@ -114,6 +114,7 @@ def _parse_json_snak(claim_json):
     if datatype == "string":
         value = claim_json[3]
     elif datatype == "wikibase-entityid":
+        datatype = "wikibase-item"
         if claim_json[3]["entity-type"] == "item":
             value = "Q" + str(claim_json[3]["numeric-id"])
         else:
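The added line makes the XML-based reader report the same "wikibase-item" datatype as JsonReader, which is why the shared assertions further down change in one place. A minimal sketch of the branch, assuming only the claim_json shape visible in the context lines:

# Hedged sketch: claim_json[3] holds the datavalue as the context lines above read it;
# everything else here is illustrative.
claim_json = [None, None, None, {"entity-type": "item", "numeric-id": 1647331}]
datatype = "wikibase-entityid"
if datatype == "wikibase-entityid":
    datatype = "wikibase-item"                          # the added normalization
    if claim_json[3]["entity-type"] == "item":
        value = "Q" + str(claim_json[3]["numeric-id"])  # -> "Q1647331"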
Binary file not shown (7.67 KB).
Binary file not shown (3.43 KB).

propertysuggester/test/parser/test_abstract_reader.py

Lines changed: 5 additions & 5 deletions
@@ -11,8 +11,8 @@ def assert_universe(self, result):
 
         self.assertThat(q1.title, Equals("Q1"))
         self.assertThat(q1.claims, Contains(Claim(Snak(373, "string", "Universe"), [],
-                                                  [Snak(143, "wikibase-entityid", "Q328")])))
-        self.assertThat(q1.claims, Contains(Claim(Snak(31, "wikibase-entityid", "Q223557"))))
-        self.assertThat(q1.claims, Contains(Claim(Snak(31, "wikibase-entityid", "Q1088088"))))
-        self.assertThat(q1.claims, Contains(Claim(Snak(361, "wikibase-entityid", "Q3327819"),
-                                                  [Snak(31, "wikibase-entityid", "Q41719")], [])))
+                                                  [Snak(143, "wikibase-item", "Q328")])))
+        self.assertThat(q1.claims, Contains(Claim(Snak(31, "wikibase-item", "Q223557"))))
+        self.assertThat(q1.claims, Contains(Claim(Snak(31, "wikibase-item", "Q1088088"))))
+        self.assertThat(q1.claims, Contains(Claim(Snak(361, "wikibase-item", "Q3327819"),
+                                                  [Snak(31, "wikibase-item", "Q41719")], [])))

propertysuggester/test/parser/test_csv_reader.py

Lines changed: 5 additions & 5 deletions
@@ -16,11 +16,11 @@ def setUp(self):
     def test_universe(self):
         out = StringIO()
         out.writelines(["Q1,claim,373,string,Universe\n",
-                        "Q1,reference,143,wikibase-entityid,Q328\n"
-                        "Q1,claim,31,wikibase-entityid,Q223557\n",
-                        "Q1,claim,31,wikibase-entityid,Q1088088\n",
-                        "Q1,claim,361,wikibase-entityid,Q3327819\n",
-                        "Q1,qualifier,31,wikibase-entityid,Q41719\n"])
+                        "Q1,reference,143,wikibase-item,Q328\n"
+                        "Q1,claim,31,wikibase-item,Q223557\n",
+                        "Q1,claim,31,wikibase-item,Q1088088\n",
+                        "Q1,claim,361,wikibase-item,Q3327819\n",
+                        "Q1,qualifier,31,wikibase-item,Q41719\n"])
         out.seek(0)
         result = list(CsvReader.read_csv(out))
         self.assert_universe(result)
Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+import unittest
+import gzip
+
+from pkg_resources import resource_filename
+from testtools import TestCase
+from testtools.matchers import *
+
+from propertysuggester.test.parser.test_abstract_reader import AbstractUniverseTest
+from propertysuggester.parser import JsonReader
+from propertysuggester.utils.datamodel import Claim, Snak, Entity
+
+
+class JsonReaderTest(AbstractUniverseTest):
+
+    def test_updated_dump(self):
+        with gzip.open(resource_filename(__name__, "Wikidata-Q15511.json.gz"), "r") as f:
+            result = list(JsonReader.read_json(f))
+
+        self.assertThat(len(result), Equals(1))
+        q15511 = result[0]
+        self.assertThat(q15511.title, Equals("Q15511"))
+        self.assertThat(q15511.claims, Contains(Claim(Snak(1082, "quantity", "+25"), [Snak(585, "time", "+00000002001-01-01T00:00:00Z"), Snak(459, "wikibase-item", "Q745221")], [Snak(248, "wikibase-item", "Q17597573")])))
+
+    def test_special_cases(self):
+        data = dict([("id", "Q1"), ("type", "item")])
+        self.assertThat(JsonReader._process_json(data), Equals(Entity("Q1", [])))
+
+if __name__ == '__main__':
+    unittest.main()
+

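Because the module keeps a unittest entry point, the new tests run like the existing ones; a minimal sketch, assuming the new test module is importable as propertysuggester.test.parser.test_json_reader and that the gzip fixture sits next to it:

# Hedged sketch: load and run the new reader tests programmatically; the dotted module
# name is an assumption based on the sibling test file naming.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName(
    "propertysuggester.test.parser.test_json_reader")
unittest.TextTestRunner(verbosity=2).run(suite)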
propertysuggester/test/parser/test_xml_reader.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def test_updated_dump(self):
         self.assertThat(len(result), Equals(1))
         q9351 = result[0]
         self.assertThat(q9351.title, Equals("Q9351"))
-        self.assertThat(q9351.claims, Contains(Claim(Snak(156, "wikibase-entityid", "Q1647331"))))
+        self.assertThat(q9351.claims, Contains(Claim(Snak(156, "wikibase-item", "Q1647331"))))
         self.assertThat(q9351.claims, Contains(Claim(Snak(1112, "quantity", "+25"))))
 
     def test_special_cases(self):
