Skip to content

Commit 041153e

Browse files
committed
Implement new version of FastRun
1 parent 85e8515 commit 041153e

32 files changed

+934
-892
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -856,7 +856,15 @@ for entrez_id, ensembl in raw_data.items():
856856

857857
Note: Fastrun mode checks for equality of property/value pairs, qualifiers (not including qualifier attributes), labels,
858858
aliases and descriptions, but it ignores references by default!
859-
References can be checked in fast run mode by setting `use_refs` to `True`.
859+
References can be checked in fast run mode by setting `use_references` to `True`.
860+
861+
# Statistics #
862+
863+
| Dataset | partial fastrun | fastrun without qualifiers/references | fastrun with qualifiers | fastrun with qualifiers/references |
864+
|:----------------------------|----------------:|--------------------------------------:|------------------------:|-----------------------------------:|
865+
| Communes (34990 elements) | ? | 7min | 30s | 60s |
866+
| Cantons (2042 elements) | ? | ? | ? | ? |
867+
| Départements (100 elements) | 70min | 1s | 30s | 60s |
860868

861869
# Debugging #
862870

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,4 @@ disable = [
116116

117117
[tool.pytest.ini_options]
118118
log_cli = true
119+
log_cli_level = 'DEBUG'

test/test_all.py

Lines changed: 6 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import copy
2+
import logging
23
import unittest
34

45
from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_fastrun
56
from wikibaseintegrator.datatypes import BaseDataType, Item
67
from wikibaseintegrator.entities import ItemEntity
78
from wikibaseintegrator.wbi_config import config as wbi_config
8-
from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatatype
9-
from wikibaseintegrator.wbi_fastrun import get_fastrun_container
9+
from wikibaseintegrator.wbi_enums import WikibaseDatatype
1010

1111
wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_all.py)'
1212

@@ -53,66 +53,22 @@ class TestFastRun(unittest.TestCase):
5353
"""
5454
some basic tests for fastrun mode
5555
"""
56+
logging.basicConfig(level=logging.DEBUG)
5657

5758
def test_fastrun(self):
5859
statements = [
59-
datatypes.ExternalID(value='P40095', prop_nr='P352'),
60+
datatypes.ExternalID(value='A0A023PZB3', prop_nr='P352'),
6061
datatypes.ExternalID(value='YER158C', prop_nr='P705')
6162
]
6263

6364
frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType)
6465

65-
fastrun_result = frc.write_required(data=statements)
66-
67-
if fastrun_result:
68-
message = 'fastrun failed'
69-
else:
70-
message = 'successful fastrun'
66+
fastrun_result = frc.write_required(claims=statements)
7167

7268
# Here, fastrun should succeed; if it does not, the test fails.
73-
if fastrun_result:
69+
if not fastrun_result:
7470
raise ValueError
7571

76-
def test_fastrun_label(self):
77-
# tests fastrun label, description and aliases, and label in another language
78-
frc = get_fastrun_container(base_filter=[datatypes.ExternalID(value='/m/02j71', prop_nr='P646')])
79-
item = WikibaseIntegrator().item.get('Q2')
80-
81-
assert item.labels.get(language='en') == "Earth"
82-
descr = item.descriptions.get(language='en')
83-
assert len(descr) > 3
84-
assert "Planet Earth" in item.aliases.get()
85-
86-
assert list(frc.get_language_data("Q2", 'en', 'label'))[0] == item.labels.get(language='en')
87-
assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label')
88-
assert "Planet Earth" in item.aliases.get()
89-
assert "planet" in item.descriptions.get()
90-
91-
assert item.labels.get('es') == "Tierra"
92-
93-
item.descriptions.set(value=descr)
94-
item.descriptions.set(value="fghjkl")
95-
assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'fghjkl'}
96-
item.labels.set(value="Earth")
97-
item.labels.set(value="xfgfdsg")
98-
assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'}
99-
item.aliases.set(values=["fake alias"], action_if_exists=ActionIfExists.APPEND_OR_REPLACE)
100-
assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en']
101-
102-
# something that's empty (for now.., can change, so this just makes sure no exception is thrown)
103-
frc.check_language_data("Q2", ['Ewiase'], 'ak', 'label')
104-
frc.check_language_data("Q2", ['not Ewiase'], 'ak', 'label')
105-
frc.check_language_data("Q2", [''], 'ak', 'description')
106-
frc.check_language_data("Q2", [], 'ak', 'aliases')
107-
frc.check_language_data("Q2", ['sdf', 'sdd'], 'ak', 'aliases')
108-
109-
item.labels.get(language="ak")
110-
item.descriptions.get(language='ak')
111-
item.aliases.get(language="ak")
112-
item.labels.set(value="label", language="ak")
113-
item.descriptions.set(value="d", language="ak")
114-
item.aliases.set(values=["a"], language="ak", action_if_exists=ActionIfExists.APPEND_OR_REPLACE)
115-
11672

11773
def test_sitelinks():
11874
item = wbi.item.get('Q622901')

test/test_entity_item.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,18 @@ def test_write(self):
5151
def test_write_not_required(self):
5252
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1791')])
5353

54+
def test_write_not_required_ref(self):
55+
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1464')], use_references=True)
56+
5457
def test_write_required(self):
5558
item = wbi.item.get('Q582')
5659
item.claims.add(Item(prop_nr='P1791', value='Q42'))
5760
assert item.write_required([BaseDataType(prop_nr='P1791')])
5861

59-
def test_write_not_required_ref(self):
60-
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)
61-
6262
def test_write_required_ref(self):
6363
item = wbi.item.get('Q582')
64-
item.claims.get('P2581')[0].references.references.pop()
65-
assert item.write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)
64+
item.claims.get('P1464')[0].references.references.pop()
65+
assert item.write_required(base_filter=[BaseDataType(prop_nr='P1464')], use_references=True)
6666

6767
def test_long_item_id(self):
6868
assert wbi.item.get('Item:Q582').id == 'Q582'

test/test_wbi_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def test_new_item_creation(self):
243243
MonolingualText(text='xxx', language='fr', prop_nr='P7'),
244244
Quantity(amount=-5.04, prop_nr='P8'),
245245
Quantity(amount=5.06, upper_bound=9.99, lower_bound=-2.22, unit='Q11573', prop_nr='P8'),
246-
CommonsMedia(value='xxx', prop_nr='P9'),
246+
CommonsMedia(value="Place lazare goujon.jpg", prop_nr='P9'),
247247
GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr='P10'),
248248
GeoShape(value='Data:xxx.map', prop_nr='P11'),
249249
Property(value='P123', prop_nr='P12'),

test/test_wbi_fastrun.py

Lines changed: 0 additions & 211 deletions
This file was deleted.

wikibaseintegrator/datatypes/basedatatype.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class BaseDataType(Claim):
1111
The base class for all Wikibase data types, they inherit from it
1212
"""
1313
DTYPE = 'base-data-type'
14+
PTYPE = 'property-data-type'
1415
subclasses: list[type[BaseDataType]] = []
1516
sparql_query: str = '''
1617
SELECT * WHERE {{
@@ -28,7 +29,14 @@ def __init__(self, prop_nr: int | str | None = None, **kwargs: Any):
2829

2930
super().__init__(**kwargs)
3031

31-
self.mainsnak.property_number = prop_nr or None
32+
if isinstance(prop_nr, str):
33+
pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$')
34+
matches = pattern.match(str(prop_nr))
35+
36+
if matches:
37+
prop_nr = prop_nr.rsplit('/', 1)[-1]
38+
39+
self.mainsnak.property_number = prop_nr
3240
# self.subclasses.append(self)
3341

3442
# Allow registration of subclasses of BaseDataType into BaseDataType.subclasses
@@ -39,7 +47,7 @@ def __init_subclass__(cls, **kwargs):
3947
def set_value(self, value: Any | None = None):
4048
pass
4149

42-
def get_sparql_value(self) -> str:
50+
def get_sparql_value(self, **kwargs: Any) -> str | None:
4351
return '"' + self.mainsnak.datavalue['value'] + '"'
4452

4553
def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
@@ -61,3 +69,6 @@ def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
6169
raise ValueError
6270

6371
return True
72+
73+
def from_sparql_value(self, sparql_value: dict) -> BaseDataType: # type: ignore
74+
pass

0 commit comments

Comments
 (0)