Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,15 @@ for entrez_id, ensembl in raw_data.items():

Note: Fastrun mode checks for equality of property/value pairs, qualifiers (not including qualifier attributes), labels,
aliases and descriptions, but it ignores references by default!
References can be checked in fast run mode by setting `use_refs` to `True`.
References can be checked in fast run mode by setting `use_references` to `True`.

# Statistics #

| Dataset | partial fastrun | fastrun without qualifiers/references | fastrun with qualifiers | fastrun with qualifiers/references |
|:----------------------------|----------------:|--------------------------------------:|------------------------:|-----------------------------------:|
| Communes (34990 elements) | ? | 7min | 30s | 60s |
| Cantons (2042 elements) | ? | ? | ? | ? |
| Départements (100 elements) | 70min | 1s | 30s | 60s |

# Debugging #

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,4 @@ disable = [

[tool.pytest.ini_options]
log_cli = true
log_cli_level = 'DEBUG'
56 changes: 6 additions & 50 deletions test/test_all.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import copy
import logging
import unittest

from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_fastrun
from wikibaseintegrator.datatypes import BaseDataType, Item
from wikibaseintegrator.entities import ItemEntity
from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatatype
from wikibaseintegrator.wbi_fastrun import get_fastrun_container
from wikibaseintegrator.wbi_enums import WikibaseDatatype

wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_all.py)'

Expand Down Expand Up @@ -53,66 +53,22 @@ class TestFastRun(unittest.TestCase):
"""
some basic tests for fastrun mode
"""
logging.basicConfig(level=logging.DEBUG)

def test_fastrun(self):
statements = [
datatypes.ExternalID(value='P40095', prop_nr='P352'),
datatypes.ExternalID(value='A0A023PZB3', prop_nr='P352'),
datatypes.ExternalID(value='YER158C', prop_nr='P705')
]

frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType)

fastrun_result = frc.write_required(data=statements)

if fastrun_result:
message = 'fastrun failed'
else:
message = 'successful fastrun'
fastrun_result = frc.write_required(claims=statements)

# here, fastrun should succeed, if not, test failed
if fastrun_result:
if not fastrun_result:
raise ValueError

def test_fastrun_label(self):
# tests fastrun label, description and aliases, and label in another language
frc = get_fastrun_container(base_filter=[datatypes.ExternalID(value='/m/02j71', prop_nr='P646')])
item = WikibaseIntegrator().item.get('Q2')

assert item.labels.get(language='en') == "Earth"
descr = item.descriptions.get(language='en')
assert len(descr) > 3
assert "Planet Earth" in item.aliases.get()

assert list(frc.get_language_data("Q2", 'en', 'label'))[0] == item.labels.get(language='en')
assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label')
assert "Planet Earth" in item.aliases.get()
assert "planet" in item.descriptions.get()

assert item.labels.get('es') == "Tierra"

item.descriptions.set(value=descr)
item.descriptions.set(value="fghjkl")
assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'fghjkl'}
item.labels.set(value="Earth")
item.labels.set(value="xfgfdsg")
assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'}
item.aliases.set(values=["fake alias"], action_if_exists=ActionIfExists.APPEND_OR_REPLACE)
assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en']

# something that's empty (for now.., can change, so this just makes sure no exception is thrown)
frc.check_language_data("Q2", ['Ewiase'], 'ak', 'label')
frc.check_language_data("Q2", ['not Ewiase'], 'ak', 'label')
frc.check_language_data("Q2", [''], 'ak', 'description')
frc.check_language_data("Q2", [], 'ak', 'aliases')
frc.check_language_data("Q2", ['sdf', 'sdd'], 'ak', 'aliases')

item.labels.get(language="ak")
item.descriptions.get(language='ak')
item.aliases.get(language="ak")
item.labels.set(value="label", language="ak")
item.descriptions.set(value="d", language="ak")
item.aliases.set(values=["a"], language="ak", action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


def test_sitelinks():
item = wbi.item.get('Q622901')
Expand Down
10 changes: 5 additions & 5 deletions test/test_entity_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,18 @@ def test_write(self):
def test_write_not_required(self):
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1791')])

def test_write_not_required_ref(self):
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1464')], use_references=True)

def test_write_required(self):
item = wbi.item.get('Q582')
item.claims.add(Item(prop_nr='P1791', value='Q42'))
assert item.write_required([BaseDataType(prop_nr='P1791')])

def test_write_not_required_ref(self):
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)

def test_write_required_ref(self):
item = wbi.item.get('Q582')
item.claims.get('P2581')[0].references.references.pop()
assert item.write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)
item.claims.get('P1464')[0].references.references.pop()
assert item.write_required(base_filter=[BaseDataType(prop_nr='P1464')], use_references=True)

def test_long_item_id(self):
assert wbi.item.get('Item:Q582').id == 'Q582'
Expand Down
2 changes: 1 addition & 1 deletion test/test_wbi_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def test_new_item_creation(self):
MonolingualText(text='xxx', language='fr', prop_nr='P7'),
Quantity(amount=-5.04, prop_nr='P8'),
Quantity(amount=5.06, upper_bound=9.99, lower_bound=-2.22, unit='Q11573', prop_nr='P8'),
CommonsMedia(value='xxx', prop_nr='P9'),
CommonsMedia(value="Place lazare goujon.jpg", prop_nr='P9'),
GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr='P10'),
GeoShape(value='Data:xxx.map', prop_nr='P11'),
Property(value='P123', prop_nr='P12'),
Expand Down
211 changes: 0 additions & 211 deletions test/test_wbi_fastrun.py

This file was deleted.

15 changes: 13 additions & 2 deletions wikibaseintegrator/datatypes/basedatatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class BaseDataType(Claim):
The base class for all Wikibase data types, they inherit from it
"""
DTYPE = 'base-data-type'
PTYPE = 'property-data-type'
subclasses: list[type[BaseDataType]] = []
sparql_query: str = '''
SELECT * WHERE {{
Expand All @@ -28,7 +29,14 @@ def __init__(self, prop_nr: int | str | None = None, **kwargs: Any):

super().__init__(**kwargs)

self.mainsnak.property_number = prop_nr or None
if isinstance(prop_nr, str):
pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$')
matches = pattern.match(str(prop_nr))

if matches:
prop_nr = prop_nr.rsplit('/', 1)[-1]

self.mainsnak.property_number = prop_nr
# self.subclasses.append(self)

# Allow registration of subclasses of BaseDataType into BaseDataType.subclasses
Expand All @@ -39,7 +47,7 @@ def __init_subclass__(cls, **kwargs):
def set_value(self, value: Any | None = None):
pass

def get_sparql_value(self) -> str:
def get_sparql_value(self, **kwargs: Any) -> str | None:
return '"' + self.mainsnak.datavalue['value'] + '"'

def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
Expand All @@ -61,3 +69,6 @@ def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
raise ValueError

return True

def from_sparql_value(self, sparql_value: dict) -> BaseDataType: # type: ignore
pass
Loading