Skip to content

Commit 2f17582

Browse files
committed
Add query_limit parameter
1 parent 25ddf63 commit 2f17582

File tree

2 files changed: 14 additions and 7 deletions.

2 files changed: 14 additions and 7 deletions.

wikibaseintegrator/wbi_config.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,5 +28,6 @@
2828
'SPARQL_ENDPOINT_URL': 'https://query.wikidata.org/sparql',
2929
'WIKIBASE_URL': 'http://www.wikidata.org',
3030
'DEFAULT_LANGUAGE': 'en',
31-
'DEFAULT_LEXEME_LANGUAGE': 'Q1860'
31+
'DEFAULT_LEXEME_LANGUAGE': 'Q1860',
32+
'SPARQL_QUERY_LIMIT': 10000
3233
}

wikibaseintegrator/wbi_fastrun.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ def __init__(self, base_filter: List[BaseDataType | List[BaseDataType]], base_da
5656
if self.case_insensitive:
5757
raise ValueError("Case insensitive does not work for the moment.")
5858

59-
def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache: Optional[bool] = None, wb_url: Optional[str] = None, limit: int = 10000) -> None:
59+
def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache: Optional[bool] = None, wb_url: Optional[str] = None, limit: Optional[int] = None) -> None:
6060
"""
6161
Load the statements related to the given claims into the internal cache of the current object.
6262
@@ -75,6 +75,8 @@ def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache:
7575

7676
wb_url = wb_url or self.wikibase_url
7777

78+
limit = limit or int(config['SPARQL_QUERY_LIMIT']) # type: ignore
79+
7880
for claim in claims:
7981
prop_nr = claim.mainsnak.property_number
8082

@@ -168,7 +170,7 @@ def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache:
168170
if len(results) == 0 or len(results) < limit:
169171
break
170172

171-
def _load_qualifiers(self, sid: str, limit: int = 10000) -> Qualifiers:
173+
def _load_qualifiers(self, sid: str, limit: Optional[int] = None) -> Qualifiers:
172174
"""
173175
Load the qualifiers of a statement.
174176
@@ -178,6 +180,8 @@ def _load_qualifiers(self, sid: str, limit: int = 10000) -> Qualifiers:
178180
"""
179181
offset = 0
180182

183+
limit = limit or int(config['SPARQL_QUERY_LIMIT']) # type: ignore
184+
181185
# We force a refresh of the data, remove the previous results
182186
qualifiers: Qualifiers = Qualifiers()
183187
while True:
@@ -309,20 +313,21 @@ def _get_property_type(self, prop_nr: Union[str, int]) -> str:
309313

310314
return results
311315

312-
def get_entities(self, claims: Union[List[Claim], Claims, Claim], use_cache: Optional[bool] = None) -> List[str]:
316+
def get_entities(self, claims: Union[List[Claim], Claims, Claim], use_cache: Optional[bool] = None, query_limit: Optional[int] = None) -> List[str]:
313317
"""
314318
Return a list of entities who correspond to the specified claims.
315319
316320
:param claims: A list of claims to query the SPARQL endpoint.
317321
:param use_cache: Put data returned by WDQS in cache. Enabled by default.
322+
:param query_limit: Limit the amount of results from the SPARQL server
318323
:return: A list of entity ID.
319324
"""
320325
if isinstance(claims, Claim):
321326
claims = [claims]
322327
elif (not isinstance(claims, list) or not all(isinstance(n, Claim) for n in claims)) and not isinstance(claims, Claims):
323328
raise ValueError("claims must be an instance of Claim or Claims or a list of Claim")
324329

325-
self.load_statements(claims=claims, use_cache=use_cache)
330+
self.load_statements(claims=claims, use_cache=use_cache, limit=query_limit)
326331

327332
result = set()
328333
for claim in claims:
@@ -334,14 +339,15 @@ def get_entities(self, claims: Union[List[Claim], Claims, Claim], use_cache: Opt
334339
return list(result)
335340

336341
def write_required(self, entity: BaseEntity, property_filter: Union[List[str], str, None] = None, use_qualifiers: Optional[bool] = None, use_references: Optional[bool] = None,
337-
use_cache: Optional[bool] = None) -> bool:
342+
use_cache: Optional[bool] = None, query_limit: Optional[int] = None) -> bool:
338343
"""
339344
340345
:param entity:
341346
:param property_filter:
342347
:param use_qualifiers: Use qualifiers during fastrun. Enabled by default.
343348
:param use_references: Use references during fastrun. Disabled by default.
344349
:param use_cache: Put data returned by WDQS in cache. Enabled by default.
350+
:param query_limit: Limit the amount of results from the SPARQL server
345351
:return: a boolean True if a write is required. False otherwise.
346352
"""
347353
from wikibaseintegrator.entities import BaseEntity
@@ -372,7 +378,7 @@ def contains(in_list, lambda_filter):
372378
statements_to_check: Dict[str, List[str]] = {}
373379
for claim in entity.claims:
374380
if claim.mainsnak.property_number in property_filter:
375-
self.load_statements(claims=claim, use_cache=use_cache)
381+
self.load_statements(claims=claim, use_cache=use_cache, limit=query_limit)
376382
if claim.mainsnak.property_number in self.data:
377383
if not contains(self.data[claim.mainsnak.property_number], (lambda x, c=claim: x == c.get_sparql_value())):
378384
# Found if a property with this value does not exist, return True if none exist

0 commit comments

Comments
 (0)