@@ -206,8 +206,13 @@ def _load_qualifiers(self, sid: str, limit: int | None = None) -> Qualifiers:
206206 """
207207 offset = 0
208208
209+ if not isinstance (sid , str ):
210+ raise ValueError ('sid must be a string' )
211+
209212 limit = limit or int (config ['SPARQL_QUERY_LIMIT' ]) # type: ignore
210213
214+ # TODO: Add cache
215+
211216 # We force a refresh of the data, remove the previous results
212217 qualifiers : Qualifiers = Qualifiers ()
213218 while True :
@@ -261,6 +266,10 @@ def _load_references(self, sid: str, limit: int = 10000) -> References:
261266 if not isinstance (sid , str ):
262267 raise ValueError ('sid must be a string' )
263268
269+ limit = limit or int (config ['SPARQL_QUERY_LIMIT' ]) # type: ignore
270+
271+ # TODO: Add cache
272+
264273 # We force a refresh of the data, remove the previous results
265274 references : References = References ()
266275 while True :
@@ -326,6 +335,10 @@ def _load_rank(self, sid: str) -> WikibaseRank | None:
326335 if not isinstance (sid , str ):
327336 raise ValueError ('sid must be a string' )
328337
338+ # TODO: Add limit?
339+
340+ # TODO: Add cache
341+
329342 query = f'''
330343 #Tool: WikibaseIntegrator wbi_fastrun._load_rank
331344 SELECT ?rank WHERE {{
@@ -501,44 +514,60 @@ def contains(in_list, lambda_filter):
501514
502515 # If the property is already found, load it completely to compare deeply
503516 for claim in claims :
517+ # Check if the property is in the filter
504518 if claim .mainsnak .property_number in property_filter :
505519 sparql_value = claim .get_sparql_value ()
520+ # If the value exists in the cache
506521 if sparql_value and claim .mainsnak .property_number in self .data and sparql_value in self .data [claim .mainsnak .property_number ]:
507- for statement in self .data [claim .mainsnak .property_number ][sparql_value ]:
508- if entity_filter and statement ['entity' ].rsplit ('/' , 1 )[- 1 ] not in entity_filter :
509- continue
510- if statement ['entity' ] in common_entities :
511- if use_qualifiers :
512- qualifiers = self ._load_qualifiers (statement ['sid' ], limit = 100 )
513-
514- if len (qualifiers ) != len (claim .qualifiers ):
515- logging .debug ("Difference in number of qualifiers, '%i' != '%i'" , len (qualifiers ), len (claim .qualifiers ))
516- return True
517-
518- for qualifier in qualifiers :
519- if qualifier not in claim .qualifiers :
520- logging .debug ("Difference between two qualifiers" )
522+ entity_cache = [statement ['entity' ].rsplit ('/' , 1 )[- 1 ] for statement in self .data [claim .mainsnak .property_number ][sparql_value ]]
523+ if entity_filter :
524+ common_cache_filter = [value for value in entity_cache if value in entity_filter ]
525+ else :
526+ common_cache_filter = entity_cache
527+ # If there are common entities between the cache and the entity_filter
528+ if common_cache_filter :
529+ for statement in self .data [claim .mainsnak .property_number ][sparql_value ]:
530+ if entity_filter and statement ['entity' ].rsplit ('/' , 1 )[- 1 ] not in entity_filter :
531+ continue
532+
533+ if statement ['entity' ] in common_entities :
534+ if use_qualifiers :
535+ qualifiers = self ._load_qualifiers (statement ['sid' ], limit = 100 )
536+
537+ if len (qualifiers ) != len (claim .qualifiers ):
538+ logging .debug ("Difference in number of qualifiers, '%i' != '%i'" , len (qualifiers ), len (claim .qualifiers ))
521539 return True
522540
523- if use_references :
524- references = self ._load_references (statement ['sid' ], limit = 100 )
541+ for qualifier in qualifiers :
542+ if qualifier not in claim .qualifiers :
543+ logging .debug ("Difference between two qualifiers" )
544+ return True
525545
526- if sum (len (ref ) for ref in references ) != sum (len (x ) for x in claim .references ):
527- logging .debug ("Difference in number of references, '%i' != '%i'" , sum (len (ref ) for ref in references ), sum (len (x ) for x in claim .references ))
528- return True
546+ if use_references :
547+ references = self ._load_references (statement ['sid' ], limit = 100 )
529548
530- for reference in references :
531- if reference not in claim .references :
532- logging .debug ("Difference between two references" )
549+ if sum (len (ref ) for ref in references ) != sum (len (x ) for x in claim .references ):
550+ logging .debug ("Difference in number of references, '%i' != '%i'" , sum (len (ref ) for ref in references ), sum (len (x ) for x in claim .references ))
533551 return True
534552
535- if use_rank :
536- rank = self ._load_rank (statement ['sid' ])
553+ for reference in references :
554+ if reference not in claim .references :
555+ logging .debug ("Difference between two references" )
556+ return True
537557
538- if claim .rank != rank :
539- logging .debug ("Difference with the rank" )
540- return True
541- # TODO: Add use_rank to compare rank ?
558+ if use_rank :
559+ rank = self ._load_rank (statement ['sid' ])
560+
561+ if claim .rank != rank :
562+ logging .debug ("Difference with the rank" )
563+ return True
564+ else :
565+ logging .debug ("No common entities between cache and entity_filter" )
566+ return True
567+ # Enable this if the value doesn't exist ?
568+ else :
569+ logging .debug ("Value doesn't already exist in an entity" )
570+ return True
542571
543572 return False
544573
0 commit comments