1818endpoint_url = 'https://query.wikidata.org/sparql'
1919
2020
21- def safe_sparql_query (query ):
21+ def safe_sparql_query (query , time_out = 10 ):
2222
2323 if query in sparql_cache and "nocahe" not in sys .argv :
2424 err_bot .log_error ("SPARQL Cache Hit" , f"Query retrieved from cache: { query } " )
@@ -31,7 +31,7 @@ def safe_sparql_query(query):
3131 sparql .setQuery (query )
3232 # ---
3333 sparql .setReturnFormat (JSON )
34- sparql .setTimeout (10 )
34+ sparql .setTimeout (time_out )
3535 # ---
3636 data = sparql .query ().convert ()
3737 # ---
@@ -61,11 +61,11 @@ def safe_sparql_query(query):
6161 return {}, "SPARQL Unknown Error"
6262
6363
64- def get_results (query ):
64+ def get_results (query , time_out = 10 , get_err = False ):
6565 # ---
6666 now = time .time ()
6767 # ---
68- data , err = safe_sparql_query (query )
68+ data , err = safe_sparql_query (query , time_out = time_out )
6969 # ---
7070 # تنسيق النتائج
7171 result = []
@@ -95,6 +95,9 @@ def get_results(query):
9595 # ---
9696 print (f"SPARQL sparql_exec_time: { sparql_exec_time } " )
9797 # ---
98+ if get_err :
99+ return result , sparql_exec_time , err
100+ # ---
98101 return result , sparql_exec_time
99102
100103
@@ -236,23 +239,26 @@ def count_arabic_with_P11038():
236239 return count , sparql_exec_time
237240
238241
239- def find_duplicates ():
242+ def find_duplicates (LIMIT = 100 ):
240243 sparql_query = """
241- SELECT ?lemma_1 ?category
242- (GROUP_CONCAT(?1_item; separator=", ") AS ?items)
244+ SELECT ?lemma_fixed ?category
245+ (GROUP_CONCAT(strafter(str( ?1_item),"/entity/") ; separator=", ") AS ?items)
243246 (GROUP_CONCAT(?lemma; separator=", ") AS ?lemmas)
244247 WHERE {
245248 #service <https://qlever.cs.uni-freiburg.de/api/wikidata> {
246249 ?1_item dct:language wd:Q13955;
247250 wikibase:lemma ?lemma;
248251 wikibase:lexicalCategory ?category.
249- BIND(REPLACE(STR(?lemma), "[\u064B -\u065F \u066A -\u06EF ]", "") AS ?lemma_1 )
252+ BIND(REPLACE(STR(?lemma), "[\u064B -\u065F \u066A -\u06EF ]", "") AS ?lemma_fixed )
250253 #}
251254 }
252- GROUP BY ?lemma_1 ?category
255+ GROUP BY ?lemma_fixed ?category
253256 HAVING(COUNT(?1_item) > 1)
254- #LIMIT 10
255257 """
256- data , sparql_exec_time = get_results (sparql_query )
257258 # ---
258- return data , sparql_exec_time
259+ if LIMIT > 0 :
260+ sparql_query += f" LIMIT { LIMIT } "
261+ # ---
262+ data , sparql_exec_time , err = get_results (sparql_query , time_out = 35 , get_err = True )
263+ # ---
264+ return data , sparql_exec_time , err
0 commit comments