|
1 | | -"""Wikidata Tool Functions for NLQ2SPARQL LLM Integrations |
| 1 | +"""Wikidata Tool Functions for NLQ2SPARQL LLM Integrations. |
2 | 2 |
|
3 | | -Adapter-backed client creation to avoid touching shared libs. |
| 3 | +Async helpers that delegate to a loop-safe Wikidata client via the |
| 4 | +local integrations adapter. Provides two primary functions: |
| 5 | + - find_entity_id(label) -> QID |
| 6 | + - find_property_id(label) -> PID |
4 | 7 | """ |
5 | 8 | from __future__ import annotations |
6 | | -import asyncio, logging |
| 9 | + |
| 10 | +import asyncio |
| 11 | +import logging |
7 | 12 | from functools import lru_cache |
8 | | -from pathlib import Path |
9 | | -from typing import Optional, Sequence |
10 | | -from integrations.wikidata_adapter import ( |
11 | | - get_wikidata_client, |
12 | | - close_wikidata_client, |
13 | | -) |
| 13 | +from typing import Dict, Optional, Sequence |
| 14 | + |
| 15 | +try: |
| 16 | + from ..integrations.wikidata_adapter import ( |
| 17 | + get_wikidata_client, |
| 18 | + close_wikidata_client, |
| 19 | + ) |
| 20 | +except Exception: |
| 21 | + # Fallback absolute import if package context differs (e.g., scripts) |
| 22 | + from code.nlq2sparql.integrations.wikidata_adapter import ( # type: ignore |
| 23 | + get_wikidata_client, |
| 24 | + close_wikidata_client, |
| 25 | + ) |
| 26 | + |
14 | 27 | logger = logging.getLogger(__name__) |
15 | 28 |
|
| 29 | + |
16 | 30 | async def _get_client(): |
17 | 31 | """Get a loop-safe Wikidata client via the local adapter.""" |
18 | 32 | return await get_wikidata_client() |
| 33 | + |
| 34 | + |
19 | 35 | async def _search_entities_precise(term: str, entity_type: str, limit: int = 1): |
20 | 36 | client = await _get_client() |
21 | 37 | try: |
22 | 38 | return await client.wbsearchentities(term, entity_type=entity_type, limit=limit) |
23 | 39 | except Exception as e: |
24 | | - logger.error("wbsearchentities failed for '%s': %s", term, e); return [] |
| 40 | + logger.error("wbsearchentities failed for '%s': %s", term, e) |
| 41 | + return [] |
| 42 | + |
| 43 | + |
25 | 44 | async def _search_entities_fuzzy(term: str, entity_type: str, limit: int = 5): |
26 | | - if entity_type != 'item': return [] |
| 45 | + if entity_type != "item": |
| 46 | + return [] |
27 | 47 | client = await _get_client() |
28 | 48 | try: |
29 | | - return await client.search(term, limit=limit, entity_type='items') |
| 49 | + return await client.search(term, limit=limit, entity_type="items") |
30 | 50 | except Exception as e: |
31 | | - logger.error("Elastic search failed for '%s': %s", term, e); return [] |
| 51 | + logger.error("Elastic search failed for '%s': %s", term, e) |
| 52 | + return [] |
| 53 | + |
| 54 | + |
32 | 55 | @lru_cache(maxsize=512) |
33 | | -def _normalized_input(s: str) -> str: return ' '.join(s.strip().split()) |
34 | | -from typing import Dict |
| 56 | +def _normalized_input(s: str) -> str: |
| 57 | + return " ".join(s.strip().split()) |
| 58 | + |
35 | 59 |
|
36 | 60 | def _pick_best_candidate(term: str, candidates: Sequence[Dict]) -> Optional[str]: |
37 | | - if not candidates: return None |
| 61 | + if not candidates: |
| 62 | + return None |
38 | 63 | term_lower = term.lower().strip() |
39 | 64 | norm = [] |
40 | 65 | for c in candidates: |
41 | | - cid = c.get('id') or '' |
42 | | - label = c.get('label') or c.get('snippet') or '' |
43 | | - if cid and label: norm.append((cid,label)) |
44 | | - if not norm: return None |
45 | | - for cid,label in norm: |
46 | | - if label.lower()==term_lower: return cid |
47 | | - for cid,label in norm: |
48 | | - if label.lower().startswith(term_lower): return cid |
| 66 | + cid = c.get("id") or "" |
| 67 | + label = c.get("label") or c.get("snippet") or "" |
| 68 | + if cid and label: |
| 69 | + norm.append((cid, label)) |
| 70 | + if not norm: |
| 71 | + return None |
| 72 | + for cid, label in norm: |
| 73 | + if label.lower() == term_lower: |
| 74 | + return cid |
| 75 | + for cid, label in norm: |
| 76 | + if label.lower().startswith(term_lower): |
| 77 | + return cid |
49 | 78 | return norm[0][0] |
| 79 | + |
| 80 | + |
50 | 81 | async def find_entity_id(entity_label: str) -> Optional[str]: |
51 | | - if not entity_label or not entity_label.strip(): return None |
| 82 | + if not entity_label or not entity_label.strip(): |
| 83 | + return None |
52 | 84 | term = _normalized_input(entity_label) |
53 | 85 | # test suite expects limit=1 call on wbsearchentities |
54 | | - precise = await _search_entities_precise(term,'item',1) |
| 86 | + precise = await _search_entities_precise(term, "item", 1) |
55 | 87 | qid = _pick_best_candidate(term, precise) |
56 | | - if qid: return qid |
57 | | - fuzzy = await _search_entities_fuzzy(term,'item',5) |
| 88 | + if qid: |
| 89 | + return qid |
| 90 | + fuzzy = await _search_entities_fuzzy(term, "item", 5) |
58 | 91 | return _pick_best_candidate(term, fuzzy) |
| 92 | + |
| 93 | + |
59 | 94 | async def find_property_id(property_label: str) -> Optional[str]: |
60 | | - if not property_label or not property_label.strip(): return None |
| 95 | + if not property_label or not property_label.strip(): |
| 96 | + return None |
61 | 97 | term = _normalized_input(property_label) |
62 | | - precise = await _search_entities_precise(term,'property',1) |
| 98 | + precise = await _search_entities_precise(term, "property", 1) |
63 | 99 | return _pick_best_candidate(term, precise) |
| 100 | + |
| 101 | + |
64 | 102 | async def _close_session(): |
65 | 103 | await close_wikidata_client() |
| 104 | + |
| 105 | + |
66 | 106 | class WikidataTool: |
67 | 107 | """Lightweight OO wrapper kept for backward compatibility with tests. |
68 | 108 |
|
69 | 109 | Delegates to module-level async functions. |
70 | 110 | """ |
| 111 | + |
71 | 112 | async def find_entity_id(self, label: str): # pragma: no cover simple delegation |
72 | 113 | return await find_entity_id(label) |
73 | 114 |
|
74 | 115 | async def find_property_id(self, label: str): # pragma: no cover |
75 | 116 | return await find_property_id(label) |
76 | 117 |
|
77 | | -__all__ = ['find_entity_id','find_property_id','WikidataTool'] |
78 | | -if __name__ == '__main__': |
| 118 | + |
| 119 | +__all__ = ["find_entity_id", "find_property_id", "WikidataTool"] |
| 120 | + |
| 121 | + |
| 122 | +if __name__ == "__main__": |
79 | 123 | async def _demo(): |
80 | | - print('Entity Bach ->', await find_entity_id('Bach')) |
81 | | - print('Property composer ->', await find_property_id('composer')) |
| 124 | + print("Entity Bach ->", await find_entity_id("Bach")) |
| 125 | + print("Property composer ->", await find_property_id("composer")) |
82 | 126 | await _close_session() |
| 127 | + |
83 | 128 | asyncio.run(_demo()) |
0 commit comments