1717
1818
1919class EndpointSchema :
20+ # FROM <{graph}>
2021 _CLASS_PREDICATE_QUERY = """
2122 SELECT ?class ?predicate COUNT(*) AS ?count
22- FROM <{graph}>
23- WHERE {{
23+ WHERE {
2424 ?s a ?class ;
2525 ?predicate ?o .
26- }}
26+ }
2727 GROUP BY ?class ?predicate
2828 """
2929
3030 _RANGE_QUERY = """
3131 SELECT ?range
32- FROM <{graph}>
3332 WHERE {{
3433 ?s a <{class_name}> ;
3534 <{predicate_name}> ?o .
@@ -49,7 +48,7 @@ class EndpointSchema:
4948 def __init__ (
5049 self ,
5150 endpoint_url : str ,
52- graph : str ,
51+ # graph: str,
5352 limit_schema : dict [str , float ],
5453 max_workers : int ,
5554 force_recompute : bool ,
@@ -59,7 +58,6 @@ def __init__(
5958 Fetch class and predicate information from the SPARQL endpoint.
6059 Args:
6160 endpoint_url (str): The URL of the SPARQL endpoint to connect to.
62- graph (str): The graph URI to query within the endpoint.
6361 limit_schema (dict[str, float]): A dictionary specifying schema limits (e.g. "top_classes_percentile", "top_n_predicates", "top_n_ranges").
6462 max_workers (int): The maximum number of worker threads to use for concurrent operations.
6563 Functions:
@@ -68,7 +66,7 @@ def __init__(
6866 """
6967
7068 self ._endpoint_url = endpoint_url
71- self ._graph = graph
69+ # self._graph = graph
7270 self ._limit_schema = limit_schema
7371 self ._max_workers = max_workers
7472 self ._force_recompute = force_recompute
@@ -79,7 +77,7 @@ def _save_schema_dict(self) -> None:
7977 # Fetch counts information
8078 logger .info (f"Fetching class-predicate frequency information from { self ._endpoint_url } ..." )
8179 schema = query_sparql (
82- self ._CLASS_PREDICATE_QUERY . format ( graph = self . _graph ) ,
80+ self ._CLASS_PREDICATE_QUERY ,
8381 endpoint_url = self ._endpoint_url ,
8482 check_service_desc = False ,
8583 )["results" ]["bindings" ]
@@ -136,10 +134,9 @@ def _save_schema_dict(self) -> None:
136134 def _retrieve_predicate_information (self , class_name : str , predicate_name : str ) -> list [str ]:
137135 """Fetch ranges for a given predicate of a class"""
138136 try :
139- range = (
137+ pred_range = (
140138 query_sparql (
141139 self ._RANGE_QUERY .format (
142- graph = self ._graph ,
143140 class_name = class_name ,
144141 predicate_name = predicate_name ,
145142 limit = self ._limit_schema ["top_n_ranges" ],
@@ -151,9 +148,9 @@ def _retrieve_predicate_information(self, class_name: str, predicate_name: str)
151148 )
152149
153150 # Filter out unwanted ranges
154- range = [
151+ pred_range = [
155152 r ["range" ]["value" ]
156- for r in range
153+ for r in pred_range
157154 if (
158155 ("range" in r )
159156 and ("value" in r ["range" ])
@@ -162,8 +159,8 @@ def _retrieve_predicate_information(self, class_name: str, predicate_name: str)
162159 ]
163160 except Exception as e :
164161 logger .warning (f"Error retrieving range for { class_name } - { predicate_name } : { e } " )
165- range = []
166- return range
162+ pred_range = []
163+ return pred_range
167164
168165 def get_schema (self ) -> pd .DataFrame :
169166 """Load schema information from a JSON file."""
@@ -186,9 +183,7 @@ def get_schema(self) -> pd.DataFrame:
186183 def plot_heatmap (self , apply_limit : bool = True ) -> None :
187184 # Fetch counts information
188185 logger .info (f"Fetching counts information from { self ._endpoint_url } ..." )
189- counts = query_sparql (self ._CLASS_PREDICATE_QUERY .format (graph = self ._graph ), endpoint_url = self ._endpoint_url )[
190- "results"
191- ]["bindings" ]
186+ counts = query_sparql (self ._CLASS_PREDICATE_QUERY , endpoint_url = self ._endpoint_url )["results" ]["bindings" ]
192187 counts = pd .DataFrame (counts ).map (lambda x : x ["value" ]).assign (count = lambda df : df ["count" ].astype (int ))
193188 counts = counts .sort_values (by = "count" , ascending = False )
194189
@@ -223,30 +218,29 @@ def plot_heatmap(self, apply_limit: bool = True) -> None:
223218
224219if __name__ == "__main__" :
225220 start_time = time .time ()
226- schema = EndpointSchema (
227- endpoint_url = "http://localhost:8890/sparql/" ,
228- graph = "https://text2sparql.aksw.org/2025/corporate/" ,
229- limit_schema = {
230- "top_classes_percentile" : 0 ,
231- "top_n_predicates" : 20 ,
232- "top_n_ranges" : 1 ,
233- },
234- max_workers = 4 ,
235- force_recompute = True ,
236- schema_path = os .path .join ("data" , "benchmarks" , "Text2SPARQL" , "schemas" , "corporate_schema.json" ),
237- )
221+ # schema = EndpointSchema(
222+ # endpoint_url="http://localhost:8890/sparql/",
223+ # graph="https://text2sparql.aksw.org/2025/corporate/",
224+ # limit_schema={
225+ # "top_classes_percentile": 0,
226+ # "top_n_predicates": 20,
227+ # "top_n_ranges": 1,
228+ # },
229+ # max_workers=4,
230+ # force_recompute=True,
231+ # schema_path=os.path.join("data", "benchmarks", "Text2SPARQL", "schemas", "corporate_schema.json"),
232+ # )
238233
239234 schema = EndpointSchema (
240235 endpoint_url = "http://localhost:8890/sparql/" ,
241- graph = "https://text2sparql.aksw.org/2025/dbpedia/" ,
242236 limit_schema = {
243237 "top_classes_percentile" : 0.90 ,
244238 "top_n_predicates" : 20 ,
245239 "top_n_ranges" : 1 ,
246240 },
247241 max_workers = 4 ,
248242 force_recompute = True ,
249- schema_path = os .path .join ("data" , "benchmarks" , "Text2SPARQL" , "schemas" , " dbpedia_schema.json" ),
243+ schema_path = os .path .join ("data" , "dbpedia_schema.json" ),
250244 )
251245
252246 # Debugging examples
0 commit comments