@@ -106,16 +106,10 @@ def minimal_slim_enrichment(self, slim_list: List[str]) -> pd.DataFrame:
106106 """
107107 source_list = [term .get_iri () for term in self ._term_list ]
108108 object_list = list (set (source_list + SlimManager .get_slim_members (slim_list )))
109- s_result = []
110- for chunk in chunks (object_list , 90 ):
111- s_result .extend (
112- [
113- res
114- for res in run_sparql_query (
115- get_simple_enrichment_query (source_list , chunk , self ._enrichment_property_list )
116- )
117- ]
118- )
109+ s_result = self ._batched_enrichment_results (
110+ object_list ,
111+ lambda chunk : get_simple_enrichment_query (source_list , chunk , self ._enrichment_property_list ),
112+ )
119113 self .enriched_df = (
120114 pd .DataFrame (s_result , columns = ["s" , "s_label" , "p" , "o" , "o_label" ])
121115 .sort_values ("s" )
@@ -140,9 +134,10 @@ def full_slim_enrichment(self, slim_list: List[str]) -> pd.DataFrame:
140134 """
141135 source_list = [term .get_iri () for term in self ._term_list ]
142136 object_list = list (set (source_list + SlimManager .get_slim_members (slim_list )))
143- s_result = []
144- for chunk in chunks (object_list , 90 ):
145- s_result .extend ([res for res in run_sparql_query (get_full_enrichment_query (source_list , chunk ))])
137+ s_result = self ._batched_enrichment_results (
138+ object_list ,
139+ lambda chunk : get_full_enrichment_query (source_list , chunk ),
140+ )
146141
147142 self .enriched_df = (
148143 pd .DataFrame (s_result , columns = ["s" , "s_label" , "p" , "x" , "x_label" ])
@@ -172,16 +167,10 @@ def contextual_slim_enrichment(self, context: List[str]) -> pd.DataFrame:
172167 query_string = get_contextual_enrichment_query (context )
173168 source_list = [term .get_iri () for term in self ._term_list ]
174169 object_list = list (set (source_list + [res .get ("term" ) for res in run_sparql_query (query_string )]))
175- s_result = []
176- for chunk in chunks (object_list , 90 ):
177- s_result .extend (
178- [
179- res
180- for res in run_sparql_query (
181- get_simple_enrichment_query (source_list , chunk , self ._enrichment_property_list )
182- )
183- ]
184- )
170+ s_result = self ._batched_enrichment_results (
171+ object_list ,
172+ lambda chunk : get_simple_enrichment_query (source_list , chunk , self ._enrichment_property_list ),
173+ )
185174
186175 self .enriched_df = (
187176 pd .DataFrame (s_result , columns = ["s" , "s_label" , "p" , "o" , "o_label" ])
@@ -219,16 +208,10 @@ def ancestor_enrichment(self, step_count: int) -> pd.DataFrame:
219208 source_list = [term .get_iri () for term in self ._term_list ]
220209 query_string = get_ancestor_enrichment_query (source_list , step_count )
221210 object_list = list (set (uri for res in run_sparql_query (query_string ) for uri in res .values ()))
222- s_result = []
223- for chunk in chunks (object_list , 90 ):
224- s_result .extend (
225- [
226- res
227- for res in run_sparql_query (
228- get_simple_enrichment_query (source_list , chunk , self ._enrichment_property_list )
229- )
230- ]
231- )
211+ s_result = self ._batched_enrichment_results (
212+ object_list ,
213+ lambda chunk : get_simple_enrichment_query (source_list , chunk , self ._enrichment_property_list ),
214+ )
232215
233216 self .enriched_df = (
234217 pd .DataFrame (s_result , columns = ["s" , "s_label" , "p" , "o" , "o_label" ])
@@ -365,15 +348,13 @@ def mirror_enrichment_for_graph_generation(self, term_list: List[str]) -> None:
365348 # TODO definitely need a refactoring later on
366349 s_result = []
367350 for s_chunk in chunks (term_list , 45 ):
368- for o_chunk in chunks (term_list , 45 ):
369- s_result .extend (
370- [
371- res
372- for res in run_sparql_query (
373- get_simple_enrichment_query (s_chunk , o_chunk , self ._enrichment_property_list )
374- )
375- ]
351+ s_result .extend (
352+ self ._batched_enrichment_results (
353+ term_list ,
354+ lambda o_chunk : get_simple_enrichment_query (s_chunk , o_chunk , self ._enrichment_property_list ),
355+ chunk_size = 45 ,
376356 )
357+ )
377358 self .graph_df = (
378359 pd .DataFrame (s_result , columns = ["s" , "s_label" , "p" , "o" , "o_label" ])
379360 .sort_values ("s" )
@@ -385,3 +366,16 @@ def _generate_enrichment_graph(self, object_list: List[str]) -> None:
385366 self .mirror_enrichment_for_graph_generation (object_list )
386367 self .graph = GraphGenerator .generate_enrichment_graph (self .graph_df )
387368 self .graph = GraphGenerator .apply_transitive_reduction (self .graph , self .enriched_df ["p" ].unique ().tolist ())
369+
def _batched_enrichment_results(
    self,
    object_list: List[str],
    query_builder,
    chunk_size: int = 90,
):
    """Run one enrichment query per batch of ``object_list`` and merge the rows.

    Querying in batches avoids oversized SPARQL VALUES blocks.

    Args:
        object_list: Items to split into batches via ``chunks``.
        query_builder: Callable invoked with a single batch; whatever it
            returns is passed straight to ``run_sparql_query``.
        chunk_size: Size argument handed to ``chunks``. Defaults to 90.

    Returns:
        A list holding every row produced by ``run_sparql_query``, in the
        order the batches were processed.
    """
    # One query is built and executed per batch; rows are flattened in order.
    return [
        row
        for batch in chunks(object_list, chunk_size)
        for row in run_sparql_query(query_builder(batch))
    ]
0 commit comments