@@ -114,6 +114,298 @@ def get_data_in_dataframe() -> pd.DataFrame:
114114 pass
115115
116116
class ChebiMockOntology(MockOntologyGraphData):
    r"""
    A mock ontology representing a simplified ChEBI (Chemical Entities of Biological Interest) structure.
    This class is used for testing purposes and includes nodes and edges representing chemical compounds
    and their relationships in a graph structure.

    Nodes:
        - CHEBI:12345 (Compound A)
        - CHEBI:54321 (Compound B)
        - CHEBI:67890 (Compound C)
        - CHEBI:11111 (Compound D)
        - CHEBI:22222 (Compound E)
        - CHEBI:99999 (Compound F)
        - CHEBI:77533 (Compound G, Obsolete node)
        - CHEBI:77564 (Compound H, Obsolete node)
        - CHEBI:88888 (Compound I)

    Valid Edges (written as parent -> child; the child term carries the
    ``is_a: <parent>`` line in the raw data):
        - CHEBI:54321 -> CHEBI:12345
        - CHEBI:67890 -> CHEBI:12345
        - CHEBI:67890 -> CHEBI:88888
        - CHEBI:11111 -> CHEBI:54321
        - CHEBI:22222 -> CHEBI:67890
        - CHEBI:12345 -> CHEBI:99999

    The raw data additionally contains ``is_a`` links that involve the two
    obsolete terms (54321 is_a 77564, 77533 is_a 99999); those are not part
    of the valid edges listed above.

    The class also includes methods to retrieve nodes, edges, and transitive closure of the graph.

    Visual Representation Graph with Valid Nodes and Edges
    (parents are drawn above their children):

            11111       22222
              |           |
            54321       67890
                \       /   \
                 \     /     \
                  12345     88888
                    |
                  99999
    """

    @staticmethod
    def get_nodes() -> List[int]:
        """
        Get the valid (non-obsolete) node IDs in the mock ontology.

        Returns:
            - List[int]: The valid ChEBI node IDs, listed in ascending order.
        """
        return [11111, 12345, 22222, 54321, 67890, 88888, 99999]

    @staticmethod
    def get_number_of_nodes() -> int:
        """
        Get the number of valid nodes in the mock ontology.

        Returns:
            - int: The number of valid nodes.
        """
        return len(ChebiMockOntology.get_nodes())

    @staticmethod
    def get_edges() -> Set[Tuple[int, int]]:
        """
        Get the set of valid edges in the mock ontology.

        Returns:
            - Set[Tuple[int, int]]: A set of (parent, child) tuples representing
              the directed edges between ChEBI nodes.
        """
        return {
            (54321, 12345),
            (67890, 12345),
            (67890, 88888),
            (11111, 54321),
            (22222, 67890),
            (12345, 99999),
        }

    @staticmethod
    def get_number_of_edges() -> int:
        """
        Get the number of valid edges in the mock ontology.

        Returns:
            - int: The number of valid edges.
        """
        return len(ChebiMockOntology.get_edges())

    @staticmethod
    def get_edges_of_transitive_closure_graph() -> Set[Tuple[int, int]]:
        """
        Get the set of edges derived from the transitive closure of the mock ontology graph.

        The set holds every direct edge from ``get_edges`` plus one edge for
        every longer parent-to-descendant path (for example 11111 -> 99999).

        Returns:
            - Set[Tuple[int, int]]: A set of tuples representing the directed edges
              in the transitive closure of the ChEBI graph.
        """
        return {
            (54321, 12345),
            (54321, 99999),
            (67890, 12345),
            (67890, 99999),
            (67890, 88888),
            (11111, 54321),
            (11111, 12345),
            (11111, 99999),
            (22222, 67890),
            (22222, 12345),
            (22222, 99999),
            (22222, 88888),
            (12345, 99999),
        }

    @staticmethod
    def get_number_of_transitive_edges() -> int:
        """
        Get the number of edges in the transitive closure of the mock ontology graph.

        Returns:
            - int: The number of edges in the transitive closure graph.
        """
        return len(ChebiMockOntology.get_edges_of_transitive_closure_graph())

    @staticmethod
    def get_obsolete_nodes_ids() -> Set[int]:
        """
        Get the set of obsolete node IDs in the mock ontology.

        Returns:
            - Set[int]: A set of integers representing the obsolete ChEBI node IDs
              (the terms flagged ``is_obsolete: true`` in the raw data).
        """
        return {77533, 77564}

    @staticmethod
    def get_raw_data() -> str:
        """
        Get the raw data representing the mock ontology in OBO format.

        Returns:
            - str: A string containing the raw OBO data for the mock ChEBI terms.
        """
        # The literal below is flush-left on purpose: everything between the
        # triple quotes is the OBO text itself, so any indentation added here
        # would become part of the returned data.
        return """
[Term]
id: CHEBI:12345
name: Compound A
subset: 2_STAR
property_value: http://purl.obolibrary.org/obo/chebi/formula "C26H35ClN4O6S" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/charge "0" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/monoisotopicmass "566.19658" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/mass "567.099" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/inchikey "ROXPMFGZZQEKHB-IUKKYPGJSA-N" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/inchi "InChI=1S/C26H35ClN4O6S/c1-16(2)28-26(34)30(5)14-23-17(3)13-31(18(4)15-32)25(33)21-7-6-8-22(24(21)37-23)29-38(35,36)20-11-9-19(27)10-12-20/h6-12,16-18,23,29,32H,13-15H2,1-5H3,(H,28,34)/t17-,18-,23+/m0/s1" xsd:string
xref: LINCS:LSM-20139
is_a: CHEBI:54321
is_a: CHEBI:67890

[Term]
id: CHEBI:54321
name: Compound B
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1O" xsd:string
is_a: CHEBI:11111
is_a: CHEBI:77564

[Term]
id: CHEBI:67890
name: Compound C
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1N" xsd:string
is_a: CHEBI:22222

[Term]
id: CHEBI:11111
name: Compound D
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1F" xsd:string

[Term]
id: CHEBI:22222
name: Compound E
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1Cl" xsd:string

[Term]
id: CHEBI:99999
name: Compound F
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1Br" xsd:string
is_a: CHEBI:12345

[Term]
id: CHEBI:77533
name: Compound G
is_a: CHEBI:99999
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=C1Br" xsd:string
is_obsolete: true

[Term]
id: CHEBI:77564
name: Compound H
property_value: http://purl.obolibrary.org/obo/chebi/smiles "CC=C1Br" xsd:string
is_obsolete: true

[Typedef]
id: has_major_microspecies_at_pH_7_3
name: has major microspecies at pH 7.3
is_cyclic: true
is_transitive: false

[Term]
id: CHEBI:88888
name: Compound I
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1[Mg+]" xsd:string
is_a: CHEBI:67890
"""

    @staticmethod
    def get_data_in_dataframe() -> pd.DataFrame:
        """
        Get the mock ontology as a table with one row per valid node.

        Columns, in order: ``id``, ``name``, ``SMILES``, followed by one boolean
        column per valid node ID (keyed by the integer ID, ascending). The
        boolean column for node ``N`` is True in the row of node ``R`` exactly
        when ``N == R`` or ``(N, R)`` is an edge of the transitive closure,
        i.e. when ``N`` is ``R`` itself or one of its ancestors. The values are
        hard-coded rather than computed so the table can act as an independent
        expected result in tests.

        Returns:
            - pd.DataFrame: The table described above; obsolete nodes are not included.
        """
        data = OrderedDict(
            id=[
                12345,
                54321,
                67890,
                11111,
                22222,
                99999,
                88888,
            ],
            name=[
                "Compound A",
                "Compound B",
                "Compound C",
                "Compound D",
                "Compound E",
                "Compound F",
                "Compound I",
            ],
            SMILES=[
                "C1=CC=CC=C1",
                "C1=CC=CC=C1O",
                "C1=CC=CC=C1N",
                "C1=CC=CC=C1F",
                "C1=CC=CC=C1Cl",
                "C1=CC=CC=C1Br",
                "C1=CC=CC=C1[Mg+]",
            ],
        )

        # The per-node columns are keyed by integers, so they are added with
        # update() instead of being unpacked as keyword arguments: keyword
        # names are expected to be strings, and a callee is free to reject
        # anything else. Insertion order (and thus column order) is preserved.
        ancestor_or_self_flags = {
            # -row- [12345, 54321, 67890, 11111, 22222, 99999, 88888]
            11111: [True, True, False, True, False, True, False],
            12345: [True, False, False, False, False, True, False],
            22222: [True, False, True, False, True, True, True],
            54321: [True, True, False, False, False, True, False],
            67890: [True, False, True, False, False, True, True],
            88888: [False, False, False, False, False, False, True],
            99999: [False, False, False, False, False, True, False],
        }
        data.update(ancestor_or_self_flags)

        return pd.DataFrame(data)

    @staticmethod
    def get_transitively_closed_graph() -> nx.DiGraph:
        """
        Build a directed graph that holds the mock ontology's transitive closure.

        The closure is not computed here: every valid node is added with a
        placeholder ``smiles`` attribute, and the edges are taken verbatim from
        ``get_edges_of_transitive_closure_graph``.

        Returns:
            g (nx.DiGraph): A transitively closed directed graph.
        """
        g = nx.DiGraph()

        for node in ChebiMockOntology.get_nodes():
            g.add_node(node, smiles="test_smiles_placeholder")

        g.add_edges_from(ChebiMockOntology.get_edges_of_transitive_closure_graph())

        return g
407+
408+
117409class GOUniProtMockData (MockOntologyGraphData ):
118410 """
119411 A mock ontology representing a simplified version of the Gene Ontology (GO) structure with nodes and edges
0 commit comments