Skip to content

Commit 2422518

Browse files
committed
changes for fix
1 parent 7221a9e commit 2422518

File tree

5 files changed

+296
-5
lines changed

5 files changed

+296
-5
lines changed

chebai/models/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import logging
2-
from typing import Any, Dict, Optional, Union, Iterable
2+
from typing import Any, Dict, Iterable, Optional, Union
33

44
import torch
55
from lightning.pytorch.core.module import LightningModule

chebai/preprocessing/collect_all.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytorch_lightning as pl
66
import torch
77
import torch.nn.functional as F
8+
from data import ClassificationData, JCIClassificationData
89
from pytorch_lightning import loggers as pl_loggers
910
from pytorch_lightning.callbacks import ModelCheckpoint
1011
from pytorch_lightning.metrics import F1
@@ -13,8 +14,6 @@
1314
from torch_geometric import nn as tgnn
1415
from torch_geometric.data import DataLoader
1516

16-
from data import ClassificationData, JCIClassificationData
17-
1817
logging.getLogger("pysmiles").setLevel(logging.CRITICAL)
1918

2019

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .base import XYBaseDataModule, _DynamicDataset
1+
from .base import XYBaseDataModule, _DynamicDataset

configs/data/scope/scope50.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
class_path: chebai.preprocessing.datasets.scope.scope.SCOPeOver50
22
init_args:
3-
scope_version: "2.08"
3+
scope_version: "2.08"

tests/unit/mock_data/ontology_mock_data.py

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,298 @@ def get_data_in_dataframe() -> pd.DataFrame:
114114
pass
115115

116116

117+
class ChebiMockOntology(MockOntologyGraphData):
    r"""
    A mock ontology representing a simplified ChEBI (Chemical Entities of Biological Interest) structure.
    This class is used for testing purposes and includes nodes and edges representing chemical compounds
    and their relationships in a graph structure.

    Nodes:
    - CHEBI:12345 (Compound A)
    - CHEBI:54321 (Compound B)
    - CHEBI:67890 (Compound C)
    - CHEBI:11111 (Compound D)
    - CHEBI:22222 (Compound E)
    - CHEBI:99999 (Compound F)
    - CHEBI:77533 (Compound G, Obsolete node)
    - CHEBI:77564 (Compound H, Obsolete node)
    - CHEBI:88888 (Compound I)

    Valid Edges:
    - CHEBI:54321 -> CHEBI:12345
    - CHEBI:67890 -> CHEBI:12345
    - CHEBI:67890 -> CHEBI:88888
    - CHEBI:11111 -> CHEBI:54321
    - CHEBI:22222 -> CHEBI:67890
    - CHEBI:12345 -> CHEBI:99999

    An edge ``A -> B`` corresponds to a ``B is_a A`` statement in the raw OBO data
    returned by ``get_raw_data``. The raw data additionally contains ``is_a`` links
    that involve the obsolete terms (54321 is_a 77564, 77533 is_a 99999); these are
    not part of the valid edges listed above.

    The class also includes methods to retrieve nodes, edges, and transitive closure of the graph.

    Visual Representation Graph with Valid Nodes and Edges (edges point downwards):

                                  22222
                                  /
                  11111        67890
                      \        /   \
                     54321    /   88888
                         \   /
                         12345
                            \
                           99999
    """

    @staticmethod
    def get_nodes() -> List[int]:
        """
        Get the valid (non-obsolete) node IDs in the mock ontology.

        Returns:
            List[int]: The valid ChEBI node IDs, in ascending order.
        """
        return [11111, 12345, 22222, 54321, 67890, 88888, 99999]

    @staticmethod
    def get_number_of_nodes() -> int:
        """
        Get the number of valid nodes in the mock ontology.

        Returns:
            int: The number of valid nodes.
        """
        return len(ChebiMockOntology.get_nodes())

    @staticmethod
    def get_edges() -> Set[Tuple[int, int]]:
        """
        Get the set of valid edges in the mock ontology.

        Returns:
            Set[Tuple[int, int]]: Directed edges between ChEBI nodes. A tuple
            ``(a, b)`` mirrors a ``b is_a a`` statement in the raw OBO data.
        """
        return {
            (54321, 12345),
            (67890, 12345),
            (67890, 88888),
            (11111, 54321),
            (22222, 67890),
            (12345, 99999),
        }

    @staticmethod
    def get_number_of_edges() -> int:
        """
        Get the number of valid edges in the mock ontology.

        Returns:
            int: The number of valid edges.
        """
        return len(ChebiMockOntology.get_edges())

    @staticmethod
    def get_edges_of_transitive_closure_graph() -> Set[Tuple[int, int]]:
        """
        Get the set of edges derived from the transitive closure of the mock ontology graph.

        The set is written out by hand (not computed), so it can serve as an
        independent expected value in tests.

        Returns:
            Set[Tuple[int, int]]: Directed edges in the transitive closure of the
            graph described by ``get_edges``.
        """
        return {
            (54321, 12345),
            (54321, 99999),
            (67890, 12345),
            (67890, 99999),
            (67890, 88888),
            (11111, 54321),
            (11111, 12345),
            (11111, 99999),
            (22222, 67890),
            (22222, 12345),
            (22222, 99999),
            (22222, 88888),
            (12345, 99999),
        }

    @staticmethod
    def get_number_of_transitive_edges() -> int:
        """
        Get the number of edges in the transitive closure of the mock ontology graph.

        Returns:
            int: The number of edges in the transitive closure graph.
        """
        return len(ChebiMockOntology.get_edges_of_transitive_closure_graph())

    @staticmethod
    def get_obsolete_nodes_ids() -> Set[int]:
        """
        Get the set of obsolete node IDs in the mock ontology.

        Returns:
            Set[int]: The ChEBI IDs of the terms flagged ``is_obsolete: true``
            in the raw OBO data.
        """
        return {77533, 77564}

    @staticmethod
    def get_raw_data() -> str:
        """
        Get the raw data representing the mock ontology in OBO format.

        Besides the valid terms, the text contains two obsolete terms and one
        ``[Typedef]`` stanza.

        Returns:
            str: A string containing the raw OBO data for the mock ChEBI terms.
        """
        return """
[Term]
id: CHEBI:12345
name: Compound A
subset: 2_STAR
property_value: http://purl.obolibrary.org/obo/chebi/formula "C26H35ClN4O6S" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/charge "0" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/monoisotopicmass "566.19658" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/mass "567.099" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/inchikey "ROXPMFGZZQEKHB-IUKKYPGJSA-N" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/inchi "InChI=1S/C26H35ClN4O6S/c1-16(2)28-26(34)30(5)14-23-17(3)13-31(18(4)15-32)25(33)21-7-6-8-22(24(21)37-23)29-38(35,36)20-11-9-19(27)10-12-20/h6-12,16-18,23,29,32H,13-15H2,1-5H3,(H,28,34)/t17-,18-,23+/m0/s1" xsd:string
xref: LINCS:LSM-20139
is_a: CHEBI:54321
is_a: CHEBI:67890

[Term]
id: CHEBI:54321
name: Compound B
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1O" xsd:string
is_a: CHEBI:11111
is_a: CHEBI:77564

[Term]
id: CHEBI:67890
name: Compound C
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1N" xsd:string
is_a: CHEBI:22222

[Term]
id: CHEBI:11111
name: Compound D
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1F" xsd:string

[Term]
id: CHEBI:22222
name: Compound E
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1Cl" xsd:string

[Term]
id: CHEBI:99999
name: Compound F
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1Br" xsd:string
is_a: CHEBI:12345

[Term]
id: CHEBI:77533
name: Compound G
is_a: CHEBI:99999
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=C1Br" xsd:string
is_obsolete: true

[Term]
id: CHEBI:77564
name: Compound H
property_value: http://purl.obolibrary.org/obo/chebi/smiles "CC=C1Br" xsd:string
is_obsolete: true

[Typedef]
id: has_major_microspecies_at_pH_7_3
name: has major microspecies at pH 7.3
is_cyclic: true
is_transitive: false

[Term]
id: CHEBI:88888
name: Compound I
property_value: http://purl.obolibrary.org/obo/chebi/smiles "C1=CC=CC=C1[Mg+]" xsd:string
is_a: CHEBI:67890
"""

    @staticmethod
    def get_data_in_dataframe() -> pd.DataFrame:
        """
        Get the mock ontology as a table with one row per valid node.

        Columns, in order: ``id``, ``name``, ``SMILES``, followed by one boolean
        column per valid node ID (ascending, same order as ``get_nodes``). The
        value in column ``c`` for the row with id ``r`` is True when ``c == r``
        or when ``(c, r)`` is an edge of the transitive closure returned by
        ``get_edges_of_transitive_closure_graph``.

        The boolean columns are written out by hand on purpose. They could be
        derived from the transitive closure (collect, for every node, the set
        of nodes that reach it and add the node itself), but hard-coding keeps
        this fixture independent of the code it is used to test.

        Returns:
            pd.DataFrame: The table described above (7 rows, 10 columns).
        """
        # The mapping is built from pairs instead of ``OrderedDict(**{...})``:
        # the label columns use integer keys, and passing non-string keys via
        # keyword unpacking only works when the callee skips keyword-name
        # validation, which is not something to rely on.
        data = OrderedDict(
            [
                ("id", [12345, 54321, 67890, 11111, 22222, 99999, 88888]),
                (
                    "name",
                    [
                        "Compound A",
                        "Compound B",
                        "Compound C",
                        "Compound D",
                        "Compound E",
                        "Compound F",
                        "Compound I",
                    ],
                ),
                (
                    "SMILES",
                    [
                        "C1=CC=CC=C1",
                        "C1=CC=CC=C1O",
                        "C1=CC=CC=C1N",
                        "C1=CC=CC=C1F",
                        "C1=CC=CC=C1Cl",
                        "C1=CC=CC=C1Br",
                        "C1=CC=CC=C1[Mg+]",
                    ],
                ),
                # -row- [12345, 54321, 67890, 11111, 22222, 99999, 88888]
                (11111, [True, True, False, True, False, True, False]),
                (12345, [True, False, False, False, False, True, False]),
                (22222, [True, False, True, False, True, True, True]),
                (54321, [True, True, False, False, False, True, False]),
                (67890, [True, False, True, False, False, True, True]),
                (88888, [False, False, False, False, False, False, True]),
                (99999, [False, False, False, False, False, True, False]),
            ]
        )

        return pd.DataFrame(data)

    @staticmethod
    def get_transitively_closed_graph() -> nx.DiGraph:
        """
        Build a directed graph that is already transitively closed.

        The closure is not computed here: the graph is populated with the valid
        nodes and the precomputed edges from
        ``get_edges_of_transitive_closure_graph``.

        Returns:
            nx.DiGraph: A directed graph whose nodes all carry a placeholder
            ``smiles`` attribute.
        """
        g = nx.DiGraph()

        # Nodes are added explicitly so each one gets the "smiles" attribute;
        # nodes created implicitly by add_edges_from would have none.
        for node in ChebiMockOntology.get_nodes():
            g.add_node(node, smiles="test_smiles_placeholder")

        g.add_edges_from(ChebiMockOntology.get_edges_of_transitive_closure_graph())

        return g
407+
408+
117409
class GOUniProtMockData(MockOntologyGraphData):
118410
"""
119411
A mock ontology representing a simplified version of the Gene Ontology (GO) structure with nodes and edges

0 commit comments

Comments
 (0)