Skip to content

Commit cdb5869

Browse files
committed
use _after_setup hook for setup_properties
ChEB-AI/python-chebai#93
1 parent 52007ad commit cdb5869

File tree

3 files changed

+205
-19
lines changed

3 files changed

+205
-19
lines changed

.github/workflows/black.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
name: Lint
2+
3+
on: [push, pull_request]
4+
5+
jobs:
6+
lint:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- uses: actions/checkout@v2
10+
- uses: psf/black@stable

.gitignore

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
docs/build/
74+
75+
# PyBuilder
76+
.pybuilder/
77+
target/
78+
79+
# Jupyter Notebook
80+
.ipynb_checkpoints
81+
82+
# IPython
83+
profile_default/
84+
ipython_config.py
85+
86+
# pyenv
87+
# For a library or package, you might want to ignore these files since the code is
88+
# intended to run in multiple environments; otherwise, check them in:
89+
# .python-version
90+
91+
# pipenv
92+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
94+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
95+
# install all needed dependencies.
96+
#Pipfile.lock
97+
98+
# poetry
99+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100+
# This is especially recommended for binary packages to ensure reproducibility, and is more
101+
# commonly ignored for libraries.
102+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103+
#poetry.lock
104+
105+
# pdm
106+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107+
#pdm.lock
108+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109+
# in version control.
110+
# https://pdm.fming.dev/#use-with-ide
111+
.pdm.toml
112+
113+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114+
__pypackages__/
115+
116+
# Celery stuff
117+
celerybeat-schedule
118+
celerybeat.pid
119+
120+
# SageMath parsed files
121+
*.sage.py
122+
123+
# Environments
124+
.env
125+
.venv
126+
env/
127+
venv/
128+
ENV/
129+
env.bak/
130+
venv.bak/
131+
132+
# Spyder project settings
133+
.spyderproject
134+
.spyproject
135+
136+
# Rope project settings
137+
.ropeproject
138+
139+
# mkdocs documentation
140+
/site
141+
142+
# mypy
143+
.mypy_cache/
144+
.dmypy.json
145+
dmypy.json
146+
147+
# Pyre type checker
148+
.pyre/
149+
150+
# pytype static type analyzer
151+
.pytype/
152+
153+
# Cython debug symbols
154+
cython_debug/
155+
156+
# PyCharm
157+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159+
# and can be added to the global gitignore or merged into this file. For a more nuclear
160+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
161+
#.idea/
162+
163+
# configs/  # intentionally left commented out (not ignored): new configs can be added as part of a feature
164+
165+
/.idea
166+
/data
167+
/logs
168+
/results_buffer
169+
electra_pretrained.ckpt
170+
.isort.cfg
171+
/.vscode

chebai_graph/preprocessing/datasets/chebi.py

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
1-
from typing import Optional, List, Callable
1+
import importlib
2+
import os
3+
from typing import Callable, List, Optional
24

5+
import pandas as pd
6+
import torch
7+
import tqdm
8+
from chebai.preprocessing.datasets.base import XYBaseDataModule
39
from chebai.preprocessing.datasets.chebi import (
410
ChEBIOver50,
511
ChEBIOver100,
612
ChEBIOverXPartial,
713
)
8-
from chebai.preprocessing.datasets.base import XYBaseDataModule
914
from lightning_utilities.core.rank_zero import rank_zero_info
15+
from torch_geometric.data.data import Data as GeomData
1016

11-
from chebai_graph.preprocessing.reader import GraphReader, GraphPropertyReader
17+
import chebai_graph.preprocessing.properties as graph_properties
1218
from chebai_graph.preprocessing.properties import (
1319
AtomProperty,
1420
BondProperty,
1521
MolecularProperty,
1622
)
17-
import pandas as pd
18-
from torch_geometric.data.data import Data as GeomData
19-
import torch
20-
import chebai_graph.preprocessing.properties as graph_properties
21-
import importlib
22-
import os
23-
import tqdm
23+
from chebai_graph.preprocessing.reader import GraphPropertyReader, GraphReader
2424

2525

2626
class ChEBI50GraphData(ChEBIOver50):
@@ -84,18 +84,20 @@ def _setup_properties(self):
8484
for file in file_names:
8585
# processed_dir_main only exists for ChEBI datasets
8686
path = os.path.join(
87-
self.processed_dir_main
88-
if hasattr(self, "processed_dir_main")
89-
else self.raw_dir,
87+
(
88+
self.processed_dir_main
89+
if hasattr(self, "processed_dir_main")
90+
else self.raw_dir
91+
),
9092
file,
9193
)
9294
raw_data += list(self._load_dict(path))
9395
idents = [row["ident"] for row in raw_data]
9496
features = [row["features"] for row in raw_data]
9597

9698
# use vectorized version of encode function, apply only if value is present
97-
enc_if_not_none = (
98-
lambda encode, value: [encode(atom_v) for atom_v in value]
99+
enc_if_not_none = lambda encode, value: (
100+
[encode(atom_v) for atom_v in value]
99101
if value is not None and len(value) > 0
100102
else None
101103
)
@@ -134,11 +136,14 @@ def get_property_path(self, property: MolecularProperty):
134136
f"{property.name}_{property.encoder.name}.pt",
135137
)
136138

137-
def setup(self, **kwargs):
138-
super().setup(keep_reader=True, **kwargs)
139-
self._setup_properties()
139+
def _after_setup(self, **kwargs):
140+
"""
141+
Finalize the setup process after ensuring the processed data is available.
140142
141-
self.reader.on_finish()
143+
This method first sets up the internal properties and then delegates to the parent class's `_after_setup` hook for the remaining post-setup tasks (presumably including finalizing the reader).
144+
"""
145+
self._setup_properties()
146+
super()._after_setup(**kwargs)
142147

143148
def _merge_props_into_base(self, row):
144149
geom_data = row["features"]

0 commit comments

Comments
 (0)