Skip to content

Commit f28cfbd

Browse files
authored
Remove ase dependency and update ROY dataset (#232)
1 parent bc76938 commit f28cfbd

File tree

7 files changed

+26
-61
lines changed

7 files changed

+26
-61
lines changed

examples/selection/GCH-ROY.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,9 @@
2727

2828
roy_data = load_roy_dataset()
2929

30-
structures = roy_data["structures"]
31-
32-
density = np.array([s.info["density"] for s in structures])
33-
energy = np.array([s.info["energy"] for s in structures])
34-
structype = np.array([s.info["type"] for s in structures])
30+
density = roy_data["densities"]
31+
energy = roy_data["energies"]
32+
structype = roy_data["structure_types"]
3533
iknown = np.where(structype == "known")[0]
3634
iothers = np.where(structype != "known")[0]
3735

@@ -247,3 +245,5 @@
247245
},
248246
)
249247
"""
248+
249+
# %%

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ dynamic = ["version"]
4444

4545
[project.optional-dependencies]
4646
examples = [
47-
"ase",
4847
"matplotlib",
4948
"pandas",
5049
"tqdm",

src/skmatter/datasets/_base.py

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -119,32 +119,27 @@ def load_who_dataset():
119119

120120

121121
def load_roy_dataset():
122-
"""Load and returns the ROY dataset, which contains structures,
123-
energies and SOAP-derived descriptors for 264 polymorphs of ROY, from [Beran et Al,
124-
Chemical Science (2022)](https://doi.org/10.1039/D1SC06074K)
122+
"""Load and return the ROY dataset, which contains densities,
123+
energies and SOAP-derived descriptors for 264 structures of polymorphs of ROY,
124+
from [Beran et al., Chemical Science (2022)](https://doi.org/10.1039/D1SC06074K)
125+
Each structure is labeled as "Known" or "Unknown".
125126
126127
Returns
127128
-------
128129
roy_dataset : sklearn.utils.Bunch
129130
Dictionary-like object, with the following attributes:
130-
structures : `ase.Atoms` -- the roy structures as ASE objects
131-
features: `np.array` -- SOAP-derived descriptors for the structures
132-
energies: `np.array` -- energies of the structures
131+
densities : `np.array` -- the densities of the structures
132+
structure_types : `np.array` -- the type of the structures
133+
features : `np.array` -- SOAP-derived descriptors for the structures
134+
energies : `np.array` -- energies of the structures
133135
"""
134136
module_path = dirname(__file__)
135-
target_structures = join(module_path, "data", "beran_roy_structures.xyz.bz2")
136-
137-
try:
138-
from ase.io import read
139-
except ImportError:
140-
raise ImportError("load_roy_dataset requires the ASE package.")
141-
142-
import bz2
143-
144-
structures = read(bz2.open(target_structures, "rt"), ":", format="extxyz")
145-
energies = np.array([f.info["energy"] for f in structures])
146-
147-
target_features = join(module_path, "data", "beran_roy_features.npz")
148-
features = np.load(target_features)["feats"]
149-
150-
return Bunch(structures=structures, features=features, energies=energies)
137+
target_properties = join(module_path, "data", "beran_roy_properties.npz")
138+
properties = np.load(target_properties)
139+
140+
return Bunch(
141+
densities=properties["densities"],
142+
energies=properties["energies"],
143+
structure_types=properties["structure_types"],
144+
features=properties["feats"],
145+
)
-66.3 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.

tests/test_datasets.py

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -107,45 +107,16 @@ class ROYTests(unittest.TestCase):
107107
def setUpClass(cls):
108108
cls.size = 264
109109
cls.shape = (264, 32)
110-
try:
111-
from ase.io import read # NoQa: F401
112-
113-
cls.has_ase = True
114-
cls.roy = load_roy_dataset()
115-
except ImportError:
116-
cls.has_ase = False
117-
118-
def test_load_dataset_without_ase(self):
119-
"""Check if the correct exception occurs when ase isn't present."""
120-
with unittest.mock.patch.dict("sys.modules", {"ase.io": None}):
121-
with self.assertRaises(ImportError) as cm:
122-
_ = load_roy_dataset()
123-
self.assertEqual(
124-
str(cm.exception), "load_roy_dataset requires the ASE package."
125-
)
110+
cls.roy = load_roy_dataset()
126111

127112
def test_dataset_content(self):
128113
"""Check if the correct number of datapoints is present in the dataset.
129114
130115
Also check if the size of the dataset is correct.
131116
"""
132-
if self.has_ase is True:
133-
self.assertEqual(len(self.roy["structures"]), self.size)
134-
self.assertEqual(self.roy["features"].shape, self.shape)
135-
self.assertEqual(len(self.roy["energies"]), self.size)
136-
137-
def test_dataset_consistency(self):
138-
"""Check if the energies in the structures are the same as in the explicit
139-
array.
140-
"""
141-
if self.has_ase is True:
142-
self.assertTrue(
143-
np.allclose(
144-
self.roy["energies"],
145-
[f.info["energy"] for f in self.roy["structures"]],
146-
rtol=1e-6,
147-
)
148-
)
117+
self.assertEqual(len(self.roy["structure_types"]), self.size)
118+
self.assertEqual(self.roy["features"].shape, self.shape)
119+
self.assertEqual(len(self.roy["energies"]), self.size)
149120

150121

151122
if __name__ == "__main__":

0 commit comments

Comments
 (0)