Skip to content

Commit 2f6c156

Browse files
machc authored and The TensorFlow Datasets Authors committed
Add QM9 checksums, README.md and CITATIONS.bib.
PiperOrigin-RevId: 616084769
1 parent 89440d4 commit 2f6c156

File tree

6 files changed

+38
-25
lines changed

6 files changed

+38
-25
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
@article{ramakrishnan2014quantum,
2+
title={Quantum chemistry structures and properties of 134 kilo molecules},
3+
author={Ramakrishnan, Raghunathan and Dral, Pavlo O and Rupp, Matthias and von Lilienfeld, O Anatole},
4+
journal={Scientific Data},
5+
volume={1},
6+
year={2014},
7+
publisher={Nature Publishing Group}
8+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
QM9 consists of computed geometric, energetic, electronic, and thermodynamic
2+
properties for 134k stable small organic molecules made up of CHONF. As usual,
3+
we remove the uncharacterized molecules and provide the remaining 130,831 in the
4+
original order (not shuffled). We provide a single 'train' split; users are
5+
expected to make their own validation/test splits.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# coding=utf-8
2+
# Copyright 2024 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
https://figshare.com/ndownloader/files/3195395 964 af739d4a0fbe894a56f346ad6045ee1b74c58f8a8c9ef3fbfed920c9ad8b00f9 atomref.txt
2+
https://springernature.figshare.com/ndownloader/files/3195389 86144227 3a63848ac80691bdb8d41834b575afad345b9300d7a2db0c38adb7f6eaa8360c dsgdb9nsd.xyz.tar.bz2
3+
https://springernature.figshare.com/ndownloader/files/3195404 486752 3aa5115d540b356de94791d4a74c3bf1ed91c469ecf52a4f5d7cc0506fe02e24 uncharacterized.txt

tensorflow_datasets/datasets/qm9/qm9_dataset_builder.py

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -30,22 +30,6 @@
3030

3131
pd = tfds.core.lazy_imports.pandas
3232

33-
_DESCRIPTION = """\
34-
QM9 consists of computed geometric, energetic, electronic, and thermodynamic
35-
properties for 134k stable small organic molecules made up of CHONF.
36-
"""
37-
38-
_CITATION = """\
39-
@article{ramakrishnan2014quantum,
40-
title={Quantum chemistry structures and properties of 134 kilo molecules},
41-
author={Ramakrishnan, Raghunathan and Dral, Pavlo O and Rupp, Matthias and von Lilienfeld, O Anatole},
42-
journal={Scientific Data},
43-
volume={1},
44-
year={2014},
45-
publisher={Nature Publishing Group}
46-
}
47-
"""
48-
4933
_HOMEPAGE = 'https://doi.org/10.6084/m9.figshare.c.978904.v5'
5034

5135
_ATOMREF_URL = 'https://figshare.com/ndownloader/files/3195395'
@@ -145,15 +129,14 @@ class Builder(tfds.core.GeneratorBasedBuilder):
145129

146130
def _info(self) -> tfds.core.DatasetInfo:
147131
"""Returns the dataset metadata."""
148-
return tfds.core.DatasetInfo(
149-
builder=self,
150-
description=_DESCRIPTION,
132+
return self.dataset_info_from_configs(
151133
disable_shuffling=True,
152134
features=tfds.features.FeaturesDict({
153135
'num_atoms': tfds.features.Tensor(shape=(), dtype=np.int64),
154136
'charges': tfds.features.Tensor(shape=(29,), dtype=np.int64),
155-
'Mulliken_charges': tfds.features.Tensor(shape=(29,),
156-
dtype=np.float32),
137+
'Mulliken_charges': tfds.features.Tensor(
138+
shape=(29,), dtype=np.float32
139+
),
157140
'positions': tfds.features.Tensor(shape=(29, 3), dtype=np.float32),
158141
'index': tfds.features.Tensor(shape=(), dtype=np.int64),
159142
'A': tfds.features.Tensor(shape=(), dtype=np.float32),
@@ -180,13 +163,13 @@ def _info(self) -> tfds.core.DatasetInfo:
180163
'SMILES_relaxed': tfds.features.Tensor(shape=(), dtype=np.str_),
181164
'InChI': tfds.features.Tensor(shape=(), dtype=np.str_),
182165
'InChI_relaxed': tfds.features.Tensor(shape=(), dtype=np.str_),
183-
'frequencies': tfds.features.Tensor(shape=(None,),
184-
dtype=np.float32),
166+
'frequencies': tfds.features.Tensor(
167+
shape=(None,), dtype=np.float32
168+
),
185169
}),
186170
# These are returned if `as_supervised=True` in `builder.as_dataset`.
187171
supervised_keys=None,
188172
homepage=_HOMEPAGE,
189-
citation=_CITATION,
190173
)
191174

192175
def _split_generators(

tensorflow_datasets/datasets/qm9/qm9_dataset_builder_test.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ class Qm9Test(testing.DatasetBuilderTestCase):
2929
qm9_dataset_builder._VALIDATION_SIZE = 1
3030
qm9_dataset_builder._TEST_SIZE = 1
3131

32-
SKIP_CHECKSUMS = True
3332
DATASET_CLASS = qm9_dataset_builder.Builder
3433

3534
DL_EXTRACT_RESULT = {

0 commit comments

Comments
 (0)