Skip to content

Commit bcccac5

Browse files
author
The TensorFlow Datasets Authors
committed
Merge pull request #5445 from CristianoPizzamiglio:feat-pneumoniamnist-dataset
PiperOrigin-RevId: 675117641
2 parents 8f0f960 + bb5d40c commit bcccac5

File tree

8 files changed

+145
-0
lines changed

8 files changed

+145
-0
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
@article{yang2023medmnist,
2+
title={{MedMNIST} v2 -- A large-scale lightweight benchmark for {2D} and {3D} biomedical image classification},
3+
author={Yang, Jiancheng and Shi, Rui and Wei, Donglai and Liu, Zequan and Zhao, Lin and Ke, Bilian and Pfister, Hanspeter and Ni, Bingbing},
4+
journal={Scientific Data},
5+
volume={10},
6+
number={1},
7+
pages={41},
8+
year={2023},
9+
publisher={Nature Publishing Group UK London}
10+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# MedMNIST Pneumonia Dataset
2+
3+
PneumoniaMNIST is based on a prior dataset of 5,856 pediatric chest X-ray
4+
images. The task is binary-class classification of pneumonia against normal. The
5+
source training set is split with a ratio of 9:1 into training and validation
6+
sets, and the source validation set is used as the test set. The source images are
7+
gray-scale, and their sizes are (384–2,916) × (127–2,713). The images are
8+
center-cropped to a square window whose side equals the short edge and resized into 1
9+
× 28 × 28.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
content.data-type.image # Contains image data.
2+
content.subject.health # Relates to health.
3+
ml.task.image-classification # Relates to Image Classification, a machine learning task.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# coding=utf-8
2+
# Copyright 2024 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
https://zenodo.org/records/10519652/files/pneumoniamnist.npz 4170669 e1792d3f03751cb101e99f19a63b3c1941436c988665f47853417b05be250cd8 pneumoniamnist.npz
2+
https://zenodo.org/records/10519652/files/pneumoniamnist.npz?download=1 4170669 e1792d3f03751cb101e99f19a63b3c1941436c988665f47853417b05be250cd8 pneumoniamnist.npz
Binary file not shown.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# coding=utf-8
2+
# Copyright 2024 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Pneumonia Mnist dataset."""
17+
18+
import numpy as np
19+
from tensorflow_datasets.core.utils.lazy_imports_utils import tensorflow as tf
20+
import tensorflow_datasets.public_api as tfds
21+
22+
23+
class Builder(tfds.core.GeneratorBasedBuilder):
  """DatasetBuilder for the PneumoniaMNIST dataset.

  Downloads the `pneumoniamnist.npz` archive and exposes its `train`, `val`
  and `test` arrays as splits of `(28, 28, 1)` images paired with a binary
  `Normal` / `Pneumonia` class label.
  """

  VERSION = tfds.core.Version('1.0.0')
  RELEASE_NOTES = {
      '1.0.0': 'Initial release.',
  }

  def _info(self) -> tfds.core.DatasetInfo:
    """Returns the dataset metadata (features, supervised keys, homepage)."""
    return self.dataset_info_from_configs(
        features=tfds.features.FeaturesDict({
            'image': tfds.features.Image(shape=(28, 28, 1)),
            'label': tfds.features.ClassLabel(names=['Normal', 'Pneumonia']),
        }),
        supervised_keys=('image', 'label'),
        homepage='https://medmnist.com/',
    )

  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the archive and returns one example generator per split.

    Args:
      dl_manager: Download manager used to fetch the `.npz` archive.

    Returns:
      A dict mapping the split names `train`, `val` and `test` to generators
      of `(key, example)` pairs.
    """
    npz_path = dl_manager.download(
        'https://zenodo.org/records/10519652/files/pneumoniamnist.npz'
    )

    splits = {}
    # `np.load` decodes the members of an `.npz` archive lazily, so every
    # array is materialized while the file and the archive are still open;
    # the `with` statement then releases both handles.
    with tf.io.gfile.GFile(npz_path, 'rb') as f, np.load(f) as raw_data:
      for split in ('train', 'val', 'test'):
        # Append a trailing channel axis to match the `(28, 28, 1)` feature.
        images = np.expand_dims(raw_data[f'{split}_images'], axis=-1)
        labels = raw_data[f'{split}_labels'].flatten()
        splits[split] = self._generate_examples(images, labels)
    return splits

  def _generate_examples(self, images, labels):
    """Yields `(key, example)` pairs for one split.

    Args:
      images: Iterable of image arrays, one per example.
      labels: Iterable of labels, iterated in lockstep with `images`.

    Yields:
      Tuples of the running example index and a dict holding the `image`
      array and the integer `label`.
    """
    for idx, (image, label) in enumerate(zip(images, labels)):
      yield idx, {
          'image': image,
          # `np.squeeze` drops any singleton dimensions so that `int`
          # receives a scalar.
          'label': int(np.squeeze(label)),
      }
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# coding=utf-8
2+
# Copyright 2024 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Pneumonia Mnist dataset."""
17+
18+
from tensorflow_datasets.datasets.pneumonia_mnist import pneumonia_mnist_dataset_builder
19+
import tensorflow_datasets.public_api as tfds
20+
21+
22+
class PneumoniaMnistTest(tfds.testing.DatasetBuilderTestCase):
  """Test case for the PneumoniaMNIST dataset builder."""

  # Builder exercised by the shared TFDS test case.
  DATASET_CLASS = pneumonia_mnist_dataset_builder.Builder

  # Per-split counts declared for the test case.
  SPLITS = dict(train=3, val=1, test=1)

  # NOTE(review): presumably the archive name substituted for the real
  # download during the test -- confirm against the dummy data directory.
  DL_DOWNLOAD_RESULT = 'pneumoniamnist.npz'
33+
34+
35+
if __name__ == '__main__':
  # Hand control to the TFDS test runner when executed as a script.
  tfds.testing.test_main()

0 commit comments

Comments
 (0)