Skip to content

Commit 65d24f8

Browse files
committed
formatted python files according to specification
1 parent 591026b commit 65d24f8

File tree

2 files changed

+173
-91
lines changed

2 files changed

+173
-91
lines changed

tensorflow_datasets/image_classification/wake_vision/wake_vision_dataset_builder.py

Lines changed: 162 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -3,127 +3,209 @@
33
import tensorflow_datasets.public_api as tfds
44
import os
55

6-
_TRAIN_IMAGE_IDS = [9270406, 9270356, 9270408, 9270367, 9270349, 9270351, 9270390, 9270375, 9270387, 9270370, 9270396, 9270340, 9270411, 9270369, 9270357, 9270378, 9270386, 9270376, 9270341, 9270392, 9270334, 9270404, 9270330, 9270321, 9270364, 9270380, 9270343, 9270335, 9270412, 9270362, 9270339, 9270331, 9270399, 9270410, 9270393, 9270325, 9270346, 9270337, 9270391, 9270361, 9270363, 9270372, 9270326, 9270322, 9270329, 9270381, 9270338, 9270397, 9270405, 9270379, 9270352, 9270400, 9270384, 9270383, 9270388, 9270324, 9270407, 9270348, 9270347, 9270371, 9270358, 9270350, 9270323, 9270401, 9270368, 9270360, 9270328, 9270327, 9270382, 9270332, 9270394, 9270409, 9270345, 9270342, 9270353, 9270403, 9270398, 9270402, 9270395, 9270333, 9270373, 9270336, 9270385, 9270320, 9270366, 9270374, 9270377, 9270354, 9270344, 9270359]
6+
_TRAIN_IMAGE_IDS = [
7+
9270406, 9270356, 9270408, 9270367, 9270349, 9270351, 9270390, 9270375,
8+
9270387, 9270370, 9270396, 9270340, 9270411, 9270369, 9270357, 9270378,
9+
9270386, 9270376, 9270341, 9270392, 9270334, 9270404, 9270330, 9270321,
10+
9270364, 9270380, 9270343, 9270335, 9270412, 9270362, 9270339, 9270331,
11+
9270399, 9270410, 9270393, 9270325, 9270346, 9270337, 9270391, 9270361,
12+
9270363, 9270372, 9270326, 9270322, 9270329, 9270381, 9270338, 9270397,
13+
9270405, 9270379, 9270352, 9270400, 9270384, 9270383, 9270388, 9270324,
14+
9270407, 9270348, 9270347, 9270371, 9270358, 9270350, 9270323, 9270401,
15+
9270368, 9270360, 9270328, 9270327, 9270382, 9270332, 9270394, 9270409,
16+
9270345, 9270342, 9270353, 9270403, 9270398, 9270402, 9270395, 9270333,
17+
9270373, 9270336, 9270385, 9270320, 9270366, 9270374, 9270377, 9270354,
18+
9270344, 9270359
19+
]
720

821
# Template for the direct-download URL of a single Harvard Dataverse
# datafile.
_DATAFILE_URL = (
    'https://dataverse.harvard.edu/api/access/datafile/{}?format=original'
)


def _image_archive(datafile_id):
    """Returns a download Resource for one gzipped tar archive of images."""
    return tfds.download.Resource(
        url=_DATAFILE_URL.format(datafile_id),
        extract_method=tfds.download.ExtractMethod.GZIP,
    )


# Everything `_split_generators` downloads, keyed by artifact name.  The
# `*_images` entries are lists of gzipped tar archives; the `*_metadata`
# entries are plain CSV download URLs.
_URLS = {
    'train_images': [_image_archive(i) for i in _TRAIN_IMAGE_IDS],
    'validation_images': [_image_archive(9270355)],
    'test_images': [_image_archive(9270389)],
    'train_large_metadata': _DATAFILE_URL.format(9844933),
    'train_quality_metadata': _DATAFILE_URL.format(9844934),
    'validation_metadata': _DATAFILE_URL.format(10069808),
    'test_metadata': _DATAFILE_URL.format(10069809),
}
3555

56+
3657
class Builder(tfds.core.GeneratorBasedBuilder):
    """DatasetBuilder for the wake_vision dataset.

    Images arrive as gzipped tar shards and labels arrive as CSV metadata
    files indexed by image filename (see `_URLS`).  Each example is an image
    plus a set of binary No/Yes annotation flags.
    """

    VERSION = tfds.core.Version('1.0.0')
    RELEASE_NOTES = {
        '1.0.0': (
            'Initial TensorFlow Datasets release. Note that this is based on'
            ' the 2.0 version of Wake Vision on Harvard Dataverse.'
        ),
    }

    # Every annotation feature other than 'image' and 'filename' is a binary
    # No/Yes flag; listing them once keeps `_info` and `_generate_examples`
    # in sync.  'person' is listed first and is the only flag guaranteed to
    # be present in every metadata CSV.
    _FLAG_NAMES = (
        'person', 'depiction', 'body_part', 'predominantly_female',
        'predominantly_male', 'gender_unknown', 'young', 'middle_age',
        'older', 'age_unknown', 'near', 'medium_distance', 'far', 'dark',
        'normal_lighting', 'bright', 'person_depiction',
        'non-person_depiction', 'non-person_non-depiction',
    )

    def _info(self) -> tfds.core.DatasetInfo:
        """Returns the dataset metadata."""
        features = {
            'image': tfds.features.Image(shape=(None, None, 3)),
            'filename': tfds.features.Text(),
        }
        features.update(
            (flag, tfds.features.ClassLabel(names=['No', 'Yes']))
            for flag in self._FLAG_NAMES
        )
        return self.dataset_info_from_configs(
            # NOTE(review): an earlier revision of this description had words
            # fused together ("anddiversity", "aperson", "coveringperceived")
            # and was missing the opening quote before Wake Vision; the text
            # is restored here.
            description=(
                '"Wake Vision" is a large, high-quality dataset featuring'
                ' over 6 million images, significantly exceeding the scale'
                ' and diversity of current tinyML datasets (100x). This'
                ' dataset includes images with annotations of whether each'
                ' image contains a person. Additionally, it incorporates a'
                ' comprehensive fine-grained benchmark to assess fairness'
                ' and robustness, covering perceived gender, perceived age,'
                ' subject distance, lighting conditions, and depictions.\n'
                "The Wake Vision labels are derived from Open Image's"
                ' annotations which are licensed by Google LLC under CC BY'
                ' 4.0 license. The images are listed as having a CC BY 2.0'
                ' license. Note from Open Images: "while we tried to'
                ' identify images that are licensed under a Creative Commons'
                ' Attribution license, we make no representations or'
                ' warranties regarding the license status of each image and'
                ' you should verify the license for each image yourself."'
            ),
            features=tfds.features.FeaturesDict(features),
            # If there's a common (input, target) tuple from the features,
            # specify them here. They'll be used if `as_supervised=True` in
            # `builder.as_dataset`.
            supervised_keys=('image', 'person'),  # Set to `None` to disable
            homepage='https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910%2FDVN%2F1HOPXC',
            license='See homepage for license information.',
        )

    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Returns SplitGenerators.

        Both training splits share the same image shards and differ only in
        which metadata CSV selects and labels the images.
        """
        paths = dl_manager.download_and_extract(_URLS)

        return {
            'train_large': self._generate_examples(
                paths['train_images'], paths['train_large_metadata']
            ),
            'train_quality': self._generate_examples(
                paths['train_images'], paths['train_quality_metadata']
            ),
            'validation': self._generate_examples(
                paths['validation_images'], paths['validation_metadata']
            ),
            'test': self._generate_examples(
                paths['test_images'], paths['test_metadata']
            ),
        }

    def _generate_examples(self, image_paths, metadata_path):
        """Yields (filename, example) pairs.

        Args:
            image_paths: iterable of local paths to gzipped tar archives of
                images.
            metadata_path: local path to a CSV file whose rows are keyed by
                image filename.
        """
        # pandas is lazily imported so the dependency is only required at
        # generation time.
        metadata = tfds.core.lazy_imports.pandas.read_csv(
            metadata_path, index_col='filename'
        )

        # All flags except the mandatory 'person' column may be absent from a
        # given CSV; -1 marks those labels as missing.
        optional_flags = self._FLAG_NAMES[1:]

        for tar_file in image_paths:
            for sample_path, sample_object in tfds.download.iter_archive(
                tar_file, tfds.download.ExtractMethod.TAR_STREAM
            ):
                file_name = os.path.basename(sample_path)

                # Archive members without a metadata row are not part of this
                # split (presumably the quality split's CSV covers a subset
                # of the shared training shards — TODO confirm).
                if file_name not in metadata.index:
                    continue

                sample_metadata = metadata.loc[file_name]

                example = {
                    'image': sample_object,
                    'filename': file_name,
                    'person': sample_metadata['person'],
                }
                for flag in optional_flags:
                    example[flag] = sample_metadata.get(flag, -1)
                yield file_name, example

tensorflow_datasets/image_classification/wake_vision/wake_vision_dataset_builder_test.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
class WakeVisionTest(tfds.testing.DatasetBuilderTestCase):
    """Tests for wake_vision dataset."""

    DATASET_CLASS = wake_vision_dataset_builder.Builder

    # Expected number of examples per split in the checked-in dummy data.
    SPLITS = {
        'train_large': 16,  # Number of fake train image examples
        'train_quality': 4,  # Number of fake train bbox examples
        'validation': 11,  # Number of fake validation examples
        'test': 10,  # Number of fake test examples
    }

    # Fake results of `dl_manager.download_and_extract(_URLS)`; the keys
    # mirror `_URLS` in the builder module.
    DL_EXTRACT_RESULT = {
        'train_images': [
            'wake-vision-train-dummy-1.tar.gz',
            'wake-vision-train-dummy-2.tar.gz',
        ],
        'validation_images': ['wake-vision-validation-dummy.tar.gz'],
        'test_images': ['wake-vision-test-dummy.tar.gz'],
        'train_large_metadata': 'wake_vision_train_large.csv',
        'train_quality_metadata': 'wake_vision_train_quality.csv',
        'validation_metadata': 'wake_vision_validation.csv',
        'test_metadata': 'wake_vision_test.csv',
    }
2525

2626

0 commit comments

Comments
 (0)