Skip to content

Commit 90c0dc0

Browse files
jpuigcerver authored and The TensorFlow Datasets Authors committed
Avoid rewriting Oxford IIIT Pet images to disk.
PiperOrigin-RevId: 662817769
1 parent 4673ae1 commit 90c0dc0

File tree

1 file changed

+11
-13
lines changed

1 file changed

+11
-13
lines changed

tensorflow_datasets/datasets/oxford_iiit_pet/oxford_iiit_pet_dataset_builder.py

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
# limitations under the License.
1515

1616
"""Oxford-IIIT pet dataset."""
17-
17+
import io
1818
import os
1919
import xml.etree.ElementTree as ET
2020

@@ -119,7 +119,7 @@ class Builder(tfds.core.GeneratorBasedBuilder):
119119
def _info(self):
120120
return self.dataset_info_from_configs(
121121
features=tfds.features.FeaturesDict({
122-
"image": tfds.features.Image(),
122+
"image": tfds.features.Image(encoding_format="jpeg"),
123123
"label": tfds.features.ClassLabel(names=_LABEL_CLASSES),
124124
"species": tfds.features.ClassLabel(names=_SPECIES_CLASSES),
125125
"file_name": tfds.features.Text(),
@@ -175,21 +175,19 @@ def _generate_examples(
175175
image_name, label, species, _ = line.strip().split(" ")
176176

177177
image_path = os.path.join(images_dir_path, image_name + ".jpg")
178+
with epath.Path(image_path).open("rb") as image_file:
179+
img_data = image_file.read()
178180

179181
if image_name in _CORRUPT_SAMPLES:
180182
# some images caused 'Corrupt JPEG data...' messages during training
181183
# or any other iteration recoding them once fixes the issue
182184
# (discussion: https://github.com/tensorflow/datasets/issues/2188)
183-
with epath.Path(image_path).open("rb") as image_file:
184-
img_data = image_file.read()
185-
img_tensor = tf.image.decode_image(img_data)
186-
if (
187-
tf.shape(img_tensor)[-1] == 4
188-
): # some files have an alpha channel -> remove
189-
img_tensor = img_tensor[:, :, :-1]
190-
img_recoded = tf.io.encode_jpeg(img_tensor)
191-
with epath.Path(image_path).open("wb") as image_file:
192-
image_file.write(img_recoded.numpy())
185+
img_tensor = tf.image.decode_image(img_data)
186+
if (
187+
tf.shape(img_tensor)[-1] == 4
188+
): # some files have an alpha channel -> remove
189+
img_tensor = img_tensor[:, :, :-1]
190+
img_data = tf.io.encode_jpeg(img_tensor).numpy()
193191

194192
trimaps_dir_path = os.path.join(annotations_dir_path, "trimaps")
195193
xmls_dir_path = os.path.join(annotations_dir_path, "xmls")
@@ -207,7 +205,7 @@ def _generate_examples(
207205
head_bbox = _EMPTY_BBOX
208206

209207
record = {
210-
"image": os.path.join(images_dir_path, image_name),
208+
"image": io.BytesIO(img_data),
211209
"label": int(label),
212210
"species": species,
213211
"file_name": image_name,

0 commit comments

Comments
 (0)