16
16
from unittest import mock
17
17
18
18
import datasets as hf_datasets
19
+ import numpy as np
19
20
import pytest
21
+ from tensorflow_datasets .core import lazy_imports_lib
20
22
from tensorflow_datasets .core .dataset_builders import huggingface_dataset_builder
21
23
24
+ PIL_Image = lazy_imports_lib .lazy_imports .PIL_Image
25
+
22
26
23
27
class DummyHuggingfaceBuilder (hf_datasets .GeneratorBasedBuilder ):
24
28
@@ -35,7 +39,11 @@ def _split_generators(self, dl_manager):
35
39
36
40
def _generate_examples(self):
  """Yields two `(key, example)` pairs with int, string and image values.

  Each example contains:
    number: The example index.
    text: The example index formatted as a string.
    image: A 4x4 image created with `PIL_Image.new(mode='L', ...)`.
  """
  for i in range(2):
    yield i, {
        'number': i,
        # No padding around the value: `test_download_and_prepare` compares
        # the stored text against exactly b'0' and b'1'.
        'text': f'{i}',
        'image': PIL_Image.new(mode='L', size=(4, 4)),
    }
39
47
40
48
def download_and_prepare (self , * args , ** kwargs ):
41
49
# Disable downloads from GCS
@@ -87,8 +95,17 @@ def mock_huggingface_dataset_builder(
87
95
def test_download_and_prepare(builder):
  """Checks that prepared data can be read back with the expected values.

  Args:
    builder: Builder under test, supplied by the caller (presumably a pytest
      fixture -- confirm against the rest of the file).
  """
  builder.download_and_prepare()
  ds = builder.as_data_source()
  # NOTE(review): the dummy generator creates mode 'L' images while the
  # expectation here is RGB; presumably the stored image feature decodes to
  # three channels -- confirm.
  expected_image = PIL_Image.new(mode='RGB', size=(4, 4))
  expected_elements = [
      {'number': 0, 'text': b'0', 'image': expected_image},
      {'number': 1, 'text': b'1', 'image': expected_image},
  ]
  # Split names are sanitized, e.g. train.clean -> train_clean.
  elements = list(ds['train_clean'])
  # `zip` stops at the shorter input, so check the count explicitly; otherwise
  # a split with missing (or extra) examples would pass unnoticed.
  assert len(elements) == len(expected_elements)
  for element, expected in zip(elements, expected_elements):
    for feature in ('number', 'text', 'image'):
      assert np.array_equal(element[feature], expected[feature])
92
109
93
110
94
111
def test_all_parameters_are_passed_down_to_hf (builder ):
@@ -98,4 +115,8 @@ def test_all_parameters_are_passed_down_to_hf(builder):
98
115
99
116
100
117
def test_hf_features(builder):
  """Checks the Hugging Face features reported by the builder."""
  actual_features = builder._hf_features()
  expected_features = {
      'number': hf_datasets.Value('int64'),
      'text': hf_datasets.Value('string'),
      'image': hf_datasets.Image(),
  }
  assert actual_features == expected_features
0 commit comments