@@ -130,5 +130,146 @@ def test_list_dataset_variants_without_configs(mock_fs: testing.MockFs):
130
130
]
131
131
132
132
133
def test_list_datasets_in_data_dir(mock_fs: testing.MockFs):
  """Checks that exactly the well-formed dataset folders are listed.

  Files for two datasets (`ds1` with two configs, `ds2` without config) are
  registered together with three malformed entries; the listing is expected
  to contain only the four well-formed references.
  """
  data_dir = epath.Path('/a')

  well_formed_files = (
      'ds1/config1/1.0.0/dataset_info.json',
      'ds1/config1/1.0.0/features.json',
      'ds1/config1/2.0.0/dataset_info.json',
      'ds1/config1/2.0.0/features.json',
      'ds1/config2/1.0.0/dataset_info.json',
      'ds1/config2/1.0.0/features.json',
      'ds2/1.0.0/dataset_info.json',
      'ds2/1.0.0/features.json',
  )
  for relative_path in well_formed_files:
    mock_fs.add_file(data_dir / relative_path)

  # The following are problematic and should thus be ignored.
  malformed_files = (
      'invalid-name/1.0.0/features.json',
      'invalid_version1/1.a.b/features.json',
      'invalid_version2/1.2.3.4/features.json',
  )
  for relative_path in malformed_files:
    mock_fs.add_file(os.path.join(data_dir, relative_path), content='x')

  references = sorted(
      file_utils.list_datasets_in_data_dir(data_dir=epath.Path(data_dir))
  )

  # Compare against a freshly built path rather than the object handed to the
  # function under test.
  expected_data_dir = epath.Path('/a')
  expected = [
      naming.DatasetReference(
          dataset_name='ds1',
          config=config,
          version=version,
          data_dir=expected_data_dir,
      )
      for config, version in (
          ('config1', '1.0.0'),
          ('config1', '2.0.0'),
          ('config2', '1.0.0'),
      )
  ]
  # `ds2` has no config folder, so its reference is built without `config`.
  expected.append(
      naming.DatasetReference(
          dataset_name='ds2', version='1.0.0', data_dir=expected_data_dir
      )
  )
  assert references == expected
184
+
185
+
186
def test_list_datasets_in_data_dir_with_namespace(mock_fs: testing.MockFs):
  """Checks that the requested namespace is set on the listed reference."""
  namespace = 'ns'
  data_dir = epath.Path('/a')
  for relative_path in (
      'ds1/config1/1.0.0/dataset_info.json',
      'ds1/config1/1.0.0/features.json',
  ):
    mock_fs.add_file(data_dir / relative_path)

  listed = file_utils.list_datasets_in_data_dir(
      data_dir=epath.Path(data_dir),
      namespace=namespace,
      include_configs=True,
      include_versions=True,
  )
  references = sorted(listed)

  # Compare against a freshly built path rather than the object handed to the
  # function under test.
  expected_data_dir = epath.Path('/a')
  expected_reference = naming.DatasetReference(
      dataset_name='ds1',
      namespace=namespace,
      config='config1',
      version='1.0.0',
      data_dir=expected_data_dir,
  )
  assert references == [expected_reference]
210
+
211
+
212
def test_find_files_without_glob(mock_fs: testing.MockFs):
  """Checks that only files named `x` at the globbed depths are returned."""
  root = epath.Path('/')
  registered_paths = (
      root / 'a' / 'b' / 'x',
      root / 'a' / 'c' / 'x',
      root / 'b' / 'd' / 'x',
      root / 'b' / 'd' / 'y',  # Should be ignored.
      root / 'b' / '.config' / 'x',  # Should be ignored.
      root / 'b' / 'x',
      root / 'b' / 'y',  # Should be ignored.
  )
  for registered_path in registered_paths:
    mock_fs.add_file(registered_path)

  found = file_utils._find_files_without_glob(
      root, globs=['*/*', '*/*/*'], file_names=['x']
  )

  # Normalise to plain strings so the comparison is independent of path type.
  found_as_str = sorted(os.fspath(found_path) for found_path in found)
  assert found_as_str == ['/a/b/x', '/a/c/x', '/b/d/x', '/b/x']
226
+
227
+
228
@pytest.mark.parametrize(
    ['filename', 'result'],
    [
        ('abc', False),
        ('dataset_info.json', True),
        ('features.json', True),
        ('mnist-test.tfrecord-00000-of-00001', True),
        ('mnist-test.arrayrecord-00000-of-00001', True),
    ],
)
def test_looks_like_a_tfds_file(filename, result):
  """Checks the file-name heuristic against each parametrized expectation."""
  looks_like_tfds = file_utils._looks_like_a_tfds_file(filename)
  assert looks_like_tfds == result
240
+
241
+
242
@pytest.mark.parametrize(
    ['path', 'glob_result', 'expected'],
    [
        ('/a/*', ['/a/b', '/a/c'], ['/a/b', '/a/c']),
        ('/a/b', None, ['/a/b']),
        ('a/*', None, ['a/*']),
        ('/a/b@*', None, ['/a/b@*']),
    ],
)
def test_expand_glob(path, glob_result, expected):
  """Checks when `expand_glob` globs and what it returns, using a mocked path.

  A `glob_result` of `None` marks the cases where no glob call is expected.
  """
  with mock.patch.object(epath, 'Path') as mock_epath:
    # Every `epath.Path(...)` call inside the patch yields this same mock.
    path_instance = mock_epath.return_value
    path_instance.expanduser.return_value = path
    path_instance.glob.return_value = glob_result

    expanded = file_utils.expand_glob(path)

    if glob_result is None:
      path_instance.glob.assert_not_called()
    else:
      # The pattern passed to `glob` is the path minus its first character.
      path_instance.glob.assert_called_once_with(path[1:])

    expanded_as_str = [os.fspath(item) for item in expanded]
    assert expanded_as_str == expected
262
+
263
+
264
def test_publish_data(mock_fs: testing.MockFs):
  """Checks that a published file is readable from the destination dir."""
  source_dir = epath.Path('/tmp') / 'dummy_mnist/3.0.1'
  destination_dir = epath.Path('/a/b')
  info_file_name = 'dataset_info.json'
  payload = 'a'
  mock_fs.add_file(path=source_dir / info_file_name, content=payload)

  file_utils.publish_data(
      from_data_dir=source_dir, to_data_dir=destination_dir
  )

  published_content = mock_fs.read_file(destination_dir / info_file_name)
  assert published_content == payload
272
+
273
+
133
274
# Allow running this test module directly as a script.
if __name__ == '__main__':
  testing.test_main()
0 commit comments