Skip to content

Commit 9b888a4

Browse files
author
The TensorFlow Datasets Authors
committed
Accept an empty list as a valid `entries` value for dummy_croissant_file (as specified in the docstring).
PiperOrigin-RevId: 649873646
1 parent 7b7b708 commit 9b888a4

File tree

2 files changed

+37
-16
lines changed

2 files changed

+37
-16
lines changed

tensorflow_datasets/testing/test_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -731,7 +731,7 @@ def dummy_croissant_file(
731731
raw_data_filename: Filename of the raw data file.
732732
croissant_filename: Filename of the Croissant JSON-LD file.
733733
"""
734-
if not entries:
734+
if entries is None:
735735
entries = [{'index': i, 'text': f'Dummy example {i}'} for i in range(2)]
736736

737737
fields = [

tensorflow_datasets/testing/test_utils_test.py

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -230,19 +230,42 @@ def is_lambda(fn):
230230

231231

232232
@pytest.mark.parametrize(
233-
'entries',
233+
'entries,expected_records',
234234
[
235-
[
236-
{'text': 'Dummy example 0', 'index': 0},
237-
{'text': 'Dummy example 1', 'index': 1},
238-
],
239-
[
240-
{'text': 'Dummy example 0', 'index': 0},
241-
{'text': None, 'index': 1},
242-
],
235+
(
236+
[
237+
{'text': 'Dummy example 0', 'index': 0},
238+
],
239+
[
240+
{'text': b'Dummy example 0', 'index': 0},
241+
],
242+
),
243+
(
244+
[
245+
{'text': 'Dummy example 0', 'index': 0},
246+
{'text': None, 'index': 1},
247+
],
248+
[
249+
{'text': b'Dummy example 0', 'index': 0},
250+
{'text': None, 'index': 1},
251+
],
252+
),
253+
(
254+
[],
255+
[],
256+
),
257+
# If entries is None, dummy_croissant_file will create two dummy
258+
# entries.
259+
(
260+
None,
261+
[
262+
{'text': b'Dummy example 0', 'index': 0},
263+
{'text': b'Dummy example 1', 'index': 1},
264+
],
265+
),
243266
],
244267
)
245-
def test_dummy_croissant_file(entries):
268+
def test_dummy_croissant_file(entries, expected_records):
246269
with test_utils.dummy_croissant_file(entries=entries) as croissant_file:
247270
dataset = mlc.Dataset(jsonld=croissant_file)
248271

@@ -255,9 +278,7 @@ def test_dummy_croissant_file(entries):
255278
assert [record_set.id for record_set in dataset.metadata.record_sets] == [
256279
'jsonl'
257280
]
281+
if entries is not None:
282+
assert len(tuple(dataset.records('jsonl'))) == len(expected_records)
258283
for i, record in enumerate(dataset.records('jsonl')):
259-
assert record['index'] == entries[i]['index']
260-
if record['text'] is not None:
261-
assert record['text'].decode() == entries[i]['text']
262-
else:
263-
assert record['text'] == entries[i]['text']
284+
assert record == expected_records[i]

0 commit comments

Comments
 (0)