Skip to content

Commit 07a92bc

Browse files
author
The TensorFlow Datasets Authors
committed
Set both names and IDs in mlc's test fixtures.
PiperOrigin-RevId: 679557238
1 parent d3d5b93 commit 07a92bc

File tree

4 files changed: +26 -15 lines changed

4 files changed: +26 -15 lines changed

tensorflow_datasets/core/dataset_builders/croissant_builder_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -186,5 +186,5 @@ def test_download_and_prepare(crs_builder, expected_entries, split_name):
186186
data_source = crs_builder.as_data_source(split=split_name)
187187
assert len(data_source) == 2
188188
for i in range(2):
189-
assert data_source[i]["index"] == expected_entries[i]["index"]
190-
assert data_source[i]["text"].decode() == expected_entries[i]["text"]
189+
assert data_source[i]["jsonl/index"] == expected_entries[i]["index"]
190+
assert data_source[i]["jsonl/text"].decode() == expected_entries[i]["text"]

tensorflow_datasets/core/utils/croissant_utils_test.py

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,15 @@ def test_get_record_set_ids():
4848
mlc.RecordSet(
4949
id='record_set_2',
5050
data_types=['http://mlcommons.org/croissant/Split'],
51-
fields=[mlc.Field(name='name', data_types=mlc.DataType.TEXT)],
52-
data=[{'name': 'train'}, {'name': 'test'}],
51+
fields=[
52+
mlc.Field(
53+
id='record_set_2/name', data_types=mlc.DataType.TEXT
54+
)
55+
],
56+
data=[
57+
{'record_set_2/name': 'train'},
58+
{'record_set_2/name': 'test'},
59+
],
5360
),
5461
],
5562
)
@@ -71,14 +78,14 @@ def test_get_split_recordset():
7178
),
7279
mlc.RecordSet(
7380
id='splits',
74-
key='name',
81+
key='splits/name',
7582
data_types=[mlc.DataType.SPLIT],
7683
fields=[
7784
mlc.Field(
7885
id='splits/name', name='name', data_types=mlc.DataType.TEXT
7986
)
8087
],
81-
data=[{'name': 'train'}, {'name': 'test'}],
88+
data=[{'splits/name': 'train'}, {'splits/name': 'test'}],
8289
),
8390
]
8491
metadata = mlc.Metadata(name='dummy', url='dum.my', record_sets=record_sets)
@@ -95,15 +102,15 @@ def test_get_split_recordset_with_no_split_recordset():
95102
record_sets = [
96103
mlc.RecordSet(
97104
id='labels',
98-
key='name',
105+
key='labels/label',
99106
fields=[
100107
mlc.Field(
101108
id='labels/label',
102109
name='label',
103110
data_types=mlc.DataType.TEXT,
104111
)
105112
],
106-
data=[{'label': 'bird'}, {'label': 'bike'}],
113+
data=[{'labels/label': 'bird'}, {'labels/label': 'bike'}],
107114
),
108115
mlc.RecordSet(
109116
id='samples',

tensorflow_datasets/testing/test_utils.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -736,7 +736,8 @@ def dummy_croissant_file(
736736

737737
fields = [
738738
mlc.Field(
739-
name='index',
739+
id='jsonl/index',
740+
name='jsonl/index',
740741
description='The sample index.',
741742
data_types=mlc.DataType.INTEGER,
742743
source=mlc.Source(
@@ -745,7 +746,8 @@ def dummy_croissant_file(
745746
),
746747
),
747748
mlc.Field(
748-
name='text',
749+
id='jsonl/text',
750+
name='jsonl/text',
749751
description='The dummy sample text.',
750752
data_types=mlc.DataType.TEXT,
751753
source=mlc.Source(
@@ -758,6 +760,7 @@ def dummy_croissant_file(
758760
record_sets = [
759761
mlc.RecordSet(
760762
id='jsonl',
763+
name='jsonl',
761764
description='Dummy record set.',
762765
fields=fields,
763766
)
@@ -778,6 +781,7 @@ def dummy_croissant_file(
778781
distribution = [
779782
mlc.FileObject(
780783
id='raw_data',
784+
name='raw_data',
781785
description='File with the data.',
782786
encoding_format='application/jsonlines',
783787
content_url=f'data/{raw_data_filename}',

tensorflow_datasets/testing/test_utils_test.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,7 @@ def is_lambda(fn):
237237
{'text': 'Dummy example 0', 'index': 0},
238238
],
239239
[
240-
{'text': b'Dummy example 0', 'index': 0},
240+
{'jsonl/text': b'Dummy example 0', 'jsonl/index': 0},
241241
],
242242
),
243243
(
@@ -246,8 +246,8 @@ def is_lambda(fn):
246246
{'text': None, 'index': 1},
247247
],
248248
[
249-
{'text': b'Dummy example 0', 'index': 0},
250-
{'text': None, 'index': 1},
249+
{'jsonl/text': b'Dummy example 0', 'jsonl/index': 0},
250+
{'jsonl/text': None, 'jsonl/index': 1},
251251
],
252252
),
253253
(
@@ -259,8 +259,8 @@ def is_lambda(fn):
259259
(
260260
None,
261261
[
262-
{'text': b'Dummy example 0', 'index': 0},
263-
{'text': b'Dummy example 1', 'index': 1},
262+
{'jsonl/text': b'Dummy example 0', 'jsonl/index': 0},
263+
{'jsonl/text': b'Dummy example 1', 'jsonl/index': 1},
264264
],
265265
),
266266
],

0 commit comments

Comments
 (0)