Skip to content

Commit a08d8d5

Browse files
marcenacp authored and The TensorFlow Datasets Authors committed
Add a test testing that there is no VERSION < max(RELEASE_NOTE_VERSION).
PiperOrigin-RevId: 688061651
1 parent ae9467a commit a08d8d5

File tree

7 files changed

+81
-6
lines changed

7 files changed

+81
-6
lines changed

.github/workflows/pytest-template.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ jobs:
6161
# * github_api is run separately to not overuse API quota.
6262
# * wmt is run separately to avoid worker hanging.
6363
# * Huggingface requires `datasets` library.
64+
# * version_test loads datasets that fail in OSS (e.g. smart_buildings).
6465
- name: Run core tests
6566
run: |
6667
pytest --durations=100 -vv -n auto --shard-id=$((${{ matrix.shard-id }} - 1)) --num-shards=${{ matrix.num-shards }} \
@@ -72,7 +73,8 @@ jobs:
7273
--ignore="tensorflow_datasets/core/github_api/github_path_test.py" \
7374
--ignore="tensorflow_datasets/translate/wmt19_test.py" \
7475
--ignore="tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py" \
75-
--ignore="tensorflow_datasets/core/utils/huggingface_utils_test.py"
76+
--ignore="tensorflow_datasets/core/utils/huggingface_utils_test.py" \
77+
--ignore="tensorflow_datasets/testing/version_test.py"
7678
7779
# Run tests without any pytest plugins. The tests should be triggered for a single shard only.
7880
- name: Run leftover tests

tensorflow_datasets/datasets/asqa/asqa_dataset_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def _features():
8080
class Builder(tfds.core.GeneratorBasedBuilder):
8181
"""DatasetBuilder for asqa dataset."""
8282

83-
VERSION = tfds.core.Version('1.0.0')
83+
VERSION = tfds.core.Version('2.0.0')
8484
RELEASE_NOTES = {
8585
'2.0.0': 'Sample ID goes from int32 (overflowing) to int64.',
8686
'1.0.0': 'Initial release.',

tensorflow_datasets/datasets/duke_ultrasound/duke_ultrasound_dataset_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
class Builder(tfds.core.GeneratorBasedBuilder):
4343
"""DAS beamformed phantom images and paired post-processed images."""
4444

45-
VERSION = tfds.core.Version('1.0.1')
45+
VERSION = tfds.core.Version('2.0.0')
4646
RELEASE_NOTES = {
4747
'2.0.0': r'Fix timestamp_id from %Y%m%d%H%M%S to posix timestamp.',
4848
'1.0.1': 'Fixes parsing of boolean field `harmonic`.',

tensorflow_datasets/datasets/imagenet_v2/imagenet_v2_dataset_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def _create_builder_configs():
6969
class Builder(tfds.core.GeneratorBasedBuilder):
7070
"""An ImageNet test set recollected by following the original protocol."""
7171

72-
VERSION = tfds.core.Version('3.0.0')
72+
VERSION = tfds.core.Version('3.1.0')
7373
SUPPORTED_VERSIONS = [
7474
tfds.core.Version('2.0.0'),
7575
]

tensorflow_datasets/datasets/svhn_cropped/svhn_cropped_dataset_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
class Builder(tfds.core.GeneratorBasedBuilder):
2727
"""Street View House Numbers (SVHN) Dataset, cropped version."""
2828

29-
VERSION = tfds.core.Version("3.0.0")
29+
VERSION = tfds.core.Version("3.1.0")
3030
SUPPORTED_VERSIONS = [
3131
tfds.core.Version("3.1.0"),
3232
]
tensorflow_datasets/testing/version_test.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# coding=utf-8
2+
# Copyright 2024 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import dataclasses
17+
import inspect
18+
19+
import tensorflow_datasets as tfds
20+
21+
22+
@dataclasses.dataclass(frozen=True, kw_only=True)
23+
class _BuilderWithVersionMismatch:
24+
name: str
25+
file: str
26+
version: str
27+
max_version_in_release_notes: str
28+
29+
30+
def test_internal_datasets_have_versions_on_line_with_the_release_notes():
31+
builders = tfds.list_builders(with_community_datasets=False)
32+
assert builders
33+
builders_with_version_mismatch: list[_BuilderWithVersionMismatch] = []
34+
for builder in builders:
35+
builder_cls = tfds.core.registered.imported_builder_cls(builder)
36+
if not (
37+
hasattr(builder_cls, 'VERSION')
38+
and hasattr(builder_cls, 'RELEASE_NOTES')
39+
):
40+
# This can be the case for test datasets.
41+
continue
42+
if builder_cls.VERSION and builder_cls.RELEASE_NOTES:
43+
max_version_in_release_notes = max(
44+
[tfds.core.Version(version) for version in builder_cls.RELEASE_NOTES]
45+
)
46+
version = tfds.core.Version(builder_cls.VERSION)
47+
if version < max_version_in_release_notes:
48+
# This means the builder is as follows:
49+
# ```
50+
# RELEASE_NOTES = {
51+
# '1.0.1': 'Bug fix.',
52+
# '1.0.0': 'Initial release.',
53+
# }
54+
# VERSION = '1.0.0' # <- Someone forgot to increment this version.
55+
# ```
56+
file = inspect.getfile(builder_cls).split('tensorflow_datasets/')[-1]
57+
builders_with_version_mismatch.append(
58+
_BuilderWithVersionMismatch(
59+
name=builder_cls.name,
60+
file=file,
61+
version=version,
62+
max_version_in_release_notes=max_version_in_release_notes,
63+
)
64+
)
65+
if builders_with_version_mismatch:
66+
error = 'The following datasets have a version mismatch:'
67+
for builder_cls in builders_with_version_mismatch:
68+
error += (
69+
f'\n - Dataset {builder_cls.name} ({builder_cls.file}) has VERSION='
70+
f'"{builder_cls.version}" but RELEASE_NOTES contains version'
71+
f' "{builder_cls.max_version_in_release_notes}".'
72+
)
73+
raise ValueError(error)

tensorflow_datasets/text/dolphin_number_word/dolphin_number_word.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class DolphinNumberWord(tfds.core.GeneratorBasedBuilder):
4545

4646
__count__ = 0
4747

48-
VERSION = tfds.core.Version('0.0.2')
48+
VERSION = tfds.core.Version('0.0.3')
4949
RELEASE_NOTES = {
5050
'0.0.1': 'Initial release.',
5151
'0.0.2': (

0 commit comments

Comments
 (0)