Add split name to error when no examples were generated with Beam

tomvdw · The TensorFlow Datasets Authors · commit cd25e162ed79 · 2024-07-03T22:39:27.000-07:00
The error currently is: `ValueError: The total number of generated examples is 0. This should be >0!` However, it would be handy to know which split has this problem.

PiperOrigin-RevId: 649297162
diff --git a/tensorflow_datasets/core/writer.py b/tensorflow_datasets/core/writer.py
@@ -445,8 +445,8 @@ def _serialize_example(
   def _check_num_examples(self, num_examples: int) -> int:
     if num_examples <= 0:
       raise ValueError(
-          f"The total number of generated examples is {num_examples}. This"
-          " should be >0!"
+          f"The total number of generated examples is {num_examples} for split"
+          f" {self._filename_template.split}. This should be >0!"
       )
     return num_examples
 
diff --git a/tensorflow_datasets/core/writer_test.py b/tensorflow_datasets/core/writer_test.py
@@ -31,7 +31,6 @@
 from tensorflow_datasets.core import naming
 from tensorflow_datasets.core import writer as writer_lib
 from tensorflow_datasets.core.utils import shard_utils
-from tensorflow_datasets.core.writer import _ShardSpec
 
 
 class GetShardSpecsTest(testing.TestCase):
@@ -55,7 +54,7 @@ def test_1bucket_6shards(self):
         specs,
         [
             # Shard#, path, from_bucket, examples_number, reading instructions.
-            _ShardSpec(
+            writer_lib._ShardSpec(
                 0,
                 '/bar-train.tfrecord-00000-of-00006',
                 '/bar-train.tfrecord-00000-of-00006_index.json',
@@ -66,7 +65,7 @@ def test_1bucket_6shards(self):
                     ),
                 ],
             ),
-            _ShardSpec(
+            writer_lib._ShardSpec(
                 1,
                 '/bar-train.tfrecord-00001-of-00006',
                 '/bar-train.tfrecord-00001-of-00006_index.json',
@@ -77,7 +76,7 @@ def test_1bucket_6shards(self):
                     ),
                 ],
             ),
-            _ShardSpec(
+            writer_lib._ShardSpec(
                 2,
                 '/bar-train.tfrecord-00002-of-00006',
                 '/bar-train.tfrecord-00002-of-00006_index.json',
@@ -88,7 +87,7 @@ def test_1bucket_6shards(self):
                     ),
                 ],
             ),
-            _ShardSpec(
+            writer_lib._ShardSpec(
                 3,
                 '/bar-train.tfrecord-00003-of-00006',
                 '/bar-train.tfrecord-00003-of-00006_index.json',
@@ -99,7 +98,7 @@ def test_1bucket_6shards(self):
                     ),
                 ],
             ),
-            _ShardSpec(
+            writer_lib._ShardSpec(
                 4,
                 '/bar-train.tfrecord-00004-of-00006',
                 '/bar-train.tfrecord-00004-of-00006_index.json',
@@ -110,7 +109,7 @@ def test_1bucket_6shards(self):
                     ),
                 ],
             ),
-            _ShardSpec(
+            writer_lib._ShardSpec(
                 5,
                 '/bar-train.tfrecord-00005-of-00006',
                 '/bar-train.tfrecord-00005-of-00006_index.json',
@@ -141,7 +140,7 @@ def test_4buckets_2shards(self):
         specs,
         [
             # Shard#, path, examples_number, reading instructions.
-            _ShardSpec(
+            writer_lib._ShardSpec(
                 0,
                 '/bar-train.tfrecord-00000-of-00002',
                 '/bar-train.tfrecord-00000-of-00002_index.json',
@@ -155,7 +154,7 @@ def test_4buckets_2shards(self):
                     ),
                 ],
             ),
-            _ShardSpec(
+            writer_lib._ShardSpec(
                 1,
                 '/bar-train.tfrecord-00001-of-00002',
                 '/bar-train.tfrecord-00001-of-00002_index.json',
@@ -520,7 +519,8 @@ def test_write_tfrecord_with_ignored_duplicates(self):
   def test_empty_split(self):
     with self.assertRaisesWithPredicateMatch(
         ValueError,
-        'The total number of generated examples is 0. This should be >0!',
+        'The total number of generated examples is 0 for split train. This'
+        ' should be >0!',
     ):
       self._write(to_write=[])