Skip to content

Commit c6210d0

Browse files
tomvdw authored and The TensorFlow Datasets Authors committed
Fix grammar in error message
Also use more recent Python conventions.
PiperOrigin-RevId: 652560451
1 parent 94ef43d commit c6210d0

File tree

1 file changed

+32
-31
lines changed

1 file changed

+32
-31
lines changed

tensorflow_datasets/core/splits.py

Lines changed: 32 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from __future__ import annotations
1919

2020
import abc
21+
from collections.abc import Iterable
2122
import dataclasses
2223
import functools
2324
import itertools
@@ -26,7 +27,7 @@
2627
import os
2728
import re
2829
import typing
29-
from typing import Any, Dict, Iterable, List, Optional, Union
30+
from typing import Any, Union
3031

3132
from absl import logging
3233
from etils import epath
@@ -73,7 +74,7 @@ class _AbsoluteInstruction:
7374
from_: int # uint (starting index).
7475
to: int # uint (ending index).
7576

76-
def to_absolute(self, split_infos) -> List['_AbsoluteInstruction']:
77+
def to_absolute(self, split_infos) -> list['_AbsoluteInstruction']:
7778
del split_infos # unused
7879
return [self]
7980

@@ -94,9 +95,9 @@ class SplitInfo:
9495
"""
9596

9697
name: str
97-
shard_lengths: List[int]
98+
shard_lengths: list[int]
9899
num_bytes: int
99-
filename_template: Optional[naming.ShardedFileTemplate] = None
100+
filename_template: naming.ShardedFileTemplate | None = None
100101
statistics: statistics_pb2.DatasetFeatureStatistics = dataclasses.field(
101102
default_factory=statistics_pb2.DatasetFeatureStatistics,
102103
)
@@ -163,7 +164,7 @@ def __repr__(self) -> str:
163164
)
164165

165166
@property
166-
def file_instructions(self) -> List[shard_utils.FileInstruction]:
167+
def file_instructions(self) -> list[shard_utils.FileInstruction]:
167168
"""Returns the list of dict(filename, take, skip).
168169
169170
This allows for creating your own `tf.data.Dataset` using the low-level
@@ -199,7 +200,7 @@ def file_instructions(self) -> List[shard_utils.FileInstruction]:
199200
)
200201

201202
@property
202-
def filenames(self) -> List[str]:
203+
def filenames(self) -> list[str]:
203204
"""Returns the list of filenames."""
204205
if not self.filename_template:
205206
raise ValueError('No filename templates available.')
@@ -208,7 +209,7 @@ def filenames(self) -> List[str]:
208209
)
209210

210211
@property
211-
def filepaths(self) -> List[epath.Path]:
212+
def filepaths(self) -> list[epath.Path]:
212213
"""All the paths for all the files that are part of this split."""
213214
if not self.filename_template:
214215
raise ValueError('No filename templates available.')
@@ -228,9 +229,9 @@ class MultiSplitInfo(SplitInfo):
228229
This should only be used to read data and not when producing data.
229230
"""
230231

231-
split_infos: List[SplitInfo] = dataclasses.field(default_factory=list)
232+
split_infos: list[SplitInfo] = dataclasses.field(default_factory=list)
232233

233-
def __init__(self, name: str, split_infos: List[SplitInfo]):
234+
def __init__(self, name: str, split_infos: list[SplitInfo]):
234235
if not split_infos:
235236
raise ValueError('Need to pass a non-empty list of SplitInfos')
236237
object.__setattr__(self, 'split_infos', split_infos)
@@ -262,22 +263,22 @@ def __repr__(self) -> str:
262263
)
263264

264265
@property
265-
def file_instructions(self) -> List[shard_utils.FileInstruction]:
266+
def file_instructions(self) -> list[shard_utils.FileInstruction]:
266267
result = []
267268
for split_info in self.split_infos:
268269
result.extend(split_info.file_instructions)
269270
return result
270271

271272
@property
272-
def filenames(self) -> List[str]:
273+
def filenames(self) -> list[str]:
273274
"""Returns the list of filenames."""
274275
result = []
275276
for split_info in self.split_infos:
276277
result.extend(split_info.filenames)
277278
return result
278279

279280
@property
280-
def filepaths(self) -> List[epath.Path]:
281+
def filepaths(self) -> list[epath.Path]:
281282
"""All the paths for all the files that are part of this split."""
282283
result = []
283284
for split_info in self.split_infos:
@@ -301,10 +302,10 @@ class SubSplitInfo:
301302
"""
302303

303304
name: str
304-
file_instructions: List[shard_utils.FileInstruction]
305+
file_instructions: list[shard_utils.FileInstruction]
305306

306307
@property
307-
def shard_lengths(self) -> List[int]:
308+
def shard_lengths(self) -> list[int]:
308309
return [f.take for f in self.file_instructions]
309310

310311
@property
@@ -321,12 +322,12 @@ def num_shards(self) -> int:
321322
return len(self.file_instructions)
322323

323324
@property
324-
def filenames(self) -> List[str]:
325+
def filenames(self) -> list[str]:
325326
"""Returns the list of filenames."""
326327
return sorted(os.path.basename(f.filename) for f in self.file_instructions)
327328

328329
@property
329-
def filepaths(self) -> List[epath.Path]:
330+
def filepaths(self) -> list[epath.Path]:
330331
"""Returns the list of filepaths."""
331332
return sorted(epath.Path(f.filename) for f in self.file_instructions)
332333

@@ -384,7 +385,7 @@ def __init__(
384385
split_infos: Iterable[SplitInfo],
385386
*,
386387
# TODO(b/216470058): remove this parameter
387-
dataset_name: Optional[str] = None, # deprecated, please don't use
388+
dataset_name: str | None = None, # deprecated, please don't use
388389
):
389390
super(SplitDict, self).__init__(
390391
{split_info.name: split_info for split_info in split_infos},
@@ -401,7 +402,7 @@ def __getitem__(self, key):
401402
if not self:
402403
raise KeyError(
403404
f'Trying to access `splits[{key!r}]` but `splits` is empty. '
404-
'This likely indicate the dataset has not been generated yet.'
405+
'This likely indicates the dataset has not been generated yet.'
405406
)
406407
# 1st case: The key exists: `info.splits['train']`
407408
elif str(key) in self.keys():
@@ -435,11 +436,11 @@ def to_proto(self):
435436

436437
@property
437438
def total_num_examples(self):
438-
"""Return the total number of examples."""
439+
"""Returns the total number of examples."""
439440
return sum(s.num_examples for s in self.values())
440441

441442
@classmethod
442-
def merge_multiple(cls, split_dicts: List['SplitDict']) -> 'SplitDict':
443+
def merge_multiple(cls, split_dicts: list['SplitDict']) -> 'SplitDict':
443444
info_per_split = []
444445
for split in set(itertools.chain(*split_dicts)):
445446
infos_of_split = []
@@ -461,7 +462,7 @@ def merge_multiple(cls, split_dicts: List['SplitDict']) -> 'SplitDict':
461462
def _make_absolute_instructions(
462463
split_infos: Iterable[SplitInfo],
463464
instruction: SplitArg,
464-
) -> List[_AbsoluteInstruction]:
465+
) -> list[_AbsoluteInstruction]:
465466
if isinstance(instruction, str):
466467
instruction = AbstractSplit.from_spec(instruction)
467468

@@ -473,7 +474,7 @@ def _make_absolute_instructions(
473474
def _file_instructions_for_split(
474475
instruction: _AbsoluteInstruction,
475476
split_info: SplitInfo,
476-
) -> List[shard_utils.FileInstruction]:
477+
) -> list[shard_utils.FileInstruction]:
477478
"""Returns the file instructions from the given instruction applied to the given split info."""
478479
if not split_info.num_examples:
479480
logging.warning(
@@ -491,9 +492,9 @@ def _file_instructions_for_split(
491492

492493

493494
def _make_file_instructions(
494-
split_infos: List[SplitInfo],
495+
split_infos: list[SplitInfo],
495496
instruction: SplitArg,
496-
) -> List[shard_utils.FileInstruction]:
497+
) -> list[shard_utils.FileInstruction]:
497498
"""Returns file instructions by applying the given instruction on the given splits.
498499
499500
Args:
@@ -566,7 +567,7 @@ def from_spec(cls, spec: SplitArg) -> 'AbstractSplit':
566567
return functools.reduce(operator.add, instructions)
567568

568569
@abc.abstractmethod
569-
def to_absolute(self, split_infos) -> List[_AbsoluteInstruction]:
570+
def to_absolute(self, split_infos) -> list[_AbsoluteInstruction]:
570571
"""Translate instruction into a list of absolute instructions.
571572
572573
Those absolute instructions are then to be added together.
@@ -603,7 +604,7 @@ class _SplitAdd(AbstractSplit):
603604
def __repr__(self):
604605
return f'{self.left!r}+{self.right!r}'
605606

606-
def to_absolute(self, split_infos) -> List[_AbsoluteInstruction]:
607+
def to_absolute(self, split_infos) -> list[_AbsoluteInstruction]:
607608
# Merge instructions from left and right
608609
return self.left.to_absolute(split_infos) + self.right.to_absolute(
609610
split_infos
@@ -613,7 +614,7 @@ def to_absolute(self, split_infos) -> List[_AbsoluteInstruction]:
613614
class _SplitAll(AbstractSplit):
614615
"""Union of all splits of the dataset."""
615616

616-
def to_absolute(self, split_infos) -> List[_AbsoluteInstruction]:
617+
def to_absolute(self, split_infos) -> list[_AbsoluteInstruction]:
617618
# Create the union of all splits
618619
split_names = split_infos.keys()
619620
split = AbstractSplit.from_spec('+'.join(split_names))
@@ -645,8 +646,8 @@ class ReadInstruction(AbstractSplit):
645646

646647
split_name: str
647648
# TODO(py3.10): Add `_ = dataclasses.KW_ONLY`
648-
from_: Optional[int | float] = None
649-
to: Optional[int | float] = None
649+
from_: int | float | None = None
650+
to: int | float | None = None
650651
unit: str = 'abs'
651652
rounding: str = 'closest'
652653

@@ -681,7 +682,7 @@ def __repr__(self) -> str:
681682
rounding = f', rounding={self.rounding!r}' if self.unit == '%' else ''
682683
return f"ReadInstruction('{self.split_name}{slice_str}'{rounding})"
683684

684-
def to_absolute(self, split_infos) -> List[_AbsoluteInstruction]:
685+
def to_absolute(self, split_infos) -> list[_AbsoluteInstruction]:
685686
return [_rel_to_abs_instr(self, split_infos)]
686687

687688

@@ -763,7 +764,7 @@ def _pct_to_abs_closest(boundary, num_examples: int) -> int:
763764

764765
def _rel_to_abs_instr(
765766
rel_instr: ReadInstruction,
766-
split_infos: Dict[str, SplitInfo],
767+
split_infos: dict[str, SplitInfo],
767768
) -> _AbsoluteInstruction:
768769
"""Returns _AbsoluteInstruction instance for given RelativeInstruction.
769770

0 commit comments

Comments
 (0)