Skip to content

Commit d9c6d4f

Browse files
marcenacp authored and The TensorFlow Datasets Authors committed
Pass memoryviews on pyarrow.Buffers instead of copying to bytes.
PiperOrigin-RevId: 627396776
1 parent f010c1a commit d9c6d4f

File tree

3 files changed

+9
-9
lines changed

3 files changed

+9
-9
lines changed

tensorflow_datasets/core/data_sources/parquet.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,14 +38,14 @@ def __getitems__(self, keys: Iterable[int]) -> Sequence[Any]:
3838
return []
3939
# All elements are written in the first column (see core.ParquetFileAdapter)
4040
elements = self.table.take(keys).column(0)
41-
return [bytes(element.as_buffer()) for element in elements]
41+
return [memoryview(element.as_buffer()) for element in elements]
4242

4343
def __getitem__(self, key: int) -> Any:
4444
"""Retrieves the n-th element from the Parquet table."""
4545
# The element is written in the first column (see core.ParquetFileAdapter)
4646
element = self.table.slice(key, 1).column(0)
4747
if len(element) == 1:
48-
return bytes(element[0].as_buffer())
48+
return memoryview(element[0].as_buffer())
4949
raise IndexError(f'Could not find element at index {key}')
5050

5151
def __len__(self) -> int:

tensorflow_datasets/core/example_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ def __post_init__(self):
128128
self._flat_example_specs = utils.flatten_nest_dict(self.example_specs)
129129

130130
def parse_example(
131-
self, serialized_example: bytes
131+
self, serialized_example: bytes | memoryview
132132
) -> Mapping[str, Union[np.ndarray, list[Any]]]:
133133
example = tf_example_pb2.Example.FromString(serialized_example)
134134
np_example = _features_to_numpy(example.features, self._flat_example_specs)

tensorflow_datasets/core/features/top_level_feature.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
import enum
2121
import functools
22-
from typing import Any, List, Union
22+
from typing import Any
2323

2424
from tensorflow_datasets.core import example_parser
2525
from tensorflow_datasets.core import example_serializer
@@ -44,15 +44,15 @@ class TopLevelFeature(feature_lib.FeatureConnector):
4444
"""
4545

4646
@functools.cached_property
47-
def flat_features(self) -> List[Any]:
47+
def flat_features(self) -> list[Any]:
4848
return self._flatten(self)
4949

5050
@functools.cached_property
51-
def flat_serialized_info(self) -> List[Any]:
51+
def flat_serialized_info(self) -> list[Any]:
5252
return self._flatten(self.get_serialized_info())
5353

5454
@functools.cached_property
55-
def flat_sequence_ranks(self) -> List[int]:
55+
def flat_sequence_ranks(self) -> list[int]:
5656
return [_get_sequence_rank(s) for s in self.flat_serialized_info] # pylint: disable=not-an-iterable
5757

5858
def _decode_example_generic(
@@ -129,7 +129,7 @@ def serialize_example(self, example_data) -> bytes:
129129

130130
def deserialize_example(
131131
self,
132-
serialized_example: Union[tf.Tensor, bytes],
132+
serialized_example: tf.Tensor | bytes,
133133
*,
134134
decoders=None,
135135
) -> utils.TensorDict:
@@ -151,7 +151,7 @@ def deserialize_example(
151151

152152
def deserialize_example_np(
153153
self,
154-
serialized_example: Union[tf.Tensor, bytes],
154+
serialized_example: tf.Tensor | bytes | memoryview,
155155
*,
156156
decoders=None,
157157
) -> utils.NpArrayOrScalarDict:

0 commit comments

Comments
 (0)