Commit f10f952

(convention): Add Arrow prefix to parquet datasource for consistency (#1724)
1 parent 8436e1d commit f10f952

File tree

4 files changed: +8 -8 lines
awswrangler/distributed/ray/datasources/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1,21 +1,21 @@
 """Ray Datasources Module."""

 from awswrangler.distributed.ray.datasources.arrow_csv_datasource import ArrowCSVDatasource
+from awswrangler.distributed.ray.datasources.arrow_parquet_datasource import ArrowParquetDatasource
 from awswrangler.distributed.ray.datasources.pandas_file_based_datasource import UserProvidedKeyBlockWritePathProvider
 from awswrangler.distributed.ray.datasources.pandas_text_datasource import (
     PandasCSVDataSource,
     PandasFWFDataSource,
     PandasJSONDatasource,
     PandasTextDatasource,
 )
-from awswrangler.distributed.ray.datasources.parquet_datasource import ParquetDatasource

 __all__ = [
     "ArrowCSVDatasource",
+    "ArrowParquetDatasource",
     "PandasCSVDataSource",
     "PandasFWFDataSource",
     "PandasJSONDatasource",
-    "ParquetDatasource",
     "PandasTextDatasource",
     "UserProvidedKeyBlockWritePathProvider",
 ]
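
For callers, this hunk only changes the exported name; a minimal before/after sketch (the class itself is untouched by this commit):

# Before this commit:
#   from awswrangler.distributed.ray.datasources import ParquetDatasource
# After it, the same class is exported under the Arrow-prefixed name,
# consistent with ArrowCSVDatasource:
from awswrangler.distributed.ray.datasources import ArrowParquetDatasource

datasource = ArrowParquetDatasource()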

awswrangler/distributed/ray/datasources/parquet_datasource.py renamed to awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-"""Ray ParquetDatasource Module."""
+"""Ray ArrowParquetDatasource Module."""

 import logging
 from typing import Any, Callable, Dict, Iterator, List, Optional, Union
@@ -86,7 +86,7 @@ def _read_pieces(
 ray.data.datasource.parquet_datasource._read_pieces = _read_pieces  # pylint: disable=protected-access


-class ParquetDatasource(PandasFileBasedDatasource):  # pylint: disable=abstract-method
+class ArrowParquetDatasource(PandasFileBasedDatasource):  # pylint: disable=abstract-method
     """Parquet datasource, for reading and writing Parquet files."""

     _FILE_EXTENSION = "parquet"
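
The second hunk also shows the convention the renamed class follows: subclass awswrangler's PandasFileBasedDatasource and declare the handled file extension as a class attribute. A minimal sketch of that pattern with a hypothetical datasource (not part of this commit; the base-class import path is inferred from the module name in the first diff):

from awswrangler.distributed.ray.datasources.pandas_file_based_datasource import (
    PandasFileBasedDatasource,
)


class ArrowORCDatasource(PandasFileBasedDatasource):  # hypothetical example
    """ORC datasource sketched only to illustrate the convention."""

    # Files matched or produced by this datasource carry this extension.
    _FILE_EXTENSION = "orc"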

awswrangler/distributed/ray/modin/s3/_read_parquet.py

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@
 import pyarrow as pa
 from ray.data import read_datasource

-from awswrangler.distributed.ray.datasources import ParquetDatasource
+from awswrangler.distributed.ray.datasources import ArrowParquetDatasource
 from awswrangler.distributed.ray.modin._utils import _to_modin


@@ -27,7 +27,7 @@ def _read_parquet_distributed(  # pylint: disable=unused-argument
     if coerce_int96_timestamp_unit:
         dataset_kwargs["coerce_int96_timestamp_unit"] = coerce_int96_timestamp_unit
     dataset = read_datasource(
-        datasource=ParquetDatasource(),  # type: ignore
+        datasource=ArrowParquetDatasource(),  # type: ignore
         parallelism=parallelism,
         use_threads=use_threads,
         paths=paths,
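
Sketch of the read path after the rename, mirroring the call in the hunk above (bucket path and tuning values are placeholders; the read_datasource keyword arguments are unchanged by this commit):

from ray.data import read_datasource

from awswrangler.distributed.ray.datasources import ArrowParquetDatasource

# Only the datasource class name changed; the call shape is the same.
dataset = read_datasource(
    datasource=ArrowParquetDatasource(),
    parallelism=200,                  # placeholder
    use_threads=True,                 # placeholder
    paths=["s3://my-bucket/table/"],  # placeholder path
)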

awswrangler/distributed/ray/modin/s3/_write_parquet.py

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@
 from ray.data import from_modin, from_pandas
 from ray.data.datasource.file_based_datasource import DefaultBlockWritePathProvider

-from awswrangler.distributed.ray.datasources import ParquetDatasource, UserProvidedKeyBlockWritePathProvider
+from awswrangler.distributed.ray.datasources import ArrowParquetDatasource, UserProvidedKeyBlockWritePathProvider

 _logger: logging.Logger = logging.getLogger(__name__)

@@ -49,7 +49,7 @@ def _to_parquet_distributed(  # pylint: disable=unused-argument
     # Repartition by max_rows_by_file
     elif max_rows_by_file and (max_rows_by_file > 0):
         ds = ds.repartition(math.ceil(ds.count() / max_rows_by_file))
-    datasource = ParquetDatasource()
+    datasource = ArrowParquetDatasource()
     ds.write_datasource(
         datasource,  # type: ignore
         path=path or path_root,
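
And the write path, sketched from the hunk above (the DataFrame, row cap, and output path are placeholders; in awswrangler the Dataset arrives via from_modin/from_pandas):

import math

import pandas as pd
from ray.data import from_pandas

from awswrangler.distributed.ray.datasources import ArrowParquetDatasource

ds = from_pandas(pd.DataFrame({"x": range(10)}))  # placeholder data

max_rows_by_file = 4  # placeholder cap
if max_rows_by_file > 0:
    # Repartition so each written Parquet file stays under the row cap.
    ds = ds.repartition(math.ceil(ds.count() / max_rows_by_file))

ds.write_datasource(
    ArrowParquetDatasource(),  # type: ignore
    path="s3://my-bucket/output/",  # placeholder path
)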
