Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/ray/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ray.data._internal.utils.arrow_utils import get_pyarrow_version

from ray.data._internal.compute import ActorPoolStrategy, TaskPoolStrategy
from ray.data._internal.datasource.tfrecords_datasource import TFXReadOptions
from ray.data._internal.datasource import TFXReadOptions
from ray.data._internal.execution.interfaces import (
ExecutionOptions,
ExecutionResources,
Expand Down
127 changes: 127 additions & 0 deletions python/ray/data/_internal/datasource/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Ray Data internal datasource package.

This package encapsulates datasources and datasinks. Import from this package
instead of individual modules for a simpler API.
"""

from ray.data._internal.datasource.audio_datasource import AudioDatasource
from ray.data._internal.datasource.avro_datasource import AvroDatasource
from ray.data._internal.datasource.bigquery_datasink import BigQueryDatasink
from ray.data._internal.datasource.bigquery_datasource import BigQueryDatasource
from ray.data._internal.datasource.binary_datasource import BinaryDatasource
from ray.data._internal.datasource.clickhouse_datasink import (
ClickHouseDatasink,
ClickHouseTableSettings,
SinkMode,
)
from ray.data._internal.datasource.clickhouse_datasource import ClickHouseDatasource
from ray.data._internal.datasource.csv_datasink import CSVDatasink
from ray.data._internal.datasource.csv_datasource import CSVDatasource
from ray.data._internal.datasource.databricks_credentials import (
DatabricksCredentialProvider,
EnvironmentCredentialProvider,
StaticCredentialProvider,
build_headers,
request_with_401_retry,
resolve_credential_provider,
)
from ray.data._internal.datasource.databricks_uc_datasource import (
DatabricksUCDatasource,
)
from ray.data._internal.datasource.delta_sharing_datasource import (
DeltaSharingDatasource,
)
from ray.data._internal.datasource.hudi_datasource import HudiDatasource
from ray.data._internal.datasource.huggingface_datasource import HuggingFaceDatasource
from ray.data._internal.datasource.iceberg_datasink import IcebergDatasink
from ray.data._internal.datasource.iceberg_datasource import IcebergDatasource
from ray.data._internal.datasource.image_datasink import ImageDatasink
from ray.data._internal.datasource.image_datasource import ImageDatasource
from ray.data._internal.datasource.json_datasink import JSONDatasink
from ray.data._internal.datasource.json_datasource import (
ArrowJSONDatasource,
PandasJSONDatasource,
)
from ray.data._internal.datasource.kafka_datasource import (
KafkaAuthConfig,
KafkaDatasource,
)
from ray.data._internal.datasource.lance_datasink import LanceDatasink
from ray.data._internal.datasource.lance_datasource import LanceDatasource
from ray.data._internal.datasource.mcap_datasource import MCAPDatasource, TimeRange
from ray.data._internal.datasource.mongo_datasink import MongoDatasink
from ray.data._internal.datasource.mongo_datasource import MongoDatasource
from ray.data._internal.datasource.numpy_datasink import NumpyDatasink
from ray.data._internal.datasource.numpy_datasource import NumpyDatasource
from ray.data._internal.datasource.parquet_datasink import ParquetDatasink
from ray.data._internal.datasource.parquet_datasource import ParquetDatasource
from ray.data._internal.datasource.range_datasource import RangeDatasource
from ray.data._internal.datasource.sql_datasink import SQLDatasink
from ray.data._internal.datasource.sql_datasource import Connection, SQLDatasource
from ray.data._internal.datasource.text_datasource import TextDatasource
from ray.data._internal.datasource.tfrecords_datasink import TFRecordDatasink
from ray.data._internal.datasource.tfrecords_datasource import (
TFRecordDatasource,
TFXReadOptions,
)
from ray.data._internal.datasource.torch_datasource import TorchDatasource
from ray.data._internal.datasource.uc_datasource import UnityCatalogConnector
from ray.data._internal.datasource.video_datasource import VideoDatasource
from ray.data._internal.datasource.webdataset_datasink import WebDatasetDatasink
from ray.data._internal.datasource.webdataset_datasource import WebDatasetDatasource

__all__ = [
"ArrowJSONDatasource",
"AudioDatasource",
"AvroDatasource",
"BigQueryDatasink",
"BigQueryDatasource",
"BinaryDatasource",
"build_headers",
"ClickHouseDatasink",
"ClickHouseDatasource",
"ClickHouseTableSettings",
"Connection",
"CSVDatasink",
"CSVDatasource",
"DatabricksCredentialProvider",
"DatabricksUCDatasource",
"DeltaSharingDatasource",
"EnvironmentCredentialProvider",
"HudiDatasource",
"HuggingFaceDatasource",
"IcebergDatasink",
"IcebergDatasource",
"ImageDatasink",
"ImageDatasource",
"JSONDatasink",
"KafkaAuthConfig",
"KafkaDatasource",
"LanceDatasink",
"LanceDatasource",
"MCAPDatasource",
"MongoDatasink",
"MongoDatasource",
"NumpyDatasink",
"NumpyDatasource",
"PandasJSONDatasource",
"ParquetDatasink",
"ParquetDatasource",
"RangeDatasource",
"request_with_401_retry",
"resolve_credential_provider",
"SinkMode",
"SQLDatasink",
"SQLDatasource",
"StaticCredentialProvider",
"TextDatasource",
"TFRecordDatasink",
"TFRecordDatasource",
"TFXReadOptions",
"TimeRange",
"TorchDatasource",
"UnityCatalogConnector",
"VideoDatasource",
"WebDatasetDatasink",
"WebDatasetDatasource",
]
26 changes: 13 additions & 13 deletions python/ray/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,23 @@
from ray._private.thirdparty.tabulate.tabulate import tabulate
from ray.data._internal.compute import ComputeStrategy, TaskPoolStrategy
from ray.data._internal.dataset_repr import _build_dataset_ascii_repr
from ray.data._internal.datasource.bigquery_datasink import BigQueryDatasink
from ray.data._internal.datasource.clickhouse_datasink import (
from ray.data._internal.datasource import (
BigQueryDatasink,
ClickHouseDatasink,
ClickHouseTableSettings,
CSVDatasink,
IcebergDatasink,
ImageDatasink,
JSONDatasink,
LanceDatasink,
MongoDatasink,
NumpyDatasink,
ParquetDatasink,
SinkMode,
SQLDatasink,
TFRecordDatasink,
WebDatasetDatasink,
)
from ray.data._internal.datasource.csv_datasink import CSVDatasink
from ray.data._internal.datasource.iceberg_datasink import IcebergDatasink
from ray.data._internal.datasource.image_datasink import ImageDatasink
from ray.data._internal.datasource.json_datasink import JSONDatasink
from ray.data._internal.datasource.lance_datasink import LanceDatasink
from ray.data._internal.datasource.mongo_datasink import MongoDatasink
from ray.data._internal.datasource.numpy_datasink import NumpyDatasink
from ray.data._internal.datasource.parquet_datasink import ParquetDatasink
from ray.data._internal.datasource.sql_datasink import SQLDatasink
from ray.data._internal.datasource.tfrecords_datasink import TFRecordDatasink
from ray.data._internal.datasource.webdataset_datasink import WebDatasetDatasink
from ray.data._internal.equalize import _equalize
from ray.data._internal.execution.interfaces import RefBundle
from ray.data._internal.execution.interfaces.ref_bundle import (
Expand Down
6 changes: 2 additions & 4 deletions python/ray/data/datasource/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from ray.data._internal.datasource.delta_sharing_datasource import (
from ray.data._internal.datasource import (
Connection,
DeltaSharingDatasource,
)
from ray.data._internal.datasource.mcap_datasource import (
MCAPDatasource,
TimeRange,
)
from ray.data._internal.datasource.sql_datasource import Connection
from ray.data._internal.savemode import SaveMode
from ray.data.datasource.datasink import (
Datasink,
Expand Down
81 changes: 34 additions & 47 deletions python/ray/data/read_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,45 +21,38 @@
import ray
from ray._private.auto_init_hook import wrap_auto_init
from ray.data._internal.compute import TaskPoolStrategy
from ray.data._internal.datasource.audio_datasource import AudioDatasource
from ray.data._internal.datasource.avro_datasource import AvroDatasource
from ray.data._internal.datasource.bigquery_datasource import BigQueryDatasource
from ray.data._internal.datasource.binary_datasource import BinaryDatasource
from ray.data._internal.datasource.clickhouse_datasource import ClickHouseDatasource
from ray.data._internal.datasource.csv_datasource import CSVDatasource
from ray.data._internal.datasource.databricks_credentials import (
from ray.data._internal.datasource import (
ArrowJSONDatasource,
AudioDatasource,
AvroDatasource,
BigQueryDatasource,
BinaryDatasource,
ClickHouseDatasource,
CSVDatasource,
DatabricksCredentialProvider,
)
from ray.data._internal.datasource.delta_sharing_datasource import (
DeltaSharingDatasource,
)
from ray.data._internal.datasource.hudi_datasource import HudiDatasource
from ray.data._internal.datasource.image_datasource import (
HudiDatasource,
ImageDatasource,
ImageFileMetadataProvider,
)
from ray.data._internal.datasource.json_datasource import (
JSON_FILE_EXTENSIONS,
ArrowJSONDatasource,
PandasJSONDatasource,
)
from ray.data._internal.datasource.kafka_datasource import (
KafkaAuthConfig,
KafkaDatasource,
LanceDatasource,
MCAPDatasource,
MongoDatasource,
NumpyDatasource,
PandasJSONDatasource,
ParquetDatasource,
RangeDatasource,
SQLDatasource,
TextDatasource,
TFRecordDatasource,
TimeRange,
TorchDatasource,
UnityCatalogConnector,
VideoDatasource,
WebDatasetDatasource,
)
from ray.data._internal.datasource.lance_datasource import LanceDatasource
from ray.data._internal.datasource.mcap_datasource import MCAPDatasource, TimeRange
from ray.data._internal.datasource.mongo_datasource import MongoDatasource
from ray.data._internal.datasource.numpy_datasource import NumpyDatasource
from ray.data._internal.datasource.parquet_datasource import ParquetDatasource
from ray.data._internal.datasource.range_datasource import RangeDatasource
from ray.data._internal.datasource.sql_datasource import SQLDatasource
from ray.data._internal.datasource.text_datasource import TextDatasource
from ray.data._internal.datasource.tfrecords_datasource import TFRecordDatasource
from ray.data._internal.datasource.torch_datasource import TorchDatasource
from ray.data._internal.datasource.uc_datasource import UnityCatalogConnector
from ray.data._internal.datasource.video_datasource import VideoDatasource
from ray.data._internal.datasource.webdataset_datasource import WebDatasetDatasource
from ray.data._internal.datasource.image_datasource import ImageFileMetadataProvider
from ray.data._internal.datasource.json_datasource import JSON_FILE_EXTENSIONS
from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
from ray.data._internal.logical.interfaces import LogicalPlan
from ray.data._internal.logical.operators import (
Expand Down Expand Up @@ -122,7 +115,7 @@
from pyiceberg.expressions import BooleanExpression
from tensorflow_metadata.proto.v0 import schema_pb2

from ray.data._internal.datasource.tfrecords_datasource import TFXReadOptions
from ray.data._internal.datasource import TFXReadOptions

T = TypeVar("T")

Expand Down Expand Up @@ -2716,9 +2709,7 @@ def read_databricks_tables(
.. testcode::
:skipif: True

from ray.data._internal.datasource.databricks_credentials import (
DatabricksCredentialProvider,
)
from ray.data._internal.datasource import DatabricksCredentialProvider

class MyCredentialProvider(DatabricksCredentialProvider):
def get_token(self) -> str:
Expand Down Expand Up @@ -2774,11 +2765,9 @@ def invalidate(self) -> None:
A :class:`Dataset` containing the queried data.
""" # noqa: E501
# Resolve credential provider (single source of truth for token and host)
from ray.data._internal.datasource.databricks_credentials import (
resolve_credential_provider,
)
from ray.data._internal.datasource.databricks_uc_datasource import (
from ray.data._internal.datasource import (
DatabricksUCDatasource,
resolve_credential_provider,
)

resolved_provider = resolve_credential_provider(
Expand Down Expand Up @@ -3629,9 +3618,7 @@ def from_huggingface(
import datasets
from aiohttp.client_exceptions import ClientResponseError

from ray.data._internal.datasource.huggingface_datasource import (
HuggingFaceDatasource,
)
from ray.data._internal.datasource import HuggingFaceDatasource

if isinstance(dataset, (datasets.IterableDataset, datasets.Dataset)):
try:
Expand Down Expand Up @@ -3915,7 +3902,7 @@ def read_iceberg(
Returns:
:class:`~ray.data.Dataset` with rows from the Iceberg table.
"""
from ray.data._internal.datasource.iceberg_datasource import IcebergDatasource
from ray.data._internal.datasource import IcebergDatasource

# Deprecation warning for row_filter parameter
if row_filter is not None:
Expand Down Expand Up @@ -4174,7 +4161,7 @@ def read_unity_catalog(

Read using a custom credential provider:

>>> from ray.data._internal.datasource.databricks_credentials import ( # doctest: +SKIP
>>> from ray.data._internal.datasource import ( # doctest: +SKIP
... StaticCredentialProvider,
... )
>>> provider = StaticCredentialProvider( # doctest: +SKIP
Expand Down Expand Up @@ -4210,7 +4197,7 @@ def read_unity_catalog(
Returns:
A :class:`~ray.data.Dataset` containing the data from Unity Catalog.
""" # noqa: E501
from ray.data._internal.datasource.databricks_credentials import (
from ray.data._internal.datasource import (
StaticCredentialProvider,
resolve_credential_provider,
)
Expand Down
4 changes: 1 addition & 3 deletions python/ray/data/tests/datasource/databricks_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from dataclasses import dataclass, field
from typing import Optional

from ray.data._internal.datasource.databricks_credentials import (
DatabricksCredentialProvider,
)
from ray.data._internal.datasource import DatabricksCredentialProvider


@dataclass
Expand Down
3 changes: 1 addition & 2 deletions python/ray/data/tests/datasource/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from google.cloud.bigquery_storage_v1.types import stream as gcbqs_stream

import ray
from ray.data._internal.datasource.bigquery_datasink import BigQueryDatasink
from ray.data._internal.datasource.bigquery_datasource import BigQueryDatasource
from ray.data._internal.datasource import BigQueryDatasink, BigQueryDatasource
from ray.data._internal.execution.interfaces.task_context import TaskContext
from ray.data._internal.planner.plan_write_op import generate_collect_write_stats_fn
from ray.data.block import Block
Expand Down
4 changes: 2 additions & 2 deletions python/ray/data/tests/datasource/test_clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from clickhouse_connect.driver.summary import QuerySummary

import ray
from ray.data._internal.datasource.clickhouse_datasink import (
from ray.data._internal.datasource import (
ClickHouseDatasink,
ClickHouseDatasource,
ClickHouseTableSettings,
SinkMode,
)
from ray.data._internal.datasource.clickhouse_datasource import ClickHouseDatasource
from ray.data._internal.execution.interfaces.task_context import TaskContext


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pytest

from ray.data._internal.datasource.databricks_credentials import (
from ray.data._internal.datasource import (
DatabricksCredentialProvider,
EnvironmentCredentialProvider,
StaticCredentialProvider,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@

import ray
import ray.cloudpickle as pickle
from ray.data._internal.datasource.databricks_credentials import (
from ray.data._internal.datasource import (
DatabricksCredentialProvider,
StaticCredentialProvider,
)
from ray.data._internal.datasource.databricks_uc_datasource import (
DatabricksUCDatasource,
StaticCredentialProvider,
)
from ray.data._internal.util import rows_same
from ray.data.tests.datasource.databricks_test_utils import (
Expand Down
Loading