diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 653efca1f2..7febf10170 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -33,6 +33,8 @@ jobs: run: uv pip freeze + - name: install-test-plugin + run: uv pip install -e tests/test_plugin --system - name: Run pytest run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 --constructors=pandas,pyarrow,polars[eager],polars[lazy] pytest-windows: strategy: @@ -54,6 +56,8 @@ jobs: - name: install-reqs # we are not testing pyspark, modin, or dask on Windows here because nobody got time for that run: uv pip install -e ".[ibis]" --group core-tests --group extra --system + - name: install-test-plugin + run: uv pip install -e tests/test_plugin --system - name: show-deps run: uv pip freeze - name: Run pytest @@ -84,6 +88,8 @@ jobs: cache-dependency-glob: "pyproject.toml" - name: install-reqs run: uv pip install -e ".[dask, modin, ibis]" --group core-tests --group extra --system + - name: install-test-plugin + run: uv pip install -e tests/test_plugin --system - name: show-deps run: uv pip freeze - name: Run pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ba81cc144f..a72df613e3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -115,7 +115,10 @@ repos: rev: v6.0.0 hooks: - id: name-tests-test - exclude: ^tests/utils\.py + exclude: | + (?x) + ^(tests/utils\.py) + |^(tests/test_plugin/) - id: no-commit-to-branch - id: end-of-file-fixer exclude: .svg$ diff --git a/docs/extending.md b/docs/extending.md index 37ff88720f..0a59eb949f 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -6,12 +6,14 @@ If you want your own library to be recognised too, you're welcome open a PR (with tests)!. Alternatively, if you can't do that (for example, if you library is closed-source), see -the next section for what else you can do. +the next sections for what else you can do. 
+ +## Creating an Extension We love open source, but we're not "open source absolutists". If you're unable to open -source you library, then this is how you can make your library compatible with Narwhals. +source your library, then this is how you can make your library compatible with Narwhals. -Make sure that you also define: +Make sure that you define: - `DataFrame.__narwhals_dataframe__`: return an object which implements methods from the `CompliantDataFrame` protocol in `narwhals/typing.py`. @@ -33,3 +35,36 @@ Make sure that you also define: Note that this "extension" mechanism is still experimental. If anything is not clear, or doesn't work, please do raise an issue or contact us on Discord (see the link on the README). + +## Creating a Plugin + +If it's not possible to add extra functions like `__narwhals_namespace__` and others to a dataframe object +itself, then another option is to write a plugin. Narwhals itself has the necessary utilities to detect and +handle plugins. For this integration to work, any plugin architecture must contain the following: + + 1. an entrypoint defined in a `pyproject.toml` file: + + ``` + [project.entry-points.'narwhals.plugins'] + narwhals-<library name> = 'narwhals_<library name>' + ``` + The section name needs to be the same for all plugins; inside it, plugin creators substitute their + own library name for `<library name>`, for example `narwhals-grizzlies = 'narwhals_grizzlies'`. + + + 2. a top-level `__init__.py` file containing the following: + + - `is_native` and `__narwhals_namespace__` functions + - a string constant `NATIVE_PACKAGE` which holds the name of the library for which the plugin is made + + `is_native` accepts a native object and returns a boolean indicating whether the native object is + a dataframe of the library the plugin was written for. + + `__narwhals_namespace__` takes the Narwhals version and returns a compliant namespace for the library, + i.e. one that complies with the CompliantNamespace protocol. 
This protocol specifies a `from_native` + function, whose input parameter is the native object and which returns a compliant Narwhals LazyFrame + which wraps the native dataframe. + +If you want to see an example of a plugin, we have implemented a bare-bones version for the `daft` library +that allows users to pass daft dataframes to Narwhals: +[narwhals-daft](https://github.com/MarcoGorelli/narwhals-daft). diff --git a/narwhals/plugins.py b/narwhals/plugins.py new file mode 100644 index 0000000000..c68e606ed4 --- /dev/null +++ b/narwhals/plugins.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import sys +from functools import cache +from typing import TYPE_CHECKING, Any, Protocol, cast + +from narwhals._compliant import CompliantNamespace +from narwhals._typing_compat import TypeVar + +if TYPE_CHECKING: + from collections.abc import Iterator + from importlib.metadata import EntryPoints + + from typing_extensions import LiteralString, TypeAlias + + from narwhals._compliant.typing import ( + CompliantDataFrameAny, + CompliantFrameAny, + CompliantLazyFrameAny, + CompliantSeriesAny, + ) + from narwhals.utils import Version + + +__all__ = ["Plugin", "from_native"] + +CompliantAny: TypeAlias = ( + "CompliantDataFrameAny | CompliantLazyFrameAny | CompliantSeriesAny" +) +"""A statically-unknown, Compliant object originating from a plugin.""" + +FrameT = TypeVar( + "FrameT", + bound="CompliantFrameAny", + default="CompliantDataFrameAny | CompliantLazyFrameAny", +) +FromNativeR_co = TypeVar( + "FromNativeR_co", bound=CompliantAny, covariant=True, default=CompliantAny +) + + +@cache +def _discover_entrypoints() -> EntryPoints: + from importlib.metadata import entry_points as eps + + group = "narwhals.plugins" + if sys.version_info < (3, 10): + return cast("EntryPoints", eps().get(group, ())) + return eps(group=group) + + +class PluginNamespace(CompliantNamespace[FrameT, Any], Protocol[FrameT, FromNativeR_co]): + def from_native(self, data: Any, /) -> 
FromNativeR_co: ... + + +class Plugin(Protocol[FrameT, FromNativeR_co]): + NATIVE_PACKAGE: LiteralString + + def __narwhals_namespace__( + self, version: Version + ) -> PluginNamespace[FrameT, FromNativeR_co]: ... + def is_native(self, native_object: object, /) -> bool: ... + + +@cache +def _might_be(cls: type, type_: str) -> bool: # pragma: no cover + try: + return any(type_ in o.__module__.split(".") for o in cls.mro()) + except TypeError: + return False + + +def _is_native_plugin(native_object: Any, plugin: Plugin) -> bool: + pkg = plugin.NATIVE_PACKAGE + return ( + sys.modules.get(pkg) is not None + and _might_be(type(native_object), pkg) # type: ignore[arg-type] + and plugin.is_native(native_object) + ) + + +def _iter_from_native(native_object: Any, version: Version) -> Iterator[CompliantAny]: + for entry_point in _discover_entrypoints(): + plugin: Plugin = entry_point.load() + if _is_native_plugin(native_object, plugin): + compliant_namespace = plugin.__narwhals_namespace__(version=version) + yield compliant_namespace.from_native(native_object) + + +def from_native(native_object: Any, version: Version) -> CompliantAny | None: + """Attempt to convert `native_object` to a Compliant object, using any available plugin(s). + + Arguments: + native_object: Raw object from user. + version: Narwhals API version. + + Returns: + If the following conditions are met + - at least 1 plugin is installed + - at least 1 installed plugin supports `type(native_object)` + + Then for the **first matching plugin**, the result of the call below. + This *should* be an object accepted by a Narwhals Dataframe, Lazyframe, or Series: + + plugin: Plugin + plugin.__narwhals_namespace__(version).from_native(native_object) + + In all other cases, `None` is returned instead. 
+ """ + return next(_iter_from_native(native_object, version), None) diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index fd342e9b92..dde3c86aa4 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -566,6 +566,7 @@ def from_native( # noqa: D417 eager_only=eager_only, series_only=series_only, allow_series=allow_series, + eager_or_interchange_only=False, version=Version.V2, ) diff --git a/narwhals/translate.py b/narwhals/translate.py index 1886ef2843..5a51862d0b 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -5,6 +5,7 @@ from functools import wraps from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, overload +from narwhals import plugins from narwhals._constants import EPOCH, MS_PER_SECOND from narwhals._native import ( is_native_arrow, @@ -12,7 +13,14 @@ is_native_polars, is_native_spark_like, ) -from narwhals._utils import Implementation, Version, has_native_namespace +from narwhals._utils import ( + Implementation, + Version, + has_native_namespace, + is_compliant_dataframe, + is_compliant_lazyframe, + is_compliant_series, +) from narwhals.dependencies import ( get_dask_expr, get_numpy, @@ -314,23 +322,65 @@ def from_native( # noqa: D417 ) +def _translate_if_compliant( # noqa: C901,PLR0911 + compliant_object: Any, + *, + pass_through: bool = False, + eager_only: bool = False, + # Interchange-level was removed after v1 + eager_or_interchange_only: bool, + series_only: bool, + allow_series: bool | None, + version: Version, +) -> Any: + if is_compliant_dataframe(compliant_object): + if series_only: + if not pass_through: + msg = "Cannot only use `series_only` with dataframe" + raise TypeError(msg) + return compliant_object + return version.dataframe( + compliant_object.__narwhals_dataframe__()._with_version(version), level="full" + ) + if is_compliant_lazyframe(compliant_object): + if series_only: + if not pass_through: + msg = "Cannot only use `series_only` with lazyframe" 
+ raise TypeError(msg) + return compliant_object + if eager_only or eager_or_interchange_only: + if not pass_through: + msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with lazyframe" + raise TypeError(msg) + return compliant_object + return version.lazyframe( + compliant_object.__narwhals_lazyframe__()._with_version(version), level="full" + ) + if is_compliant_series(compliant_object): + if not allow_series: + if not pass_through: + msg = "Please set `allow_series=True` or `series_only=True`" + raise TypeError(msg) + return compliant_object + return version.series( + compliant_object.__narwhals_series__()._with_version(version), level="full" + ) + # Object wasn't compliant, can't translate here. + return None + + def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 native_object: Any, *, pass_through: bool = False, eager_only: bool = False, # Interchange-level was removed after v1 - eager_or_interchange_only: bool = False, - series_only: bool = False, - allow_series: bool | None = None, + eager_or_interchange_only: bool, + series_only: bool, + allow_series: bool | None, version: Version, ) -> Any: from narwhals._interchange.dataframe import supports_dataframe_interchange - from narwhals._utils import ( - is_compliant_dataframe, - is_compliant_lazyframe, - is_compliant_series, - ) from narwhals.dataframe import DataFrame, LazyFrame from narwhals.series import Series @@ -350,38 +400,18 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 raise ValueError(msg) # Extensions - if is_compliant_dataframe(native_object): - if series_only: - if not pass_through: - msg = "Cannot only use `series_only` with dataframe" - raise TypeError(msg) - return native_object - return version.dataframe( - native_object.__narwhals_dataframe__()._with_version(version), level="full" - ) - if is_compliant_lazyframe(native_object): - if series_only: - if not pass_through: - msg = "Cannot only use `series_only` with lazyframe" - raise TypeError(msg) - 
return native_object - if eager_only or eager_or_interchange_only: - if not pass_through: - msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with lazyframe" - raise TypeError(msg) - return native_object - return version.lazyframe( - native_object.__narwhals_lazyframe__()._with_version(version), level="full" - ) - if is_compliant_series(native_object): - if not allow_series: - if not pass_through: - msg = "Please set `allow_series=True` or `series_only=True`" - raise TypeError(msg) - return native_object - return version.series( - native_object.__narwhals_series__()._with_version(version), level="full" + if ( + translated := _translate_if_compliant( + native_object, + pass_through=pass_through, + eager_only=eager_only, + eager_or_interchange_only=eager_or_interchange_only, + series_only=series_only, + allow_series=allow_series, + version=version, ) + ) is not None: + return translated # Polars if is_native_polars(native_object): @@ -534,6 +564,25 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 raise TypeError(msg) return Version.V1.dataframe(InterchangeFrame(native_object), level="interchange") + compliant_object = plugins.from_native(native_object, version) + if compliant_object is not None: + translated = _translate_if_compliant( + compliant_object, + pass_through=pass_through, + eager_only=eager_only, + eager_or_interchange_only=eager_or_interchange_only, + series_only=series_only, + allow_series=allow_series, + version=version, + ) + # `_translate_if_compliant` returns its input untouched only when passing through; + # in that case hand back the caller's own object, not the plugin's wrapper. + if translated is compliant_object: + return native_object + # A plugin result that is not a compliant frame/series falls through to the generic handling below. + if translated is not None: + return translated + if not pass_through: msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(native_object)}" raise TypeError(msg) diff --git a/tests/plugins_test.py b/tests/plugins_test.py new file mode 100644 index 0000000000..82fb616aa9 --- /dev/null +++ b/tests/plugins_test.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +import pytest + +import narwhals as nw + + +def test_plugin() -> None: + pytest.importorskip("test_plugin") + df_native = {"a": [1, 1, 2], "b": [4, 5, 
6]} + lf = nw.from_native(df_native) # type: ignore[call-overload] + assert isinstance(lf, nw.LazyFrame) + assert lf.columns == ["a", "b"] diff --git a/tests/test_plugin/__init__.py b/tests/test_plugin/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_plugin/pyproject.toml b/tests/test_plugin/pyproject.toml new file mode 100644 index 0000000000..1deb3ef33b --- /dev/null +++ b/tests/test_plugin/pyproject.toml @@ -0,0 +1,10 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "test_plugin" +version = "0.1.0" + +[project.entry-points.'narwhals.plugins'] +test-plugin = 'test_plugin' diff --git a/tests/test_plugin/test_plugin/__init__.py b/tests/test_plugin/test_plugin/__init__.py new file mode 100644 index 0000000000..3845cb23f9 --- /dev/null +++ b/tests/test_plugin/test_plugin/__init__.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing_extensions import TypeIs + + from narwhals.utils import Version + from tests.test_plugin.test_plugin.dataframe import DictFrame + from tests.test_plugin.test_plugin.namespace import DictNamespace + + +def __narwhals_namespace__(version: Version) -> DictNamespace: # noqa: N807 + from tests.test_plugin.test_plugin.namespace import DictNamespace + + return DictNamespace(version=version) + + +def is_native(native_object: object) -> TypeIs[DictFrame]: + return isinstance(native_object, dict) + + +NATIVE_PACKAGE = "builtins" diff --git a/tests/test_plugin/test_plugin/dataframe.py b/tests/test_plugin/test_plugin/dataframe.py new file mode 100644 index 0000000000..74d539b2ec --- /dev/null +++ b/tests/test_plugin/test_plugin/dataframe.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from narwhals._utils import ( + Implementation, + ValidateBackendVersion, + Version, + not_implemented, +) +from narwhals.typing import CompliantLazyFrame + 
+if TYPE_CHECKING: + from typing_extensions import Self, TypeAlias + + from narwhals import LazyFrame # noqa: F401 + +DictFrame: TypeAlias = dict[str, list[Any]] + + +class DictLazyFrame( + CompliantLazyFrame[Any, "DictFrame", "LazyFrame[DictFrame]"], # type: ignore[type-var] + ValidateBackendVersion, +): + _implementation = Implementation.UNKNOWN + + def __init__(self, native_dataframe: DictFrame, *, version: Version) -> None: + self._native_frame: DictFrame = native_dataframe + self._version = version + + def __narwhals_lazyframe__(self) -> Self: + return self + + @property + def columns(self) -> list[str]: # pragma: no cover + return list(self._native_frame.keys()) + + _with_native = not_implemented() + + def _with_version(self, version: Version) -> Self: + return self.__class__(self._native_frame, version=version) + + # Dunders + __narwhals_namespace__ = not_implemented() + __native_namespace__ = not_implemented() + + # Properties + schema = not_implemented() # type: ignore[assignment] + + # Static + _is_native = not_implemented() # type: ignore[assignment] + + # Helpers + _iter_columns = not_implemented() + + # Functions + aggregate = not_implemented() + collect = not_implemented() + collect_schema = not_implemented() + drop = not_implemented() + drop_nulls = not_implemented() + explode = not_implemented() + filter = not_implemented() + from_native = not_implemented() + group_by = not_implemented() + head = not_implemented() + join = not_implemented() + join_asof = not_implemented() + rename = not_implemented() + select = not_implemented() + simple_select = not_implemented() + sink_parquet = not_implemented() + sort = not_implemented() + tail = not_implemented() + to_narwhals = not_implemented() + unique = not_implemented() + unpivot = not_implemented() + with_columns = not_implemented() + with_row_index = not_implemented() diff --git a/tests/test_plugin/test_plugin/namespace.py b/tests/test_plugin/test_plugin/namespace.py new file mode 100644 index 
0000000000..3ab1a04443 --- /dev/null +++ b/tests/test_plugin/test_plugin/namespace.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from narwhals._compliant import CompliantNamespace +from narwhals._utils import not_implemented +from tests.test_plugin.test_plugin.dataframe import DictFrame, DictLazyFrame + +if TYPE_CHECKING: + from narwhals.utils import Version + + +class DictNamespace(CompliantNamespace[DictLazyFrame, Any]): + def __init__(self, *, version: Version) -> None: + self._version = version + + def from_native(self, native_object: DictFrame) -> DictLazyFrame: + return DictLazyFrame(native_object, version=self._version) + + is_native: Any = not_implemented() + _expr: Any = not_implemented() + _implementation: Any = not_implemented() + len: Any = not_implemented() + lit: Any = not_implemented() + all_horizontal: Any = not_implemented() + any_horizontal: Any = not_implemented() + sum_horizontal: Any = not_implemented() + mean_horizontal: Any = not_implemented() + min_horizontal: Any = not_implemented() + max_horizontal: Any = not_implemented() + concat: Any = not_implemented() + when: Any = not_implemented() + concat_str: Any = not_implemented() + selectors: Any = not_implemented() + coalesce: Any = not_implemented() diff --git a/tests/test_plugin/test_plugin/py.typed b/tests/test_plugin/test_plugin/py.typed new file mode 100644 index 0000000000..e69de29bb2