-
Notifications
You must be signed in to change notification settings - Fork 168
Description
Background
Currently we raise an AssertionError in many places where Implementation.UNKNOWN could appear:
Implementation.(to_native_namespace|_backend_version)
Lines 363 to 371 in 4d73399
| def to_native_namespace(self) -> ModuleType: | |
| """Return the native namespace module corresponding to Implementation. | |
| Returns: | |
| Native module. | |
| """ | |
| if self is Implementation.UNKNOWN: | |
| msg = "Cannot return native namespace from UNKNOWN Implementation" | |
| raise AssertionError(msg) |
Lines 624 to 633 in 4d73399
| # NOTE: We can safely use an unbounded cache, the size is constrained by `len(Implementation._member_names_)` | |
| # Faster than `lru_cache` | |
| # https://docs.python.org/3/library/functools.html#functools.cache | |
| @cache | |
| def backend_version(implementation: Implementation, /) -> tuple[int, ...]: | |
| if not isinstance(implementation, Implementation): | |
| assert_never(implementation) | |
| if implementation is Implementation.UNKNOWN: # pragma: no cover | |
| msg = "Cannot return backend version from UNKNOWN Implementation" | |
| raise AssertionError(msg) |
However, this seems to be in conflict with how functions.py depends on Implementation methods:
narwhals.functions.py
narwhals/narwhals/functions.py
Lines 229 to 247 in 4d73399
| def _new_series_impl( | |
| name: str, | |
| values: Any, | |
| dtype: IntoDType | None = None, | |
| *, | |
| backend: ModuleType | Implementation | str, | |
| ) -> Series[Any]: | |
| implementation = Implementation.from_backend(backend) | |
| if is_eager_allowed(implementation): | |
| ns = Version.MAIN.namespace.from_backend(implementation).compliant | |
| series = ns._series.from_iterable(values, name=name, context=ns, dtype=dtype) | |
| return series.to_narwhals() | |
| elif implementation is Implementation.UNKNOWN: # pragma: no cover | |
| _native_namespace = implementation.to_native_namespace() | |
| try: | |
| native_series: NativeSeries = _native_namespace.new_series( | |
| name, values, dtype | |
| ) | |
| return from_native(native_series, series_only=True).alias(name) |
narwhals/narwhals/functions.py
Lines 322 to 327 in 4d73399
| elif implementation is Implementation.UNKNOWN: # pragma: no cover | |
| _native_namespace = implementation.to_native_namespace() | |
| try: | |
| # implementation is UNKNOWN, Narwhals extension using this feature should | |
| # implement `from_dict` function in the top-level namespace. | |
| native_frame: NativeFrame = _native_namespace.from_dict(data, schema=schema) |
narwhals/narwhals/functions.py
Lines 428 to 433 in 4d73399
| elif implementation is Implementation.UNKNOWN: # pragma: no cover | |
| _native_namespace = implementation.to_native_namespace() | |
| try: | |
| # implementation is UNKNOWN, Narwhals extension using this feature should | |
| # implement `from_numpy` function in the top-level namespace. | |
| native_frame: NativeFrame = _native_namespace.from_numpy(data, schema=schema) |
narwhals/narwhals/functions.py
Lines 513 to 519 in 4d73399
| elif implementation is Implementation.UNKNOWN: # pragma: no cover | |
| _native_namespace = implementation.to_native_namespace() | |
| try: | |
| # implementation is UNKNOWN, Narwhals extension using this feature should | |
| # implement PyCapsule support | |
| native: NativeFrame = _native_namespace.DataFrame(native_frame) | |
| except AttributeError as e: |
narwhals/narwhals/functions.py
Lines 678 to 679 in 4d73399
| eager_backend = Implementation.from_backend(backend) | |
| native_namespace = eager_backend.to_native_namespace() |
narwhals/narwhals/functions.py
Lines 754 to 755 in 4d73399
| implementation = Implementation.from_backend(backend) | |
| native_namespace = implementation.to_native_namespace() |
narwhals/narwhals/functions.py
Lines 847 to 848 in 4d73399
| implementation = Implementation.from_backend(backend) | |
| native_namespace = implementation.to_native_namespace() |
narwhals/narwhals/functions.py
Lines 952 to 953 in 4d73399
| implementation = Implementation.from_backend(backend) | |
| native_namespace = implementation.to_native_namespace() |
There's also get_native_namespace, where we transform that into a TypeError:
narwhals.translate.get_native_namespace
narwhals/narwhals/translate.py
Lines 607 to 622 in 4d73399
| def _get_native_namespace_single_obj( | |
| obj: DataFrame[Any] | LazyFrame[Any] | Series[Any] | IntoFrame | IntoSeries, | |
| ) -> Any: | |
| from contextlib import suppress | |
| from narwhals._utils import has_native_namespace | |
| with suppress(TypeError, AssertionError): | |
| return Version.MAIN.namespace.from_native_object( | |
| obj | |
| ).implementation.to_native_namespace() | |
| if has_native_namespace(obj): | |
| return obj.__native_namespace__() | |
| msg = f"Could not get native namespace from object of type: {type(obj)}" | |
| raise TypeError(msg) |
Issue
I believe this prevents extending narwhals across most IO functions.
This means we require an extension to always provide one of the following, and to go through from_native / to_native:
CompliantSeries
CompliantDataFrame
CompliantLazyFrame
Solution(s)
I've only really started thinking about this since (#2764), so I'm open to any ideas.
Backend version
backend_version should return (0, 0, 0) for Implementation.UNKNOWN.
Any logic we have for comparing versions isn't relevant for an extension, but we might need to adjust some other places that use Mapping[Implementation, ...] unconditionally
IO functions
They're a bit trickier, as we can't accept Implementation.UNKNOWN right now.
But, if these functions were methods on CompliantNamespace - we could accept any object implementing __narwhals_namespace__.
We could even get typing working* if we added protocols for it.
SupportsNarwhalsNamespace
Just a proof-of-concept
We would still need to make some decisions in (#2713) on what is public in each *Namespace protocol.
The TypeVars are needed for ._dataframe, ._lazyframe, ._series - but ideally they shouldn't need to depend on our internals.
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Protocol
from narwhals._compliant.typing import (
CompliantFrameT,
CompliantLazyFrameT,
EagerDataFrameT,
EagerSeriesT,
)
if TYPE_CHECKING:
from narwhals._compliant.namespace import (
CompliantNamespace,
EagerNamespace,
LazyNamespace,
)
class SupportsNarwhalsNamespace(Protocol[CompliantFrameT]):
    """Structural type for any object that implements `__narwhals_namespace__`.

    The `CompliantFrameT` type parameter is forwarded to the returned
    `CompliantNamespace`.
    """

    def __narwhals_namespace__(self) -> CompliantNamespace[CompliantFrameT, Any]:
        """Return a `CompliantNamespace` parametrized by `CompliantFrameT`."""
        ...
class SupportsLazyNamespace(
    SupportsNarwhalsNamespace[CompliantLazyFrameT], Protocol[CompliantLazyFrameT]
):
    """Narrowing of `SupportsNarwhalsNamespace` whose hook returns a `LazyNamespace`.

    The `CompliantLazyFrameT` type parameter is forwarded to the returned
    `LazyNamespace`.
    """

    def __narwhals_namespace__(self) -> LazyNamespace[CompliantLazyFrameT, Any, Any]:
        """Return a `LazyNamespace` parametrized by `CompliantLazyFrameT`."""
        ...
class SupportsEagerNamespace(
    SupportsNarwhalsNamespace[EagerDataFrameT], Protocol[EagerDataFrameT, EagerSeriesT]
):
    """Narrowing of `SupportsNarwhalsNamespace` whose hook returns an `EagerNamespace`.

    The `EagerDataFrameT` and `EagerSeriesT` type parameters are forwarded to
    the returned `EagerNamespace`.
    """

    def __narwhals_namespace__(
        self,
    ) -> EagerNamespace[EagerDataFrameT, EagerSeriesT, Any, Any, Any]:
        """Return an `EagerNamespace` parametrized by the frame and series types."""
        ...