diff --git a/narwhals/__init__.py b/narwhals/__init__.py
index a910f0e0e4..6854b86763 100644
--- a/narwhals/__init__.py
+++ b/narwhals/__init__.py
@@ -13,6 +13,12 @@
     maybe_reset_index,
     maybe_set_index,
 )
+from narwhals.api import (
+    register_dataframe_namespace,
+    register_expr_namespace,
+    register_lazyframe_namespace,
+    register_series_namespace,
+)
 from narwhals.dataframe import DataFrame, LazyFrame
 from narwhals.dtypes import (
     Array,
@@ -161,6 +167,10 @@
     "nth",
     "read_csv",
     "read_parquet",
+    "register_dataframe_namespace",
+    "register_expr_namespace",
+    "register_lazyframe_namespace",
+    "register_series_namespace",
     "scan_csv",
     "scan_parquet",
     "selectors",
diff --git a/narwhals/_reexport.py b/narwhals/_reexport.py
new file mode 100644
index 0000000000..42d7fe471e
--- /dev/null
+++ b/narwhals/_reexport.py
@@ -0,0 +1,8 @@
+"""Re-export Narwhals functionality to avoid cyclical imports."""
+from __future__ import annotations
+
+from narwhals.dataframe import DataFrame, LazyFrame
+from narwhals.expr import Expr
+from narwhals.series import Series
+
+__all__ = ["DataFrame", "Expr", "LazyFrame", "Series"]
diff --git a/narwhals/api.py b/narwhals/api.py
new file mode 100644
index 0000000000..fc1bf6fec1
--- /dev/null
+++ b/narwhals/api.py
@@ -0,0 +1,390 @@
+"""Decorators for attaching user-defined namespaces to Narwhals classes."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable, Generic, TypeVar
+from warnings import warn
+
+import narwhals._reexport as nw
+from narwhals._utils import find_stacklevel
+
+if TYPE_CHECKING:
+    from narwhals import DataFrame, Expr, LazyFrame, Series
+
+
+__all__ = [
+    "register_dataframe_namespace",
+    "register_expr_namespace",
+    "register_lazyframe_namespace",
+    "register_series_namespace",
+]
+
+# NOTE(review): the doctest examples in this module appear to mirror Polars
+# usage (direct `nw.DataFrame(...)` construction, `nw.collect_all`, Polars-style
+# reprs); confirm they run under Narwhals or treat them as illustrative only.
+
+# do not allow override of Narwhals' own namespaces (as registered by '_accessors')
+_reserved_namespaces: set[str] = set.union(
+    *(cls._accessors for cls in (nw.DataFrame, nw.Expr, nw.LazyFrame, nw.Series))
+)
+
+
+NS = TypeVar("NS")
+
+
+class NameSpace(Generic[NS]):
+    """Establish property-like namespace object for user-defined functionality.
+
+    This is a descriptor: it is assigned as a class attribute on the target
+    Narwhals class, and builds the user's namespace object on attribute access.
+    """
+
+    def __init__(self, name: str, namespace: type[NS]) -> None:
+        # attribute name the namespace is exposed under, and the user's class
+        self._accessor = name
+        self._ns = namespace
+
+    def __get__(self, instance: NS | None, cls: type[NS]) -> NS | type[NS]:
+        # class-level access (no instance) returns the namespace class itself
+        if instance is None:
+            return self._ns
+
+        # wrap the instance in the user's class, then store the result on the
+        # instance under the accessor name; as this descriptor defines no
+        # `__set__`, that stored attribute is found first on later lookups
+        ns_instance = self._ns(instance)  # type: ignore[call-arg]
+        setattr(instance, self._accessor, ns_instance)
+        return ns_instance
+
+
+def _create_namespace(
+    name: str, cls: type[Expr | DataFrame | LazyFrame | Series]
+) -> Callable[[type[NS]], type[NS]]:
+    """Register custom namespace against the underlying Narwhals class.
+
+    Returns a class decorator which attaches the decorated class to `cls` under
+    `name` (via `NameSpace`), records `name` in `cls._accessors`, and returns the
+    decorated class unchanged.
+
+    The decorator raises `AttributeError` if `name` is one of Narwhals' reserved
+    namespaces, and emits a `UserWarning` if `cls` already has an attribute
+    called `name`.
+    """
+
+    def namespace(ns_class: type[NS]) -> type[NS]:
+        if name in _reserved_namespaces:
+            msg = f"cannot override reserved namespace {name!r}"
+            raise AttributeError(msg)
+        if hasattr(cls, name):
+            warn(
+                f"Overriding existing custom namespace {name!r} (on {cls.__name__!r})",
+                UserWarning,
+                stacklevel=find_stacklevel(),
+            )
+
+        setattr(cls, name, NameSpace(name, ns_class))
+        cls._accessors.add(name)
+        return ns_class
+
+    return namespace
+
+
+def register_expr_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
+    """Decorator for registering custom functionality with a Narwhals Expr.
+
+    Parameters
+    ----------
+    name
+        Name under which the functionality will be accessed.
+
+    See Also:
+    --------
+    register_dataframe_namespace : Register functionality on a DataFrame.
+    register_lazyframe_namespace : Register functionality on a LazyFrame.
+    register_series_namespace : Register functionality on a Series.
+
+    Examples:
+    --------
+    >>> @nw.api.register_expr_namespace("pow_n")
+    ... class PowersOfN:
+    ...     def __init__(self, expr: nw.Expr) -> None:
+    ...         self._expr = expr
+    ...
+    ...     def next(self, p: int) -> nw.Expr:
+    ...         return (p ** (self._expr.log(p).ceil()).cast(nw.Int64)).cast(nw.Int64)
+    ...
+    ...     def previous(self, p: int) -> nw.Expr:
+    ...         return (p ** (self._expr.log(p).floor()).cast(nw.Int64)).cast(nw.Int64)
+    ...
+    ...     def nearest(self, p: int) -> nw.Expr:
+    ...         return (p ** (self._expr.log(p)).round(0).cast(nw.Int64)).cast(nw.Int64)
+    >>>
+    >>> df = nw.DataFrame([1.4, 24.3, 55.0, 64.001], schema=["n"])
+    >>> df.select(
+    ...     nw.col("n"),
+    ...     nw.col("n").pow_n.next(p=2).alias("next_pow2"),
+    ...     nw.col("n").pow_n.previous(p=2).alias("prev_pow2"),
+    ...     nw.col("n").pow_n.nearest(p=2).alias("nearest_pow2"),
+    ... )
+    shape: (4, 4)
+    ┌────────┬───────────┬───────────┬──────────────┐
+    │ n      ┆ next_pow2 ┆ prev_pow2 ┆ nearest_pow2 │
+    │ ---    ┆ ---       ┆ ---       ┆ ---          │
+    │ f64    ┆ i64       ┆ i64       ┆ i64          │
+    ╞════════╪═══════════╪═══════════╪══════════════╡
+    │ 1.4    ┆ 2         ┆ 1         ┆ 1            │
+    │ 24.3   ┆ 32        ┆ 16        ┆ 32           │
+    │ 55.0   ┆ 64        ┆ 32        ┆ 64           │
+    │ 64.001 ┆ 128       ┆ 64        ┆ 64           │
+    └────────┴───────────┴───────────┴──────────────┘
+    """
+    return _create_namespace(name, nw.Expr)
+
+
+def register_dataframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
+    """Decorator for registering custom functionality with a Narwhals DataFrame.
+
+    Parameters
+    ----------
+    name
+        Name under which the functionality will be accessed.
+
+    See Also:
+    --------
+    register_expr_namespace : Register functionality on an Expr.
+    register_lazyframe_namespace : Register functionality on a LazyFrame.
+    register_series_namespace : Register functionality on a Series.
+
+    Examples:
+    --------
+    >>> @nw.api.register_dataframe_namespace("split")
+    ... class SplitFrame:
+    ...     def __init__(self, df: nw.DataFrame) -> None:
+    ...         self._df = df
+    ...
+    ...     def by_first_letter_of_column_names(self) -> list[nw.DataFrame]:
+    ...         return [
+    ...             self._df.select([col for col in self._df.columns if col[0] == f])
+    ...             for f in dict.fromkeys(col[0] for col in self._df.columns)
+    ...         ]
+    ...
+    ...     def by_first_letter_of_column_values(self, col: str) -> list[nw.DataFrame]:
+    ...         return [
+    ...             self._df.filter(nw.col(col).str.starts_with(c))
+    ...             for c in sorted(
+    ...                 set(self._df.select(nw.col(col).str.slice(0, 1)).to_series())
+    ...             )
+    ...         ]
+    >>>
+    >>> df = nw.DataFrame(
+    ...     data=[["xx", 2, 3, 4], ["xy", 4, 5, 6], ["yy", 5, 6, 7], ["yz", 6, 7, 8]],
+    ...     schema=["a1", "a2", "b1", "b2"],
+    ...     orient="row",
+    ... )
+    >>> df
+    shape: (4, 4)
+    ┌─────┬─────┬─────┬─────┐
+    │ a1  ┆ a2  ┆ b1  ┆ b2  │
+    │ --- ┆ --- ┆ --- ┆ --- │
+    │ str ┆ i64 ┆ i64 ┆ i64 │
+    ╞═════╪═════╪═════╪═════╡
+    │ xx  ┆ 2   ┆ 3   ┆ 4   │
+    │ xy  ┆ 4   ┆ 5   ┆ 6   │
+    │ yy  ┆ 5   ┆ 6   ┆ 7   │
+    │ yz  ┆ 6   ┆ 7   ┆ 8   │
+    └─────┴─────┴─────┴─────┘
+    >>> df.split.by_first_letter_of_column_names()
+    [shape: (4, 2)
+    ┌─────┬─────┐
+    │ a1  ┆ a2  │
+    │ --- ┆ --- │
+    │ str ┆ i64 │
+    ╞═════╪═════╡
+    │ xx  ┆ 2   │
+    │ xy  ┆ 4   │
+    │ yy  ┆ 5   │
+    │ yz  ┆ 6   │
+    └─────┴─────┘,
+    shape: (4, 2)
+    ┌─────┬─────┐
+    │ b1  ┆ b2  │
+    │ --- ┆ --- │
+    │ i64 ┆ i64 │
+    ╞═════╪═════╡
+    │ 3   ┆ 4   │
+    │ 5   ┆ 6   │
+    │ 6   ┆ 7   │
+    │ 7   ┆ 8   │
+    └─────┴─────┘]
+    >>> df.split.by_first_letter_of_column_values("a1")
+    [shape: (2, 4)
+    ┌─────┬─────┬─────┬─────┐
+    │ a1  ┆ a2  ┆ b1  ┆ b2  │
+    │ --- ┆ --- ┆ --- ┆ --- │
+    │ str ┆ i64 ┆ i64 ┆ i64 │
+    ╞═════╪═════╪═════╪═════╡
+    │ xx  ┆ 2   ┆ 3   ┆ 4   │
+    │ xy  ┆ 4   ┆ 5   ┆ 6   │
+    └─────┴─────┴─────┴─────┘, shape: (2, 4)
+    ┌─────┬─────┬─────┬─────┐
+    │ a1  ┆ a2  ┆ b1  ┆ b2  │
+    │ --- ┆ --- ┆ --- ┆ --- │
+    │ str ┆ i64 ┆ i64 ┆ i64 │
+    ╞═════╪═════╪═════╪═════╡
+    │ yy  ┆ 5   ┆ 6   ┆ 7   │
+    │ yz  ┆ 6   ┆ 7   ┆ 8   │
+    └─────┴─────┴─────┴─────┘]
+    """
+    return _create_namespace(name, nw.DataFrame)
+
+
+def register_lazyframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
+    """Decorator for registering custom functionality with a Narwhals LazyFrame.
+
+    Parameters
+    ----------
+    name
+        Name under which the functionality will be accessed.
+
+    See Also:
+    --------
+    register_expr_namespace : Register functionality on an Expr.
+    register_dataframe_namespace : Register functionality on a DataFrame.
+    register_series_namespace : Register functionality on a Series.
+
+    Examples:
+    --------
+    >>> @nw.api.register_lazyframe_namespace("types")
+    ... class DTypeOperations:
+    ...     def __init__(self, lf: nw.LazyFrame) -> None:
+    ...         self._lf = lf
+    ...
+    ...     def split_by_column_dtypes(self) -> list[nw.LazyFrame]:
+    ...         return [
+    ...             self._lf.select(nw.col(tp))
+    ...             for tp in dict.fromkeys(self._lf.collect_schema().dtypes())
+    ...         ]
+    ...
+    ...     def upcast_integer_types(self) -> nw.LazyFrame:
+    ...         return self._lf.with_columns(
+    ...             nw.col(tp).cast(nw.Int64) for tp in (nw.Int8, nw.Int16, nw.Int32)
+    ...         )
+    >>>
+    >>> lf = nw.LazyFrame(
+    ...     data={"a": [1, 2], "b": [3, 4], "c": [5.6, 6.7]},
+    ...     schema=[("a", nw.Int16), ("b", nw.Int32), ("c", nw.Float32)],
+    ... )
+    >>> lf.collect()
+    shape: (2, 3)
+    ┌─────┬─────┬─────┐
+    │ a   ┆ b   ┆ c   │
+    │ --- ┆ --- ┆ --- │
+    │ i16 ┆ i32 ┆ f32 │
+    ╞═════╪═════╪═════╡
+    │ 1   ┆ 3   ┆ 5.6 │
+    │ 2   ┆ 4   ┆ 6.7 │
+    └─────┴─────┴─────┘
+    >>> lf.types.upcast_integer_types().collect()
+    shape: (2, 3)
+    ┌─────┬─────┬─────┐
+    │ a   ┆ b   ┆ c   │
+    │ --- ┆ --- ┆ --- │
+    │ i64 ┆ i64 ┆ f32 │
+    ╞═════╪═════╪═════╡
+    │ 1   ┆ 3   ┆ 5.6 │
+    │ 2   ┆ 4   ┆ 6.7 │
+    └─────┴─────┴─────┘
+
+    >>> lf = nw.LazyFrame(
+    ...     data=[["xx", 2, 3, 4], ["xy", 4, 5, 6], ["yy", 5, 6, 7], ["yz", 6, 7, 8]],
+    ...     schema=["a1", "a2", "b1", "b2"],
+    ...     orient="row",
+    ... )
+    >>> lf.collect()
+    shape: (4, 4)
+    ┌─────┬─────┬─────┬─────┐
+    │ a1  ┆ a2  ┆ b1  ┆ b2  │
+    │ --- ┆ --- ┆ --- ┆ --- │
+    │ str ┆ i64 ┆ i64 ┆ i64 │
+    ╞═════╪═════╪═════╪═════╡
+    │ xx  ┆ 2   ┆ 3   ┆ 4   │
+    │ xy  ┆ 4   ┆ 5   ┆ 6   │
+    │ yy  ┆ 5   ┆ 6   ┆ 7   │
+    │ yz  ┆ 6   ┆ 7   ┆ 8   │
+    └─────┴─────┴─────┴─────┘
+    >>> nw.collect_all(lf.types.split_by_column_dtypes())
+    [shape: (4, 1)
+    ┌─────┐
+    │ a1  │
+    │ --- │
+    │ str │
+    ╞═════╡
+    │ xx  │
+    │ xy  │
+    │ yy  │
+    │ yz  │
+    └─────┘, shape: (4, 3)
+    ┌─────┬─────┬─────┐
+    │ a2  ┆ b1  ┆ b2  │
+    │ --- ┆ --- ┆ --- │
+    │ i64 ┆ i64 ┆ i64 │
+    ╞═════╪═════╪═════╡
+    │ 2   ┆ 3   ┆ 4   │
+    │ 4   ┆ 5   ┆ 6   │
+    │ 5   ┆ 6   ┆ 7   │
+    │ 6   ┆ 7   ┆ 8   │
+    └─────┴─────┴─────┘]
+    """
+    return _create_namespace(name, nw.LazyFrame)
+
+
+def register_series_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
+    """Decorator for registering custom functionality with a Narwhals Series.
+
+    Parameters
+    ----------
+    name
+        Name under which the functionality will be accessed.
+
+    See Also:
+    --------
+    register_expr_namespace : Register functionality on an Expr.
+    register_dataframe_namespace : Register functionality on a DataFrame.
+    register_lazyframe_namespace : Register functionality on a LazyFrame.
+
+    Examples:
+    --------
+    >>> @nw.api.register_series_namespace("math")
+    ... class MathShortcuts:
+    ...     def __init__(self, s: nw.Series) -> None:
+    ...         self._s = s
+    ...
+    ...     def square(self) -> nw.Series:
+    ...         return self._s * self._s
+    ...
+    ...     def cube(self) -> nw.Series:
+    ...         return self._s * self._s * self._s
+    >>>
+    >>> s = nw.Series("n", [1.5, 31.0, 42.0, 64.5])
+    >>> s.math.square().alias("s^2")
+    shape: (4,)
+    Series: 's^2' [f64]
+    [
+        2.25
+        961.0
+        1764.0
+        4160.25
+    ]
+    >>> s = nw.Series("n", [1, 2, 3, 4, 5])
+    >>> s.math.cube().alias("s^3")
+    shape: (5,)
+    Series: 's^3' [i64]
+    [
+        1
+        8
+        27
+        64
+        125
+    ]
+    """
+    return _create_namespace(name, nw.Series)
diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py
index 303ff0636d..e8b13f2036 100644
--- a/narwhals/dataframe.py
+++ b/narwhals/dataframe.py
@@ -6,6 +6,7 @@
     TYPE_CHECKING,
     Any,
     Callable,
+    ClassVar,
     Generic,
     Literal,
     NoReturn,
@@ -425,6 +426,8 @@ class DataFrame(BaseFrame[DataFrameT]):
     ```
     """
 
+    _accessors: ClassVar[set[str]] = set()
+
     def _extract_compliant(self, arg: Any) -> Any:
         from narwhals.expr import Expr
         from narwhals.series import Series
@@ -2181,6 +2184,8 @@ class LazyFrame(BaseFrame[FrameT]):
     ```
     """
 
+    _accessors: ClassVar[set[str]] = set()
+
     def _extract_compliant(self, arg: Any) -> Any:
         from narwhals.expr import Expr
         from narwhals.series import Series
diff --git a/narwhals/expr.py b/narwhals/expr.py
index b5e01a3919..3bdf12212b 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -2,7 +2,7 @@
 
 import math
 from collections.abc import Iterable, Mapping, Sequence
-from typing import TYPE_CHECKING, Any, Callable
+from typing import TYPE_CHECKING, Any, Callable, ClassVar
 
 from narwhals._expression_parsing import (
     ExprMetadata,
@@ -53,6 +53,8 @@
 
 
 class Expr:
+    _accessors: ClassVar[set[str]] = {"cat", "dt", "list", "name", "str", "struct"}
+
     def __init__(self, to_compliant_expr: _ToCompliant, metadata: ExprMetadata) -> None:
         # callable from CompliantNamespace to CompliantExpr
         def func(plx: CompliantNamespace[Any, Any]) -> CompliantExpr[Any, Any]:
diff --git a/narwhals/series.py b/narwhals/series.py
index 147f7794ce..acc2ff9df9 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -2,7 +2,7 @@
 
 import math
 from collections.abc import Iterator, Mapping, Sequence
-from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, overload
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, Generic, Literal, overload
 
 from narwhals._utils import (
     _validate_rolling_arguments,
@@ -71,6 +71,8 @@ class Series(Generic[IntoSeriesT]):
     ```
     """
 
+    _accessors: ClassVar[set[str]] = {"cat", "dt", "list", "str", "struct"}
+
     @property
     def _dataframe(self) -> type[DataFrame[Any]]:
         from narwhals.dataframe import DataFrame