Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
maybe_reset_index,
maybe_set_index,
)
from narwhals.api import (
register_dataframe_namespace,
register_expr_namespace,
register_lazyframe_namespace,
)
from narwhals.dataframe import DataFrame, LazyFrame
from narwhals.dtypes import (
Array,
Expand Down Expand Up @@ -161,6 +166,9 @@
"nth",
"read_csv",
"read_parquet",
"register_dataframe_namespace",
"register_expr_namespace",
"register_lazyframe_namespace",
"scan_csv",
"scan_parquet",
"selectors",
Expand Down
8 changes: 8 additions & 0 deletions narwhals/_reexport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Re-export Narwhals functionality to avoid cyclical imports."""
from __future__ import annotations

from narwhals.dataframe import DataFrame, LazyFrame
from narwhals.expr import Expr
from narwhals.series import Series

__all__ = ["DataFrame", "Expr", "LazyFrame", "Series"]
366 changes: 366 additions & 0 deletions narwhals/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,366 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Generic, TypeVar
from warnings import warn

import narwhals._reexport as nw
from narwhals._utils import find_stacklevel

if TYPE_CHECKING:
from narwhals import DataFrame, Expr, LazyFrame, Series


__all__ = [
"register_dataframe_namespace",
"register_expr_namespace",
"register_lazyframe_namespace",
"register_series_namespace",
]

# Snapshot, taken when this module is imported, of every accessor name the core
# Narwhals classes list in their '_accessors' collections. User-registered
# namespaces are not allowed to reuse any of these names.
_reserved_namespaces: set[str] = {
    accessor
    for owner in (nw.DataFrame, nw.Expr, nw.LazyFrame, nw.Series)
    for accessor in owner._accessors
}


# Type variable standing for the user-supplied namespace class.
NS = TypeVar("NS")


class NameSpace(Generic[NS]):
    """Descriptor exposing a user-defined namespace class as an attribute.

    Looked up on the owning class it yields the namespace class itself; looked
    up on an instance it constructs the namespace around that instance and
    stores the result on the instance under the accessor name.
    """

    def __init__(self, name: str, namespace: type[NS]) -> None:
        self._ns = namespace
        self._accessor = name

    def __get__(self, instance: NS | None, cls: type[NS]) -> NS | type[NS]:
        if instance is not None:
            # Wrap the host object, then store the wrapper on the instance.
            # This descriptor defines no __set__, so for instances with a
            # __dict__ the stored attribute is found first on later lookups.
            bound = self._ns(instance)  # type: ignore[call-arg]
            setattr(instance, self._accessor, bound)
            return bound

        # Class-level access: hand back the registered namespace class.
        return self._ns


def _create_namespace(
    name: str, cls: type[Expr | DataFrame | LazyFrame | Series]
) -> Callable[[type[NS]], type[NS]]:
    """Build the class decorator that attaches a namespace called `name` to `cls`.

    Parameters
    ----------
    name
        Attribute name under which the namespace will be reachable.
    cls
        Narwhals class that receives the namespace.

    Returns
    -------
    Callable
        Decorator that installs the decorated class on `cls` as a `NameSpace`
        descriptor, records `name` in `cls._accessors`, and returns the
        decorated class unchanged.

    Notes
    -----
    The checks run when the returned decorator is applied, not when this
    factory is called: a `name` found in `_reserved_namespaces` raises
    `AttributeError`, and a `name` that `cls` already has as an attribute emits
    a `UserWarning` before the attribute is replaced.
    """

    def decorator(ns_class: type[NS]) -> type[NS]:
        if name in _reserved_namespaces:
            msg = f"cannot override reserved namespace {name!r}"
            raise AttributeError(msg)

        # `hasattr` is true for any existing attribute of `cls`, not only for
        # previously registered namespaces; the override proceeds either way.
        if hasattr(cls, name):
            override_msg = (
                f"Overriding existing custom namespace {name!r} (on {cls.__name__!r})"
            )
            warn(override_msg, UserWarning, stacklevel=find_stacklevel())

        descriptor = NameSpace(name, ns_class)
        setattr(cls, name, descriptor)
        cls._accessors.add(name)
        return ns_class

    return decorator


def register_expr_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """Decorator for registering custom functionality with a Narwhals Expr.

    Parameters
    ----------
    name
        Name under which the functionality will be accessed.

    Returns
    -------
    Callable
        Class decorator that attaches the decorated class to `Expr` under
        `name` and returns the class unchanged.

    Raises
    ------
    AttributeError
        When the decorator is applied and `name` is one of Narwhals' reserved
        namespaces.

    Warns
    -----
    UserWarning
        When the decorator is applied and `Expr` already has an attribute
        called `name`; the existing attribute is replaced.

    See Also
    --------
    register_dataframe_namespace : Register functionality on a DataFrame.
    register_lazyframe_namespace : Register functionality on a LazyFrame.
    register_series_namespace : Register functionality on a Series.

    Examples
    --------
    >>> @nw.api.register_expr_namespace("pow_n")
    ... class PowersOfN:
    ...     def __init__(self, expr: nw.Expr) -> None:
    ...         self._expr = expr
    ...
    ...     def next(self, p: int) -> nw.Expr:
    ...         return (p ** (self._expr.log(p).ceil()).cast(nw.Int64)).cast(nw.Int64)
    ...
    ...     def previous(self, p: int) -> nw.Expr:
    ...         return (p ** (self._expr.log(p).floor()).cast(nw.Int64)).cast(nw.Int64)
    ...
    ...     def nearest(self, p: int) -> nw.Expr:
    ...         return (p ** (self._expr.log(p)).round(0).cast(nw.Int64)).cast(nw.Int64)
    >>>
    >>> df = nw.DataFrame([1.4, 24.3, 55.0, 64.001], schema=["n"])
    >>> df.select(
    ...     nw.col("n"),
    ...     nw.col("n").pow_n.next(p=2).alias("next_pow2"),
    ...     nw.col("n").pow_n.previous(p=2).alias("prev_pow2"),
    ...     nw.col("n").pow_n.nearest(p=2).alias("nearest_pow2"),
    ... )
    shape: (4, 4)
    ┌────────┬───────────┬───────────┬──────────────┐
    │ n      ┆ next_pow2 ┆ prev_pow2 ┆ nearest_pow2 │
    │ ---    ┆ ---       ┆ ---       ┆ ---          │
    │ f64    ┆ i64       ┆ i64       ┆ i64          │
    ╞════════╪═══════════╪═══════════╪══════════════╡
    │ 1.4    ┆ 2         ┆ 1         ┆ 1            │
    │ 24.3   ┆ 32        ┆ 16        ┆ 32           │
    │ 55.0   ┆ 64        ┆ 32        ┆ 64           │
    │ 64.001 ┆ 128       ┆ 64        ┆ 64           │
    └────────┴───────────┴───────────┴──────────────┘
    """
    # NOTE(review): the doctest above has not been verified here — confirm the
    # constructor call and printed output against the current Narwhals API.
    return _create_namespace(name, nw.Expr)


def register_dataframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """Decorator for registering custom functionality with a Narwhals DataFrame.

    Parameters
    ----------
    name
        Name under which the functionality will be accessed.

    Returns
    -------
    Callable
        Class decorator that attaches the decorated class to `DataFrame` under
        `name` and returns the class unchanged.

    Raises
    ------
    AttributeError
        When the decorator is applied and `name` is one of Narwhals' reserved
        namespaces.

    Warns
    -----
    UserWarning
        When the decorator is applied and `DataFrame` already has an attribute
        called `name`; the existing attribute is replaced.

    See Also
    --------
    register_expr_namespace : Register functionality on an Expr.
    register_lazyframe_namespace : Register functionality on a LazyFrame.
    register_series_namespace : Register functionality on a Series.

    Examples
    --------
    >>> @nw.api.register_dataframe_namespace("split")
    ... class SplitFrame:
    ...     def __init__(self, df: nw.DataFrame) -> None:
    ...         self._df = df
    ...
    ...     def by_first_letter_of_column_names(self) -> list[nw.DataFrame]:
    ...         return [
    ...             self._df.select([col for col in self._df.columns if col[0] == f])
    ...             for f in dict.fromkeys(col[0] for col in self._df.columns)
    ...         ]
    ...
    ...     def by_first_letter_of_column_values(self, col: str) -> list[nw.DataFrame]:
    ...         return [
    ...             self._df.filter(nw.col(col).str.starts_with(c))
    ...             for c in sorted(
    ...                 set(self._df.select(nw.col(col).str.slice(0, 1)).to_series())
    ...             )
    ...         ]
    >>>
    >>> df = nw.DataFrame(
    ...     data=[["xx", 2, 3, 4], ["xy", 4, 5, 6], ["yy", 5, 6, 7], ["yz", 6, 7, 8]],
    ...     schema=["a1", "a2", "b1", "b2"],
    ...     orient="row",
    ... )
    >>> df
    shape: (4, 4)
    ┌─────┬─────┬─────┬─────┐
    │ a1  ┆ a2  ┆ b1  ┆ b2  │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╡
    │ xx  ┆ 2   ┆ 3   ┆ 4   │
    │ xy  ┆ 4   ┆ 5   ┆ 6   │
    │ yy  ┆ 5   ┆ 6   ┆ 7   │
    │ yz  ┆ 6   ┆ 7   ┆ 8   │
    └─────┴─────┴─────┴─────┘
    >>> df.split.by_first_letter_of_column_names()
    [shape: (4, 2)
    ┌─────┬─────┐
    │ a1  ┆ a2  │
    │ --- ┆ --- │
    │ str ┆ i64 │
    ╞═════╪═════╡
    │ xx  ┆ 2   │
    │ xy  ┆ 4   │
    │ yy  ┆ 5   │
    │ yz  ┆ 6   │
    └─────┴─────┘,
    shape: (4, 2)
    ┌─────┬─────┐
    │ b1  ┆ b2  │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 3   ┆ 4   │
    │ 5   ┆ 6   │
    │ 6   ┆ 7   │
    │ 7   ┆ 8   │
    └─────┴─────┘]
    >>> df.split.by_first_letter_of_column_values("a1")
    [shape: (2, 4)
    ┌─────┬─────┬─────┬─────┐
    │ a1  ┆ a2  ┆ b1  ┆ b2  │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╡
    │ xx  ┆ 2   ┆ 3   ┆ 4   │
    │ xy  ┆ 4   ┆ 5   ┆ 6   │
    └─────┴─────┴─────┴─────┘, shape: (2, 4)
    ┌─────┬─────┬─────┬─────┐
    │ a1  ┆ a2  ┆ b1  ┆ b2  │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╡
    │ yy  ┆ 5   ┆ 6   ┆ 7   │
    │ yz  ┆ 6   ┆ 7   ┆ 8   │
    └─────┴─────┴─────┴─────┘]
    """
    # NOTE(review): the doctest above has not been verified here — confirm the
    # constructor call and printed output against the current Narwhals API.
    return _create_namespace(name, nw.DataFrame)


def register_lazyframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """Decorator for registering custom functionality with a Narwhals LazyFrame.

    Parameters
    ----------
    name
        Name under which the functionality will be accessed.

    Returns
    -------
    Callable
        Class decorator that attaches the decorated class to `LazyFrame` under
        `name` and returns the class unchanged.

    Raises
    ------
    AttributeError
        When the decorator is applied and `name` is one of Narwhals' reserved
        namespaces.

    Warns
    -----
    UserWarning
        When the decorator is applied and `LazyFrame` already has an attribute
        called `name`; the existing attribute is replaced.

    See Also
    --------
    register_expr_namespace : Register functionality on an Expr.
    register_dataframe_namespace : Register functionality on a DataFrame.
    register_series_namespace : Register functionality on a Series.

    Examples
    --------
    >>> @nw.api.register_lazyframe_namespace("types")
    ... class DTypeOperations:
    ...     def __init__(self, lf: nw.LazyFrame) -> None:
    ...         self._lf = lf
    ...
    ...     def split_by_column_dtypes(self) -> list[nw.LazyFrame]:
    ...         return [
    ...             self._lf.select(nw.col(tp))
    ...             for tp in dict.fromkeys(self._lf.collect_schema().dtypes())
    ...         ]
    ...
    ...     def upcast_integer_types(self) -> nw.LazyFrame:
    ...         return self._lf.with_columns(
    ...             nw.col(tp).cast(nw.Int64) for tp in (nw.Int8, nw.Int16, nw.Int32)
    ...         )
    >>>
    >>> lf = nw.LazyFrame(
    ...     data={"a": [1, 2], "b": [3, 4], "c": [5.6, 6.7]},
    ...     schema=[("a", nw.Int16), ("b", nw.Int32), ("c", nw.Float32)],
    ... )
    >>> lf.collect()
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ i16 ┆ i32 ┆ f32 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 5.6 │
    │ 2   ┆ 4   ┆ 6.7 │
    └─────┴─────┴─────┘
    >>> lf.types.upcast_integer_types().collect()
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ f32 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 5.6 │
    │ 2   ┆ 4   ┆ 6.7 │
    └─────┴─────┴─────┘

    >>> lf = nw.LazyFrame(
    ...     data=[["xx", 2, 3, 4], ["xy", 4, 5, 6], ["yy", 5, 6, 7], ["yz", 6, 7, 8]],
    ...     schema=["a1", "a2", "b1", "b2"],
    ...     orient="row",
    ... )
    >>> lf.collect()
    shape: (4, 4)
    ┌─────┬─────┬─────┬─────┐
    │ a1  ┆ a2  ┆ b1  ┆ b2  │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╡
    │ xx  ┆ 2   ┆ 3   ┆ 4   │
    │ xy  ┆ 4   ┆ 5   ┆ 6   │
    │ yy  ┆ 5   ┆ 6   ┆ 7   │
    │ yz  ┆ 6   ┆ 7   ┆ 8   │
    └─────┴─────┴─────┴─────┘
    >>> nw.collect_all(lf.types.split_by_column_dtypes())
    [shape: (4, 1)
    ┌─────┐
    │ a1  │
    │ --- │
    │ str │
    ╞═════╡
    │ xx  │
    │ xy  │
    │ yy  │
    │ yz  │
    └─────┘, shape: (4, 3)
    ┌─────┬─────┬─────┐
    │ a2  ┆ b1  ┆ b2  │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╡
    │ 2   ┆ 3   ┆ 4   │
    │ 4   ┆ 5   ┆ 6   │
    │ 5   ┆ 6   ┆ 7   │
    │ 6   ┆ 7   ┆ 8   │
    └─────┴─────┴─────┘]
    """
    # NOTE(review): the doctest above has not been verified here — confirm the
    # constructor call and printed output against the current Narwhals API.
    return _create_namespace(name, nw.LazyFrame)


def register_series_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """Decorator for registering custom functionality with a Narwhals Series.

    Parameters
    ----------
    name
        Name under which the functionality will be accessed.

    Returns
    -------
    Callable
        Class decorator that attaches the decorated class to `Series` under
        `name` and returns the class unchanged.

    Raises
    ------
    AttributeError
        When the decorator is applied and `name` is one of Narwhals' reserved
        namespaces.

    Warns
    -----
    UserWarning
        When the decorator is applied and `Series` already has an attribute
        called `name`; the existing attribute is replaced.

    See Also
    --------
    register_expr_namespace : Register functionality on an Expr.
    register_dataframe_namespace : Register functionality on a DataFrame.
    register_lazyframe_namespace : Register functionality on a LazyFrame.

    Examples
    --------
    >>> @nw.api.register_series_namespace("math")
    ... class MathShortcuts:
    ...     def __init__(self, s: nw.Series) -> None:
    ...         self._s = s
    ...
    ...     def square(self) -> nw.Series:
    ...         return self._s * self._s
    ...
    ...     def cube(self) -> nw.Series:
    ...         return self._s * self._s * self._s
    >>>
    >>> s = nw.Series("n", [1.5, 31.0, 42.0, 64.5])
    >>> s.math.square().alias("s^2")
    shape: (4,)
    Series: 's^2' [f64]
    [
        2.25
        961.0
        1764.0
        4160.25
    ]
    >>> s = nw.Series("n", [1, 2, 3, 4, 5])
    >>> s.math.cube().alias("s^3")
    shape: (5,)
    Series: 's^3' [i64]
    [
        1
        8
        27
        64
        125
    ]
    """
    # NOTE(review): the doctest above has not been verified here — confirm the
    # constructor call and printed output against the current Narwhals API.
    # NOTE(review): this function is listed in this module's `__all__`, but the
    # `narwhals/__init__.py` import of the register functions appears to omit
    # it — confirm whether a top-level export is intended.
    return _create_namespace(name, nw.Series)
Loading