feat: Add support for quantile and ewm_mean in over context (#2774)

FBruzzesi · MarcoGorelli · web-flow · commit ff792be886ba · 2025-07-03T14:11:55.000+01:00
---------

Co-authored-by: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py
@@ -740,8 +740,7 @@ def quantile(
         return self._reuse_series(
             "quantile",
             returns_scalar=True,
-            quantile=quantile,
-            interpolation=interpolation,
+            scalar_kwargs={"quantile": quantile, "interpolation": interpolation},
         )
 
     def head(self, n: int) -> Self:
diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py
@@ -21,19 +21,33 @@
     from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
     from narwhals._compliant.series import CompliantSeries, EagerSeries
     from narwhals._compliant.window import WindowInputs
-    from narwhals.typing import FillNullStrategy, NativeFrame, NativeSeries, RankMethod
+    from narwhals.typing import (
+        FillNullStrategy,
+        NativeFrame,
+        NativeSeries,
+        RankMethod,
+        RollingInterpolationMethod,
+    )
 
     class ScalarKwargs(TypedDict, total=False):
         """Non-expressifiable args which we may need to reuse in `agg` or `over`."""
 
+        adjust: bool
+        alpha: float | None
         center: int
+        com: float | None
         ddof: int
         descending: bool
+        half_life: float | None
+        ignore_nulls: bool
+        interpolation: RollingInterpolationMethod
         limit: int | None
         method: RankMethod
         min_samples: int
         n: int
+        quantile: float
         reverse: bool
+        span: float | None
         strategy: FillNullStrategy | None
         window_size: int
 
@@ -157,7 +171,17 @@ class ScalarKwargs(TypedDict, total=False):
 """A function evaluated with `over(partition_by=..., order_by=...)`."""
 
 NarwhalsAggregation: TypeAlias = Literal[
-    "sum", "mean", "median", "max", "min", "std", "var", "len", "n_unique", "count"
+    "sum",
+    "mean",
+    "median",
+    "max",
+    "min",
+    "std",
+    "var",
+    "len",
+    "n_unique",
+    "count",
+    "quantile",
 ]
 """`Expr` methods we aim to support in `DepthTrackingGroupBy`.
 
diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py
@@ -64,6 +64,7 @@ class DaskLazyGroupBy(DepthTrackingGroupBy["DaskLazyFrame", "DaskExpr", Aggregat
         "len": "size",
         "n_unique": n_unique,
         "count": "count",
+        "quantile": "quantile",
     }
 
     def __init__(
diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
@@ -37,6 +37,8 @@
     "rank": "rank",
     "diff": "diff",
     "fill_null": "fillna",
+    "quantile": "quantile",
+    "ewm_mean": "mean",
 }
 
 
@@ -74,6 +76,31 @@ def window_kwargs_to_pandas_equivalent(
         assert "strategy" in kwargs  # noqa: S101
         assert "limit" in kwargs  # noqa: S101
         pandas_kwargs = {"strategy": kwargs["strategy"], "limit": kwargs["limit"]}
+    elif function_name == "quantile":
+        assert "quantile" in kwargs  # noqa: S101
+        assert "interpolation" in kwargs  # noqa: S101
+        pandas_kwargs = {
+            "q": kwargs["quantile"],
+            "interpolation": kwargs["interpolation"],
+        }
+    elif function_name.startswith("ewm_"):
+        assert "com" in kwargs  # noqa: S101
+        assert "span" in kwargs  # noqa: S101
+        assert "half_life" in kwargs  # noqa: S101
+        assert "alpha" in kwargs  # noqa: S101
+        assert "adjust" in kwargs  # noqa: S101
+        assert "min_samples" in kwargs  # noqa: S101
+        assert "ignore_nulls" in kwargs  # noqa: S101
+
+        pandas_kwargs = {
+            "com": kwargs["com"],
+            "span": kwargs["span"],
+            "halflife": kwargs["half_life"],
+            "alpha": kwargs["alpha"],
+            "adjust": kwargs["adjust"],
+            "min_periods": kwargs["min_samples"],
+            "ignore_na": kwargs["ignore_nulls"],
+        }
     else:  # sum, len, ...
         pandas_kwargs = {}
     return pandas_kwargs
@@ -182,13 +209,15 @@ def ewm_mean(
     ) -> Self:
         return self._reuse_series(
             "ewm_mean",
-            com=com,
-            span=span,
-            half_life=half_life,
-            alpha=alpha,
-            adjust=adjust,
-            min_samples=min_samples,
-            ignore_nulls=ignore_nulls,
+            scalar_kwargs={
+                "com": com,
+                "span": span,
+                "half_life": half_life,
+                "alpha": alpha,
+                "adjust": adjust,
+                "min_samples": min_samples,
+                "ignore_nulls": ignore_nulls,
+            },
         )
 
     def over(  # noqa: C901, PLR0915
@@ -232,7 +261,7 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
                 function_name, self._scalar_kwargs
             )
 
-            def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:  # noqa: C901, PLR0912
+            def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:  # noqa: C901, PLR0912, PLR0914, PLR0915
                 output_names, aliases = evaluate_output_names_and_aliases(self, df, [])
                 if function_name == "cum_count":
                     plx = self.__narwhals_namespace__()
@@ -268,6 +297,18 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:  # noqa: C901,
                         )
                     else:
                         res_native = getattr(rolling, pandas_function_name)()
+                elif function_name.startswith("ewm"):
+                    if self._implementation.is_pandas() and (
+                        backend_version := self._backend_version
+                    ) < (1, 2):  # pragma: no cover
+                        msg = (
+                            "Exponentially weighted calculation is not available in over "
+                            f"context for pandas versions older than 1.2.0, found {backend_version}."
+                        )
+                        raise NotImplementedError(msg)
+                    ewm = grouped[list(output_names)].ewm(**pandas_kwargs)
+                    assert pandas_function_name is not None  # help mypy  # noqa: S101
+                    res_native = getattr(ewm, pandas_function_name)()
                 elif function_name == "fill_null":
                     assert "strategy" in self._scalar_kwargs  # noqa: S101
                     assert "limit" in self._scalar_kwargs  # noqa: S101
diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py
@@ -29,6 +29,7 @@ class PandasLikeGroupBy(EagerGroupBy["PandasLikeDataFrame", "PandasLikeExpr", st
         "len": "size",
         "n_unique": "nunique",
         "count": "count",
+        "quantile": "quantile",
     }
 
     def __init__(
diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py
@@ -432,3 +432,55 @@ def test_len_over_2369(constructor: Constructor, request: pytest.FixtureRequest)
     result = df.with_columns(a_len_per_group=nw.len().over("b")).sort("a")
     expected = {"a": [1, 2, 4], "b": ["x", "x", "y"], "a_len_per_group": [2, 2, 1]}
     assert_equal_data(result, expected)
+
+
+def test_over_quantile(constructor: Constructor, request: pytest.FixtureRequest) -> None:
+    if "pyarrow_table" in str(constructor) or "pyspark" in str(constructor):
+        request.applymarker(pytest.mark.xfail)
+
+    data = {"a": [1, 2, 3, 4, 5, 6], "b": ["x", "x", "x", "y", "y", "y"]}
+
+    quantile_expr = nw.col("a").quantile(quantile=0.5, interpolation="linear")
+    native_frame = constructor(data)
+
+    if "dask" in str(constructor):
+        native_frame = native_frame.repartition(npartitions=1)  # type: ignore[union-attr]
+
+    result = (
+        nw.from_native(native_frame)
+        .with_columns(
+            quantile_over_b=quantile_expr.over("b"), quantile_global=quantile_expr
+        )
+        .sort("a")
+    )
+
+    expected = {
+        **data,
+        "quantile_over_b": [2, 2, 2, 5, 5, 5],
+        "quantile_global": [3.5] * 6,
+    }
+    assert_equal_data(result, expected)
+
+
+def test_over_ewm_mean(
+    constructor_eager: ConstructorEager, request: pytest.FixtureRequest
+) -> None:
+    if "pyarrow_table" in str(constructor_eager) or "modin" in str(constructor_eager):
+        request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor_eager) and PANDAS_VERSION < (1, 2):
+        request.applymarker(pytest.mark.xfail(reason="too old, not implemented"))
+
+    data = {"a": [0.0, 1.0, 3.0, 5.0, 7.0, 7.5], "b": [1, 1, 1, 2, 2, 2]}
+
+    ewm_expr = nw.col("a").ewm_mean(com=1)
+    result = (
+        nw.from_native(constructor_eager(data))
+        .with_columns(ewm_over_b=ewm_expr.over("b"), ewm_global=ewm_expr)
+        .sort("a")
+    )
+    expected = {
+        **data,
+        "ewm_over_b": [0.0, 2 / 3, 2.0, 5.0, 6 + 1 / 3, 7.0],
+        "ewm_global": [0.0, 2 / 3, 2.0, 3.6, 5.354838709677419, 6.444444444444445],
+    }
+    assert_equal_data(result, expected)

Original file line number	Diff line number	Diff line change
`@@ -740,8 +740,7 @@ def quantile(`
`740`	`740`	`return self._reuse_series(`
`741`	`741`	`"quantile",`
`742`	`742`	`returns_scalar=True,`
`743`		`- quantile=quantile,`
`744`		`- interpolation=interpolation,`
	`743`	`+ scalar_kwargs={"quantile": quantile, "interpolation": interpolation},`
`745`	`744`	`)`
`746`	`745`
`747`	`746`	`def head(self, n: int) -> Self:`
Original file line number	Diff line number	Diff line change
`@@ -64,6 +64,7 @@ class DaskLazyGroupBy(DepthTrackingGroupBy["DaskLazyFrame", "DaskExpr", Aggregat`
`64`	`64`	`"len": "size",`
`65`	`65`	`"n_unique": n_unique,`
`66`	`66`	`"count": "count",`
	`67`	`+ "quantile": "quantile",`
`67`	`68`	`}`
`68`	`69`
`69`	`70`	`def __init__(`
Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,7 @@ class PandasLikeGroupBy(EagerGroupBy["PandasLikeDataFrame", "PandasLikeExpr", st`
`29`	`29`	`"len": "size",`
`30`	`30`	`"n_unique": "nunique",`
`31`	`31`	`"count": "count",`
	`32`	`+ "quantile": "quantile",`
`32`	`33`	`}`
`33`	`34`
`34`	`35`	`def __init__(`