Merge remote-tracking branch 'upstream/main' into from-numpy-2d-ns

dangotbanned · dangotbanned · commit 4f1b1727c054 · 2025-03-26T12:04:56.000Z
diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py
@@ -882,7 +882,6 @@ class LazyExpr(
     sample: not_implemented = not_implemented()
     map_batches: not_implemented = not_implemented()
     ewm_mean: not_implemented = not_implemented()
-    rolling_mean: not_implemented = not_implemented()
     rolling_var: not_implemented = not_implemented()
     rolling_std: not_implemented = not_implemented()
     gather_every: not_implemented = not_implemented()
diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
@@ -392,6 +392,16 @@ def rolling_sum(
             "rolling_sum",
         )
 
+    def rolling_mean(
+        self: Self, window_size: int, *, min_samples: int, center: bool
+    ) -> Self:
+        return self._from_call(
+            lambda _input: _input.rolling(
+                window=window_size, min_periods=min_samples, center=center
+            ).mean(),
+            "rolling_mean",
+        )
+
     def sum(self: Self) -> Self:
         return self._from_call(lambda _input: _input.sum().to_series(), "sum")
 
diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py
@@ -98,6 +98,35 @@ def func(window_inputs: WindowInputs) -> duckdb.Expression:
 
         return func
 
+    def _rolling_window_func(
+        self,
+        *,
+        func_name: Literal["sum", "mean", "std", "var"],
+        center: bool,
+        window_size: int,
+        min_samples: int,
+    ) -> WindowFunction:
+        if center:
+            half = (window_size - 1) // 2
+            remainder = (window_size - 1) % 2  # extra row of an even window goes before
+            start = f"{half + remainder} preceding"
+            end = f"{half} following"
+        else:
+            start = f"{window_size - 1} preceding"
+            end = "current row"
+
+        def func(window_inputs: WindowInputs) -> duckdb.Expression:
+            order_by_sql = generate_order_by_sql(*window_inputs.order_by, ascending=True)
+            partition_by_sql = generate_partition_by_sql(*window_inputs.partition_by)
+            window = f"({partition_by_sql} {order_by_sql} rows between {start} and {end})"
+            sql = (  # the leading space before "then" keeps it apart from min_samples
+                f"case when count({window_inputs.expr}) over {window} >= {min_samples}"
+                f" then {func_name}({window_inputs.expr}) over {window} else null end"
+            )
+            return SQLExpression(sql)  # type: ignore[no-any-return, unused-ignore]
+
+        return func
+
     def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
         if kind is ExprKind.LITERAL:
             return self
@@ -546,26 +575,24 @@ def cum_prod(self, *, reverse: bool) -> Self:
         )
 
     def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
-        if center:
-            half = (window_size - 1) // 2
-            remainder = (window_size - 1) % 2
-            start = f"{half + remainder} preceding"
-            end = f"{half} following"
-        else:
-            start = f"{window_size - 1} preceding"
-            end = "current row"
-
-        def func(window_inputs: WindowInputs) -> duckdb.Expression:
-            order_by_sql = generate_order_by_sql(*window_inputs.order_by, ascending=True)
-            partition_by_sql = generate_partition_by_sql(*window_inputs.partition_by)
-            window = f"({partition_by_sql} {order_by_sql} rows between {start} and {end})"
-            sql = (
-                f"case when count({window_inputs.expr}) over {window} >= {min_samples}"
-                f"then sum({window_inputs.expr}) over {window} else null end"
+        return self._with_window_function(
+            self._rolling_window_func(
+                func_name="sum",
+                center=center,
+                window_size=window_size,
+                min_samples=min_samples,
             )
-            return SQLExpression(sql)  # type: ignore[no-any-return, unused-ignore]
+        )
 
-        return self._with_window_function(func)
+    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+        return self._with_window_function(
+            self._rolling_window_func(
+                func_name="mean",
+                center=center,
+                window_size=window_size,
+                min_samples=min_samples,
+            )
+        )
 
     def fill_null(
         self: Self, value: Self | Any, strategy: Any, limit: int | None
diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py
@@ -159,6 +159,39 @@ def func(window_inputs: WindowInputs) -> Column:
 
         return func
 
+    def _rolling_window_func(
+        self,
+        *,
+        func_name: Literal["sum", "mean", "std", "var"],
+        center: bool,
+        window_size: int,
+        min_samples: int,
+    ) -> WindowFunction:
+        if center:
+            half = (window_size - 1) // 2
+            remainder = (window_size - 1) % 2
+            start = self._Window().currentRow - half - remainder
+            end = self._Window().currentRow + half
+        else:
+            start = self._Window().currentRow - window_size + 1
+            end = self._Window().currentRow
+
+        def func(window_inputs: WindowInputs) -> Column:
+            window = (
+                self._Window()
+                .partitionBy(list(window_inputs.partition_by))
+                .orderBy(
+                    [self._F.col(x).asc_nulls_first() for x in window_inputs.order_by]
+                )
+                .rowsBetween(start, end)
+            )
+            return self._F.when(
+                self._F.count(window_inputs.expr).over(window) >= min_samples,
+                getattr(self._F, func_name)(window_inputs.expr).over(window),
+            )
+
+        return func
+
     @classmethod
     def from_column_names(
         cls: type[Self],
@@ -623,30 +656,24 @@ def _fill_null(_input: Column, value: Column) -> Column:
         return self._from_call(_fill_null, value=value)
 
     def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
-        if center:
-            half = (window_size - 1) // 2
-            remainder = (window_size - 1) % 2
-            start = self._Window().currentRow - half - remainder
-            end = self._Window().currentRow + half
-        else:
-            start = self._Window().currentRow - window_size + 1
-            end = self._Window().currentRow
-
-        def func(window_inputs: WindowInputs) -> Column:
-            window = (
-                self._Window()
-                .partitionBy(list(window_inputs.partition_by))
-                .orderBy(
-                    [self._F.col(x).asc_nulls_first() for x in window_inputs.order_by]
-                )
-                .rowsBetween(start, end)
-            )
-            return self._F.when(
-                self._F.count(window_inputs.expr).over(window) >= min_samples,
-                self._F.sum(window_inputs.expr).over(window),
+        return self._with_window_function(
+            self._rolling_window_func(
+                func_name="sum",
+                center=center,
+                window_size=window_size,
+                min_samples=min_samples,
             )
+        )
 
-        return self._with_window_function(func)
+    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+        return self._with_window_function(
+            self._rolling_window_func(
+                func_name="mean",
+                center=center,
+                window_size=window_size,
+                min_samples=min_samples,
+            )
+        )
 
     @property
     def str(self: Self) -> SparkLikeExprStringNamespace:
diff --git a/tests/expr_and_series/rolling_mean_test.py b/tests/expr_and_series/rolling_mean_test.py
@@ -10,7 +10,10 @@
 from hypothesis import given
 
 import narwhals.stable.v1 as nw
+from tests.utils import DUCKDB_VERSION
 from tests.utils import PANDAS_VERSION
+from tests.utils import POLARS_VERSION
+from tests.utils import Constructor
 from tests.utils import ConstructorEager
 from tests.utils import assert_equal_data
 
@@ -95,3 +98,110 @@ def test_rolling_mean_hypothesis(center: bool, values: list[float]) -> None:  #
     )
     expected_dict = nw.from_native(expected, eager_only=True).to_dict(as_series=False)
     assert_equal_data(result, expected_dict)
+
+
+@pytest.mark.filterwarnings(
+    "ignore:`Expr.rolling_mean` is being called from the stable API although considered an unstable feature."
+)
+@pytest.mark.parametrize(
+    ("expected_a", "window_size", "min_samples", "center"),
+    [
+        ([None, None, 1.5, None, None, 5, 8.5], 2, None, False),
+        ([None, None, 1.5, None, None, 5, 8.5], 2, 2, False),
+        ([None, None, 1.5, 1.5, None, 5, 7.0], 3, 2, False),
+        ([1, None, 1.5, 1.5, 4, 5, 7], 3, 1, False),
+        ([1.5, 1, 1.5, 2, 5, 7, 8.5], 3, 1, True),
+        ([1.5, 1, 1.5, 1.5, 5, 7, 7], 4, 1, True),
+        ([1.5, 1.5, 1.5, 1.5, 7, 7, 7], 5, 1, True),
+    ],
+)
+def test_rolling_mean_expr_lazy_grouped(
+    constructor: Constructor,
+    expected_a: list[float | None],
+    window_size: int,
+    min_samples: int | None,
+    request: pytest.FixtureRequest,
+    *,
+    center: bool,
+) -> None:
+    if ("polars" in str(constructor) and POLARS_VERSION < (1, 10)) or (
+        "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)
+    ):
+        pytest.skip()
+    if "pandas" in str(constructor):
+        pytest.skip()
+    if any(x in str(constructor) for x in ("dask", "pyarrow_table")):
+        request.applymarker(pytest.mark.xfail)
+    if "cudf" in str(constructor) and center:
+        # center is not implemented for offset-based windows
+        request.applymarker(pytest.mark.xfail)
+    if "modin" in str(constructor):
+        # unreliable
+        pytest.skip()
+    data = {
+        "a": [1, None, 2, None, 4, 6, 11],
+        "g": [1, 1, 1, 1, 2, 2, 2],
+        "b": [1, None, 2, 3, 4, 5, 6],
+        "i": list(range(7)),
+    }
+    df = nw.from_native(constructor(data))
+    result = (
+        df.with_columns(
+            nw.col("a")
+            .rolling_mean(window_size, min_samples=min_samples, center=center)
+            .over("g", order_by="b")
+        )
+        .sort("i")
+        .select("a")
+    )
+    expected = {"a": expected_a}
+    assert_equal_data(result, expected)
+
+
+@pytest.mark.filterwarnings(
+    "ignore:`Expr.rolling_mean` is being called from the stable API although considered an unstable feature."
+)
+@pytest.mark.parametrize(
+    ("expected_a", "window_size", "min_samples", "center"),
+    [
+        ([None, None, 1.5, None, None, 5, 8.5], 2, None, False),
+        ([None, None, 1.5, None, None, 5, 8.5], 2, 2, False),
+        ([None, None, 1.5, 1.5, 3, 5, 7], 3, 2, False),
+        ([1, None, 1.5, 1.5, 3, 5, 7], 3, 1, False),
+        ([1.5, 1, 1.5, 3, 5, 7, 8.5], 3, 1, True),
+        ([1.5, 1, 1.5, 2.3333333333333335, 4, 7, 7], 4, 1, True),
+        ([1.5, 1.5, 2.3333333333333335, 3.25, 5.75, 7.0, 7.0], 5, 1, True),
+    ],
+)
+def test_rolling_mean_expr_lazy_ungrouped(
+    constructor: Constructor,
+    expected_a: list[float | None],
+    window_size: int,
+    min_samples: int | None,
+    *,
+    center: bool,
+) -> None:
+    if ("polars" in str(constructor) and POLARS_VERSION < (1, 10)) or (
+        "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)
+    ):
+        pytest.skip()
+    if "modin" in str(constructor):
+        # unreliable
+        pytest.skip()
+    data = {
+        "a": [1, None, 2, None, 4, 6, 11],
+        "b": [1, None, 2, 3, 4, 5, 6],
+        "i": list(range(7)),
+    }
+    df = nw.from_native(constructor(data))
+    result = (
+        df.with_columns(
+            nw.col("a")
+            .rolling_mean(window_size, min_samples=min_samples, center=center)
+            .over(order_by="b")
+        )
+        .select("a", "i")
+        .sort("i")
+    )
+    expected = {"a": expected_a, "i": list(range(7))}
+    assert_equal_data(result, expected)