Commit 7979909

Merge remote-tracking branch 'upstream/main' into more-dedup-1
2 parents: 372f1eb + 490d029


57 files changed: +911, -1662 lines

.github/workflows/pytest.yml

Lines changed: 3 additions & 3 deletions

@@ -53,12 +53,12 @@ jobs:
           cache-dependency-glob: "pyproject.toml"
       - name: install-reqs
         # we are not testing pyspark on Windows here because it is very slow
-        run: uv pip install -e ".[tests, core, extra, dask, modin]" --system
+        run: uv pip install -e ".[tests, core, extra, dask, modin, sqlframe]" --system
       - name: show-deps
         run: uv pip freeze
       - name: Run pytest
         run: |
-          pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,modin[pyarrow],polars[eager],polars[lazy],dask,duckdb
+          pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,modin[pyarrow],polars[eager],polars[lazy],dask,duckdb,sqlframe

   pytest-full-coverage:
     strategy:
@@ -83,7 +83,7 @@ jobs:
           cache-suffix: ${{ matrix.python-version }}
           cache-dependency-glob: "pyproject.toml"
       - name: install-reqs
-        run: uv pip install -e ".[tests, core, extra, modin, dask]" --system
+        run: uv pip install -e ".[tests, core, extra, modin, dask, sqlframe]" --system
       - name: install pyspark
         run: uv pip install -e ".[pyspark]" --system
         # PySpark is not yet available on Python3.12+

narwhals/_arrow/dataframe.py

Lines changed: 2 additions & 2 deletions

@@ -181,7 +181,7 @@ def get_column(self: Self, name: str) -> ArrowSeries:
             version=self._version,
         )

-    def __array__(self: Self, dtype: Any, copy: bool | None) -> _2DArray:
+    def __array__(self: Self, dtype: Any, *, copy: bool | None) -> _2DArray:
         return self._native_frame.__array__(dtype, copy=copy)

     @overload
@@ -356,7 +356,7 @@ def select(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame:
         names = [s.name for s in new_series]
         reshaped = align_series_full_broadcast(*new_series)
         df = pa.Table.from_arrays([s._native_series for s in reshaped], names=names)
-        return self._from_native_frame(df, validate_column_names=False)
+        return self._from_native_frame(df, validate_column_names=True)

     def with_columns(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame:
         native_frame = self._native_frame
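
Note on the `__array__` change: `copy` is now keyword-only, which matches the NumPy array-protocol convention (NumPy 2.x passes `copy` as a keyword when it calls `__array__`). A minimal sketch of what the `*` in the signature enforces; the `Frame` class below is an illustrative stand-in, not narwhals code:

    import numpy as np

    class Frame:
        """Illustrative stand-in for an object exposing a keyword-only `copy`."""

        def __init__(self, data: list[list[float]]) -> None:
            self._data = data

        def __array__(self, dtype=None, *, copy=None):
            return np.asarray(self._data, dtype=dtype)

    frame = Frame([[1.0, 2.0], [3.0, 4.0]])
    frame.__array__(None, copy=None)  # OK: `copy` passed by keyword
    # frame.__array__(None, None)     # TypeError: `copy` can no longer be positional
    np.asarray(frame)                 # the protocol path is unaffected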

narwhals/_arrow/expr.py

Lines changed: 3 additions & 4 deletions

@@ -14,6 +14,7 @@
 from narwhals._arrow.series import ArrowSeries
 from narwhals._expression_parsing import ExprKind
 from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._expression_parsing import is_scalar_like
 from narwhals._expression_parsing import reuse_series_implementation
 from narwhals.dependencies import get_numpy
 from narwhals.dependencies import is_numpy_array
@@ -414,10 +415,8 @@ def clip(self: Self, lower_bound: Any | None, upper_bound: Any | None) -> Self:
         )

     def over(self: Self, keys: list[str], kind: ExprKind) -> Self:
-        if kind is ExprKind.TRANSFORM:
-            msg = (
-                "Elementwise operations in `over` context are not supported for PyArrow."
-            )
+        if not is_scalar_like(kind):
+            msg = "Only aggregation or literal operations are supported in `over` context for PyArrow."
             raise NotImplementedError(msg)

         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
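
With this change, the PyArrow backend accepts only scalar-like expressions (aggregations and literals) inside a window, and rejects anything else with the clearer message above. A rough sketch at the narwhals API level, assuming a PyArrow-backed eager DataFrame:

    import pyarrow as pa
    import narwhals as nw

    df = nw.from_native(pa.table({"g": ["a", "a", "b"], "x": [1, 2, 3]}), eager_only=True)

    # an aggregation per partition is scalar-like, so it is accepted
    df.with_columns(nw.col("x").max().over("g").alias("x_max"))

    # an elementwise/transform expression in the same position would now raise
    # NotImplementedError for the PyArrow backend:
    # df.with_columns(nw.col("x").cum_sum().over("g").alias("x_cumsum"))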

narwhals/_arrow/namespace.py

Lines changed: 11 additions & 13 deletions

@@ -33,12 +33,15 @@
 from typing import Callable

 from typing_extensions import Self
+from typing_extensions import TypeAlias

 from narwhals._arrow.typing import Incomplete
 from narwhals._arrow.typing import IntoArrowExpr
 from narwhals.dtypes import DType
 from narwhals.utils import Version

+_Scalar: TypeAlias = Any
+

 class ArrowNamespace(CompliantNamespace[ArrowDataFrame, ArrowSeries]):
     def _create_expr_from_callable(
@@ -385,15 +388,15 @@ def __init__(
         self: Self,
         condition: ArrowExpr,
         backend_version: tuple[int, ...],
-        then_value: Any = None,
-        otherwise_value: Any = None,
+        then_value: ArrowExpr | _Scalar = None,
+        otherwise_value: ArrowExpr | _Scalar = None,
         *,
         version: Version,
     ) -> None:
         self._backend_version = backend_version
         self._condition: ArrowExpr = condition
-        self._then_value: ArrowExpr | Any = then_value
-        self._otherwise_value: ArrowExpr | Any = otherwise_value
+        self._then_value: ArrowExpr | _Scalar = then_value
+        self._otherwise_value: ArrowExpr | _Scalar = otherwise_value
         self._version = version

     def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
@@ -404,7 +407,6 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
         if isinstance(self._then_value, ArrowExpr):
             value_series = self._then_value(df)[0]
         else:
-            # `self._then_value` is a scalar
             value_series = plx._create_series_from_scalar(
                 self._then_value, reference_series=condition.alias("literal")
             )
@@ -423,7 +425,6 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
         if isinstance(self._otherwise_value, ArrowExpr):
             otherwise_series = self._otherwise_value(df)[0]
         else:
-            # `self._otherwise_value` is a scalar
             otherwise_series = plx._create_series_from_scalar(
                 self._otherwise_value, reference_series=condition.alias("literal")
             )
@@ -438,7 +439,7 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
             )
         ]

-    def then(self: Self, value: ArrowExpr | ArrowSeries | Any) -> ArrowThen:
+    def then(self: Self, value: ArrowExpr | ArrowSeries | _Scalar) -> ArrowThen:
         self._then_value = value

         return ArrowThen(
@@ -469,17 +470,14 @@ def __init__(
     ) -> None:
         self._backend_version = backend_version
         self._version = version
-        self._call = call
+        self._call: ArrowWhen = call
         self._depth = depth
         self._function_name = function_name
         self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._call_kwargs = call_kwargs or {}

-    def otherwise(self: Self, value: ArrowExpr | ArrowSeries | Any) -> ArrowExpr:
-        # type ignore because we are setting the `_call` attribute to a
-        # callable object of type `PandasWhen`, base class has the attribute as
-        # only a `Callable`
-        self._call._otherwise_value = value  # type: ignore[attr-defined]
+    def otherwise(self: Self, value: ArrowExpr | ArrowSeries | _Scalar) -> ArrowExpr:
+        self._call._otherwise_value = value
         self._function_name = "whenotherwise"
         return self
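
The new `_Scalar` alias (currently just `Any`) makes explicit that `then` and `otherwise` accept either an `ArrowExpr` or a plain Python scalar. A minimal sketch of the corresponding narwhals-level chain, assuming a PyArrow-backed eager DataFrame:

    import pyarrow as pa
    import narwhals as nw

    df = nw.from_native(pa.table({"a": [1, 2, 3]}), eager_only=True)

    # `then` receives a scalar, `otherwise` receives an expression
    df.with_columns(nw.when(nw.col("a") > 1).then(10).otherwise(nw.col("a")).alias("b"))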

narwhals/_arrow/series.py

Lines changed: 8 additions & 19 deletions

@@ -419,7 +419,7 @@ def scatter(self: Self, indices: int | Sequence[int], values: Any) -> Self:
     def to_list(self: Self) -> list[Any]:
         return self._native_series.to_pylist()

-    def __array__(self: Self, dtype: Any = None, copy: bool | None = None) -> _1DArray:
+    def __array__(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
         return self._native_series.__array__(dtype=dtype, copy=copy)

     def to_numpy(self: Self) -> _1DArray:
@@ -997,7 +997,7 @@ def rolling_var(
         )

         cum_sum_sq = (
-            padded_series.__pow__(2)
+            pow(padded_series, 2)
             .cum_sum(reverse=False)
             .fill_null(value=None, strategy="forward", limit=None)
         )
@@ -1091,7 +1091,6 @@ def hist( # noqa: PLR0915
         def _hist_from_bin_count(bin_count: int):  # type: ignore[no-untyped-def] # noqa: ANN202
             d = pc.min_max(self._native_series)
             lower, upper = d["min"], d["max"]
-            pad_lowest_bin = False
             pa_float = pa.type_for_alias("float")
             if lower == upper:
                 range_ = lit(1.0)
@@ -1100,7 +1099,6 @@ def _hist_from_bin_count(bin_count: int): # type: ignore[no-untyped-def] # noqa
                 lower = pc.subtract(lower, mid)
                 upper = pc.add(upper, mid)
             else:
-                pad_lowest_bin = True
                 range_ = pc.subtract(upper, lower)
             width = pc.divide(pc.cast(range_, pa_float), lit(float(bin_count)))

@@ -1151,15 +1149,7 @@ def _hist_from_bin_count(bin_count: int): # type: ignore[no-untyped-def] # noqa
             # extract left/right side of the intervals
             bin_left = pc.add(lower, pc.multiply(counts.column("values"), width))
             bin_right = pc.add(bin_left, width)
-            if pad_lowest_bin:
-                # pad lowest bin by 1% of range
-                lowest_padded = [
-                    pc.subtract(
-                        bin_left[0], pc.multiply(pc.cast(range_, pa_float), lit(0.001))
-                    )
-                ]
-                bin_left = chunked_array([lowest_padded, cast("Any", bin_left[1:])])
-            return counts.column("counts"), bin_left, bin_right
+            return counts.column("counts"), bin_right

         def _hist_from_bins(bins: Sequence[int | float]):  # type: ignore[no-untyped-def] # noqa: ANN202
             bin_indices = np.searchsorted(bins, self._native_series, side="left")
@@ -1169,20 +1159,19 @@ def _hist_from_bins(bins: Sequence[int | float]): # type: ignore[no-untyped-def
             counts[np.isin(obj_cats, obs_cats)] = obs_counts[np.isin(obs_cats, obj_cats)]

             bin_right = bins[1:]
-            bin_left = bins[:-1]
-            return counts, bin_left, bin_right
+            return counts, bin_right

         if bins is not None:
             if len(bins) < 2:
-                counts, bin_left, bin_right = [], [], []
+                counts, bin_right = [], []
             else:
-                counts, bin_left, bin_right = _hist_from_bins(bins)
+                counts, bin_right = _hist_from_bins(bins)

         elif bin_count is not None:
             if bin_count == 0:
-                counts, bin_left, bin_right = [], [], []
+                counts, bin_right = [], []
             else:
-                counts, bin_left, bin_right = _hist_from_bin_count(bin_count)
+                counts, bin_right = _hist_from_bin_count(bin_count)

         else: # pragma: no cover
             # caller guarantees that either bins or bin_count is specified
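
After this change the internal histogram helpers return only the counts and the right bin edges; the left edges (and the former lowest-bin padding) are no longer computed. A minimal sketch of the user-facing call they feed, assuming `Series.hist` reports, Polars-style, one breakpoint (right edge) and count per bin:

    import pyarrow as pa
    import narwhals as nw

    s = nw.from_native(pa.chunked_array([[1, 1, 2, 5, 8]]), series_only=True)
    s.hist(bin_count=3)  # one row per bin: the bin's right edge and its count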

narwhals/_arrow/typing.py

Lines changed: 1 addition & 1 deletion

@@ -33,7 +33,7 @@
 TieBreaker: TypeAlias = Literal["min", "max", "first", "dense"]
 NullPlacement: TypeAlias = Literal["at_start", "at_end"]

-StringArray: TypeAlias = "pc.StringArray"
+StringArray: TypeAlias = pc.StringArray
 ArrowChunkedArray: TypeAlias = pa.ChunkedArray[Any]
 ArrowArray: TypeAlias = pa.Array[Any]
 _AsPyType = TypeVar("_AsPyType")
