
Commit e3fd995

perf: Prefer Iterator > tuple > list, use native pyarrow.repeat, simplify nw.concat_str for DuckDB backend (#3190)
* A general preference to use iterators over tuples over lists
* Use native `pyarrow.repeat`
* Minimizing loops as well
* Simplify (and optimize) `concat_str` for DuckDB backend

---------

Co-authored-by: dangotbanned <[email protected]>
1 parent ebb2a40 commit e3fd995

File tree

22 files changed: +163, -138 lines


narwhals/_arrow/dataframe.py

Lines changed: 13 additions & 30 deletions
@@ -1,14 +1,13 @@
 from __future__ import annotations

 from collections.abc import Collection, Iterator, Mapping, Sequence
-from functools import partial
 from typing import TYPE_CHECKING, Any, Literal, cast, overload

 import pyarrow as pa
 import pyarrow.compute as pc

 from narwhals._arrow.series import ArrowSeries
-from narwhals._arrow.utils import native_to_narwhals_dtype
+from narwhals._arrow.utils import concat_tables, native_to_narwhals_dtype, repeat
 from narwhals._compliant import EagerDataFrame
 from narwhals._expression_parsing import ExprKind
 from narwhals._utils import (
@@ -72,7 +71,6 @@
     "right outer",
     "full outer",
 ]
-PromoteOptions: TypeAlias = Literal["none", "default", "permissive"]


 class ArrowDataFrame(
@@ -790,34 +788,19 @@ def unpivot(
         variable_name: str,
         value_name: str,
     ) -> Self:
-        n_rows = len(self)
-        index_ = [] if index is None else index
-        on_ = [c for c in self.columns if c not in index_] if on is None else on
-        concat = (
-            partial(pa.concat_tables, promote_options="permissive")
-            if self._backend_version >= (14, 0, 0)
-            else pa.concat_tables
-        )
-        names = [*index_, variable_name, value_name]
-        return self._with_native(
-            concat(
-                [
-                    pa.Table.from_arrays(
-                        [
-                            *(self.native.column(idx_col) for idx_col in index_),
-                            cast(
-                                "ChunkedArrayAny",
-                                pa.array([on_col] * n_rows, pa.string()),
-                            ),
-                            self.native.column(on_col),
-                        ],
-                        names=names,
-                    )
-                    for on_col in on_
-                ]
-            )
-        )
         # TODO(Unassigned): Even with promote_options="permissive", pyarrow does not
         # upcast numeric to non-numeric (e.g. string) datatypes
+        n = len(self)
+        index = [] if index is None else list(index)
+        on_ = (c for c in self.columns if c not in index) if on is None else iter(on)
+        index_cols = self.native.select(index)
+        column = self.native.column
+        tables = (
+            index_cols.append_column(variable_name, repeat(name, n)).append_column(
+                value_name, column(name)
+            )
+            for name in on_
+        )
+        return self._with_native(concat_tables(tables, "permissive"))

     pivot = not_implemented()
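The rewritten `unpivot` builds one narrow table per value column (the index columns, a variable column filled natively via `pyarrow.repeat`, and the value column) and concatenates them once at the end. A minimal standalone sketch of the same strategy, using plain pyarrow on toy data rather than narwhals internals:

import pyarrow as pa

# Toy stand-ins for self.native / index / on in the diff above.
tbl = pa.table({"id": [1, 2], "a": [3, 4], "b": [5, 6]})
index, on = ["id"], ["a", "b"]

index_cols = tbl.select(index)
tables = (
    index_cols.append_column("variable", pa.repeat(name, len(tbl))).append_column(
        "value", tbl.column(name)
    )
    for name in on
)
melted = pa.concat_tables(tables, promote_options="permissive")
print(melted.to_pydict())
# {'id': [1, 2, 1, 2], 'variable': ['a', 'a', 'b', 'b'], 'value': [3, 4, 5, 6]}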

narwhals/_arrow/namespace.py

Lines changed: 4 additions & 4 deletions
@@ -134,7 +134,7 @@ def mean_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
         int_64 = self._version.dtypes.Int64()

         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
-            expr_results = list(chain.from_iterable(expr(df) for expr in exprs))
+            expr_results = tuple(chain.from_iterable(expr(df) for expr in exprs))
             align = self._series._align_full_broadcast
             series = align(
                 *(s.fill_null(0, strategy=None, limit=None) for s in expr_results)
@@ -154,7 +154,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
     def min_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             align = self._series._align_full_broadcast
-            init_series, *series = list(chain.from_iterable(expr(df) for expr in exprs))
+            init_series, *series = tuple(chain.from_iterable(expr(df) for expr in exprs))
             init_series, *series = align(init_series, *series)
             native_series = reduce(
                 pc.min_element_wise, [s.native for s in series], init_series.native
@@ -175,7 +175,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
     def max_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             align = self._series._align_full_broadcast
-            init_series, *series = list(chain.from_iterable(expr(df) for expr in exprs))
+            init_series, *series = tuple(chain.from_iterable(expr(df) for expr in exprs))
             init_series, *series = align(init_series, *series)
             native_series = reduce(
                 pc.max_element_wise, [s.native for s in series], init_series.native
@@ -200,7 +200,7 @@ def _concat_diagonal(self, dfs: Sequence[pa.Table], /) -> pa.Table:

     def _concat_horizontal(self, dfs: Sequence[pa.Table], /) -> pa.Table:
         names = list(chain.from_iterable(df.column_names for df in dfs))
-        arrays = list(chain.from_iterable(df.itercolumns() for df in dfs))
+        arrays = tuple(chain.from_iterable(df.itercolumns() for df in dfs))
         return pa.Table.from_arrays(arrays, names=names)

     def _concat_vertical(self, dfs: Sequence[pa.Table], /) -> pa.Table:
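The recurring `list(...)` to `tuple(...)` swaps follow the commit's stated preference (iterator over tuple over list): when a flattened collection is built once and never mutated, a tuple is the cheaper container, since CPython sizes it exactly instead of over-allocating for future appends. A trivial illustration:

from itertools import chain

# Flatten once, never mutate: the tuple is sized exactly and signals immutability.
nested = [[1, 2], [3], [4, 5]]
flat = tuple(chain.from_iterable(nested))  # (1, 2, 3, 4, 5)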

narwhals/_arrow/typing.py

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@
     "microsecond",
     "nanosecond",
 ]
+PromoteOptions: TypeAlias = Literal["none", "default", "permissive"]

 ChunkedArrayAny: TypeAlias = pa.ChunkedArray[Any]
 ArrayAny: TypeAlias = pa.Array[Any]

narwhals/_arrow/utils.py

Lines changed: 42 additions & 5 deletions
@@ -7,7 +7,7 @@
 import pyarrow.compute as pc

 from narwhals._compliant import EagerSeriesNamespace
-from narwhals._utils import Version, isinstance_or_issubclass
+from narwhals._utils import Implementation, Version, isinstance_or_issubclass

 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator, Mapping
@@ -21,7 +21,9 @@
         ArrayOrScalarT1,
         ArrayOrScalarT2,
         ChunkedArrayAny,
+        Incomplete,
         NativeIntervalUnit,
+        PromoteOptions,
         ScalarAny,
     )
     from narwhals._duration import IntervalUnit
@@ -57,6 +59,9 @@ def extract_regex(
     is_timestamp,
 )

+BACKEND_VERSION = Implementation.PYARROW._backend_version()
+"""Static backend version for `pyarrow`."""
+
 UNITS_DICT: Mapping[IntervalUnit, NativeIntervalUnit] = {
     "y": "year",
     "q": "quarter",
@@ -103,6 +108,17 @@ def nulls_like(n: int, series: ArrowSeries) -> ArrayAny:
     return pa.nulls(n, series.native.type)


+def repeat(
+    value: PythonLiteral | ScalarAny, n: int, /, dtype: pa.DataType | None = None
+) -> ArrayAny:
+    """Create an Array instance whose slots are the given scalar.
+
+    *Optionally*, casting to `dtype` **before** repeating `n` times.
+    """
+    lit_: Incomplete = lit
+    return pa.repeat(lit_(value, type=dtype), n)
+
+
 def zeros(n: int, /) -> pa.Int64Array:
     return pa.repeat(0, n)
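The new helper defers to `pa.repeat`, which materializes the repetitions in native code instead of going through a Python list the way `pa.array([value] * n)` does. Roughly, with `pa.scalar` standing in for narwhals' internal `lit`:

import pyarrow as pa

# Cast once via the scalar, then repeat natively n times.
arr = pa.repeat(pa.scalar(0, type=pa.int8()), 4)
print(arr.to_pylist(), arr.type)  # [0, 0, 0, 0] int8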

@@ -423,10 +439,9 @@ def pad_series(
     offset_left = window_size // 2
     # subtract one if window_size is even
     offset_right = offset_left - (window_size % 2 == 0)
-    pad_left = pa.array([None] * offset_left, type=series._type)
-    pad_right = pa.array([None] * offset_right, type=series._type)
-    concat = pa.concat_arrays([pad_left, *series.native.chunks, pad_right])
-    return series._with_native(concat), offset_left + offset_right
+    chunks = series.native.chunks
+    arrays = nulls_like(offset_left, series), *chunks, nulls_like(offset_right, series)
+    return series._with_native(pa.concat_arrays(arrays)), offset_left + offset_right


 def cast_to_comparable_string_types(
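The same idea drives the `pad_series` change: `nulls_like` wraps `pa.nulls`, which allocates an all-null pad directly rather than first building a Python list of `None`s for `pa.array`. For example:

import pyarrow as pa

# An all-null float64 array of length 3, no intermediate [None, None, None] list.
pad = pa.nulls(3, pa.float64())
print(pad.null_count, pad.type)  # 3 double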
@@ -441,4 +456,26 @@ def cast_to_comparable_string_types(
     return (ca.cast(dtype) for ca in chunked_arrays), lit(separator, dtype)


+if BACKEND_VERSION >= (14,):
+    # https://arrow.apache.org/docs/14.0/python/generated/pyarrow.concat_tables.html
+    _PROMOTE: Mapping[PromoteOptions, Mapping[str, Any]] = {
+        "default": {"promote_options": "default"},
+        "permissive": {"promote_options": "permissive"},
+        "none": {"promote_options": "none"},
+    }
+else:  # pragma: no cover
+    # https://arrow.apache.org/docs/13.0/python/generated/pyarrow.concat_tables.html
+    _PROMOTE = {
+        "default": {"promote": True},
+        "permissive": {"promote": True},
+        "none": {"promote": False},
+    }
+
+
+def concat_tables(
+    tables: Iterable[pa.Table], promote_options: PromoteOptions = "none"
+) -> pa.Table:
+    return pa.concat_tables(tables, **_PROMOTE[promote_options])
+
+
 class ArrowSeriesNamespace(EagerSeriesNamespace["ArrowSeries", "ChunkedArrayAny"]): ...
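Resolving `_PROMOTE` once at import time removes the per-call version check that `unpivot` used to perform: pyarrow 14 replaced the boolean `promote=` keyword with `promote_options=`, and the mapping translates the new spelling for older installs. A small sketch of the behaviour the wrapper forwards to, on toy tables with the pyarrow >= 14 spelling:

import pyarrow as pa

t1 = pa.table({"x": [1, 2]})
t2 = pa.table({"x": [3], "y": ["a"]})
# "permissive" unifies the differing schemas, null-filling t1's missing "y".
merged = pa.concat_tables([t1, t2], promote_options="permissive")
print(merged.to_pydict())  # {'x': [1, 2, 3], 'y': [None, None, 'a']}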

narwhals/_compliant/dataframe.py

Lines changed: 3 additions & 1 deletion
@@ -357,7 +357,9 @@ def _evaluate_into_exprs(self, *exprs: EagerExprT) -> Sequence[EagerSeriesT]:
         # NOTE: Ignore intermittent [False Negative]
         # Argument of type "EagerExprT@EagerDataFrame" cannot be assigned to parameter "expr" of type "EagerExprT@EagerDataFrame" in function "_evaluate_into_expr"
         # Type "EagerExprT@EagerDataFrame" is not assignable to type "EagerExprT@EagerDataFrame"
-        return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs))  # pyright: ignore[reportArgumentType]
+        return tuple(
+            chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs)  # pyright: ignore[reportArgumentType]
+        )

     def _evaluate_into_expr(self, expr: EagerExprT, /) -> Sequence[EagerSeriesT]:
         """Return list of raw columns.

narwhals/_compliant/expr.py

Lines changed: 4 additions & 3 deletions
@@ -386,12 +386,13 @@ def _reuse_series_inner(
             series._from_scalar(method(series)) if returns_scalar else method(series)
             for series in self(df)
         ]
-        aliases = self._evaluate_aliases(df)
-        if [s.name for s in out] != list(aliases):  # pragma: no cover
+        aliases, names = self._evaluate_aliases(df), (s.name for s in out)
+        if any(
+            alias != name for alias, name in zip_strict(aliases, names)
+        ):  # pragma: no cover
             msg = (
                 f"Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues\n"
                 f"Expression aliases: {aliases}\n"
-                f"Series names: {[s.name for s in out]}"
             )
             raise AssertionError(msg)
         return out
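The safety check now short-circuits on the first mismatching alias instead of materializing and comparing two full lists. Sketched with the stdlib `zip(..., strict=True)` (Python 3.10+) in place of narwhals' `zip_strict` helper, assuming the helper likewise raises on a length mismatch:

aliases = ["a", "b", "c"]
names = (name for name in ["a", "b", "x"])  # stand-in for (s.name for s in out)
# any() stops at the first differing pair; strict=True still guards the lengths.
print(any(alias != name for alias, name in zip(aliases, names, strict=True)))  # True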

narwhals/_duckdb/dataframe.py

Lines changed: 18 additions & 23 deletions
@@ -23,6 +23,7 @@
     Implementation,
     ValidateBackendVersion,
     Version,
+    extend_bool,
     generate_temporary_column_name,
     not_implemented,
     parse_columns_to_drop,
@@ -393,27 +394,22 @@ def unique(
         if error := self._check_columns_exist(subset_):
             raise error
         tmp_name = generate_temporary_column_name(8, self.columns, prefix="row_index_")
-        if order_by and keep == "last":
-            descending = [True] * len(order_by)
-            nulls_last = [True] * len(order_by)
-        else:
-            descending = None
-            nulls_last = None
+        flags = extend_bool(True, len(order_by)) if order_by and keep == "last" else None
         if keep == "none":
             expr = window_expression(
                 F("count", StarExpression()),
                 subset_,
                 order_by or (),
-                descending=descending,
-                nulls_last=nulls_last,
+                descending=flags,
+                nulls_last=flags,
             )
         else:
             expr = window_expression(
                 F("row_number"),
                 subset_,
                 order_by or (),
-                descending=descending,
-                nulls_last=nulls_last,
+                descending=flags,
+                nulls_last=flags,
             )
         return self._with_native(
             self.native.select(StarExpression(), expr.alias(tmp_name)).filter(
@@ -422,8 +418,7 @@ def unique(
         ).drop([tmp_name], strict=False)

     def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
-        if isinstance(descending, bool):
-            descending = [descending] * len(by)
+        descending = extend_bool(descending, len(by))
         if nulls_last:
             it = (
                 col(name).nulls_last() if not desc else col(name).desc().nulls_last()
@@ -437,23 +432,23 @@ def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) ->
         return self._with_native(self.native.sort(*it))

     def top_k(self, k: int, *, by: Iterable[str], reverse: bool | Sequence[bool]) -> Self:
-        _df = self.native
+        _rel = self.native
         by = list(by)
         if isinstance(reverse, bool):
-            descending = [not reverse] * len(by)
+            descending = extend_bool(not reverse, len(by))
         else:
-            descending = [not rev for rev in reverse]
+            descending = tuple(not rev for rev in reverse)
         expr = window_expression(
             F("row_number"),
             order_by=by,
             descending=descending,
-            nulls_last=[True] * len(by),
+            nulls_last=extend_bool(True, len(by)),
         )
         condition = expr <= lit(k)
         query = f"""
-        SELECT *
-        FROM _df
-        QUALIFY {condition}
+            SELECT *
+            FROM _rel
+            QUALIFY {condition}
         """  # noqa: S608
         return self._with_native(duckdb.sql(query))
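The `_df` and `rel` to `_rel` renames settle on one name for the relation that each raw SQL string references. The reference resolves because `duckdb.sql` falls back to Python variables in the enclosing scope when a table name is unknown (a replacement scan), and the leading underscore spares the old `# noqa: F841` for a seemingly unused variable. For instance:

import duckdb

_rel = duckdb.sql("SELECT * FROM range(5) t(i)")
# DuckDB resolves `_rel` in the query text against the local variable above.
print(duckdb.sql("SELECT max(i) FROM _rel").fetchone())  # (4,)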

@@ -523,11 +518,11 @@ def unpivot(
             raise NotImplementedError(msg)

         unpivot_on = join_column_names(*on_)
-        rel = self.native  # noqa: F841
+        _rel = self.native
         # Replace with Python API once
         # https://github.com/duckdb/duckdb/discussions/16980 is addressed.
         query = f"""
-            unpivot rel
+            unpivot _rel
             on {unpivot_on}
             into
                 name {col(variable_name)}
@@ -548,9 +543,9 @@ def with_row_index(self, name: str, order_by: Sequence[str]) -> Self:
         return self._with_native(self.native.select(expr, StarExpression()))

     def sink_parquet(self, file: str | Path | BytesIO) -> None:
-        df = self.native  # noqa: F841
+        _rel = self.native
         query = f"""
-            COPY (SELECT * FROM df)
+            COPY (SELECT * FROM _rel)
             TO '{file}'
             (FORMAT parquet)
         """  # noqa: S608

narwhals/_duckdb/expr.py

Lines changed: 11 additions & 13 deletions
@@ -22,7 +22,7 @@
 )
 from narwhals._expression_parsing import ExprKind, ExprMetadata
 from narwhals._sql.expr import SQLExpr
-from narwhals._utils import Implementation, Version
+from narwhals._utils import Implementation, Version, extend_bool

 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -95,23 +95,21 @@ def _window_expression(
             nulls_last=nulls_last,
         )

-    def _first(self, expr: Expression, *order_by: str) -> Expression:
+    def _first_last(
+        self, function: str, expr: Expression, order_by: Sequence[str], /
+    ) -> Expression:
         # https://github.com/duckdb/duckdb/discussions/19252
+        flags = extend_bool(False, len(order_by))
         order_by_sql = generate_order_by_sql(
-            *order_by,
-            descending=[False] * len(order_by),
-            nulls_last=[False] * len(order_by),
+            *order_by, descending=flags, nulls_last=flags
         )
-        return sql_expression(f"first({expr} {order_by_sql})")
+        return sql_expression(f"{function}({expr} {order_by_sql})")
+
+    def _first(self, expr: Expression, *order_by: str) -> Expression:
+        return self._first_last("first", expr, order_by)

     def _last(self, expr: Expression, *order_by: str) -> Expression:
-        # https://github.com/duckdb/duckdb/discussions/19252
-        order_by_sql = generate_order_by_sql(
-            *order_by,
-            descending=[False] * len(order_by),
-            nulls_last=[False] * len(order_by),
-        )
-        return sql_expression(f"last({expr} {order_by_sql})")
+        return self._first_last("last", expr, order_by)

     def __narwhals_namespace__(self) -> DuckDBNamespace:  # pragma: no cover
         from narwhals._duckdb.namespace import DuckDBNamespace
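`_first` and `_last` differed only in the aggregate's name, so a single `_first_last` renders either; DuckDB accepts an `ORDER BY` clause inside an aggregate call, which is what the generated string leans on. A standalone sketch, where the exact clause is an assumption about what `generate_order_by_sql` emits for `descending=False` and `nulls_last=False` (ascending, nulls first):

def first_last_sql(function: str, expr: str, order_by: list[str]) -> str:
    # Render e.g. first("a" ORDER BY "idx" ASC NULLS FIRST) for DuckDB.
    order_by_sql = "ORDER BY " + ", ".join(f"{c} ASC NULLS FIRST" for c in order_by)
    return f"{function}({expr} {order_by_sql})"


print(first_last_sql("first", '"a"', ['"idx"']))
# first("a" ORDER BY "idx" ASC NULLS FIRST)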

narwhals/_duckdb/group_by.py

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ def __init__(
         self._compliant_frame = frame.drop_nulls(self._keys) if drop_null_keys else frame

     def agg(self, *exprs: DuckDBExpr) -> DuckDBLazyFrame:
-        agg_columns = list(chain(self._keys, self._evaluate_exprs(exprs)))
+        agg_columns = tuple(chain(self._keys, self._evaluate_exprs(exprs)))
         return self.compliant._with_native(
             self.compliant.native.aggregate(agg_columns)  # type: ignore[arg-type]
         ).rename(dict(zip(self._keys, self._output_key_names)))
