Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from narwhals._pandas_like.selectors import PandasSelectorNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.typing import NativeDataFrameT, NativeSeriesT
from narwhals._pandas_like.utils import is_non_nullable_boolean
from narwhals._pandas_like.utils import align_and_extract_native, is_non_nullable_boolean
from narwhals._utils import zip_strict

if TYPE_CHECKING:
Expand Down Expand Up @@ -330,9 +330,29 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:

if not ignore_nulls:
null_mask_result = reduce(operator.or_, null_mask)
result = reduce(lambda x, y: x + separator + y, series).zip_with(
~null_mask_result, None
)
result = series[0]
for s in series[1:]:
r_native, s_native = align_and_extract_native(result, s)
if str(result.native.dtype) == "large_string[pyarrow]":
# https://github.com/pandas-dev/pandas/issues/64393
import pyarrow as pa # ignore-banned-import

separator_pa_large_string = pa.scalar(
separator, type=pa.large_string()
)
if isinstance(s_native, str):
result = result._with_native(
r_native
+ separator_pa_large_string
+ pa.scalar(s_native, type=pa.large_string())
)
else:
result = result._with_native(
r_native + separator_pa_large_string + s_native
)
else:
result = result + separator + s
result = result.zip_with(~null_mask_result, None)
else:
# NOTE: Trying to help `mypy` later
# error: Cannot determine type of "values" [has-type]
Expand Down
41 changes: 39 additions & 2 deletions tests/expr_and_series/concat_str_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations

from typing import Callable
from typing import Any, Callable

import pytest

import narwhals as nw
from tests.utils import POLARS_VERSION, Constructor, assert_equal_data
from tests.utils import PANDAS_VERSION, POLARS_VERSION, Constructor, assert_equal_data

pytest.importorskip("pyarrow")
import pyarrow as pa
Expand Down Expand Up @@ -100,3 +100,40 @@ def test_pyarrow_string_type(
.schema
)
assert expected_function(result.field("store_item").type)


@pytest.mark.skipif(
    PANDAS_VERSION < (2, 2), reason='"add" was not implemented yet for large-string'
)
def test_concat_str_with_large_string() -> None:
    """Check `concat_str` on `large_string` columns for PyArrow and pandas inputs.

    Both a column/column and a column/literal concatenation are evaluated
    against a PyArrow table and against the pandas frame derived from it with
    `pd.ArrowDtype`.

    See https://github.com/pandas-dev/pandas/issues/64393
    """
    pytest.importorskip("pandas")
    import pandas as pd

    large_string_schema = pa.schema(
        [("store", pa.large_string()), ("item", pa.large_string())]
    )
    native_pa = pa.table(
        {"store": ["foo", "bar"], "item": ["axe", "saw"]}, schema=large_string_schema
    )
    native_pd = native_pa.to_pandas(types_mapper=pd.ArrowDtype)

    def check_both_backends(concatenated: nw.Expr, joined: list[str]) -> None:
        # Run the same expression on the PyArrow table first, then on pandas.
        expr = concatenated.alias("store_item")
        expected = {
            "store": ["foo", "bar"],
            "item": ["axe", "saw"],
            "store_item": joined,
        }
        for native in (native_pa, native_pd):
            result: nw.DataFrame[Any] = nw.from_native(native).with_columns(expr)
            assert_equal_data(result, expected)

    # Column + column.
    check_both_backends(
        nw.concat_str("store", "item", separator="-"), ["foo-axe", "bar-saw"]
    )
    # Column + string literal.
    check_both_backends(
        nw.concat_str("store", nw.lit("item"), separator="-"),
        ["foo-item", "bar-item"],
    )
Loading