Skip to content

Commit 0329b4f

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-setitem-error-message
2 parents 60bd4b8 + c15d823 commit 0329b4f

File tree

12 files changed

+56
-23
lines changed

12 files changed

+56
-23
lines changed

pandas/_testing/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,8 @@ def shares_memory(left, right) -> bool:
501501
if isinstance(left, MultiIndex):
502502
return shares_memory(left._codes, right)
503503
if isinstance(left, (Index, Series)):
504+
if isinstance(right, (Index, Series)):
505+
return shares_memory(left._values, right._values)
504506
return shares_memory(left._values, right)
505507

506508
if isinstance(left, NDArrayBackedExtensionArray):

pandas/conftest.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ def multiindex_year_month_day_dataframe_random_data():
600600
"""
601601
tdf = DataFrame(
602602
np.random.default_rng(2).standard_normal((100, 4)),
603-
columns=Index(list("ABCD"), dtype=object),
603+
columns=Index(list("ABCD")),
604604
index=date_range("2000-01-01", periods=100, freq="B"),
605605
)
606606
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
@@ -787,7 +787,7 @@ def string_series() -> Series:
787787
"""
788788
return Series(
789789
np.arange(30, dtype=np.float64) * 1.1,
790-
index=Index([f"i_{i}" for i in range(30)], dtype=object),
790+
index=Index([f"i_{i}" for i in range(30)]),
791791
name="series",
792792
)
793793

@@ -798,7 +798,7 @@ def object_series() -> Series:
798798
Fixture for Series of dtype object with Index of unique strings
799799
"""
800800
data = [f"foo_{i}" for i in range(30)]
801-
index = Index([f"bar_{i}" for i in range(30)], dtype=object)
801+
index = Index([f"bar_{i}" for i in range(30)])
802802
return Series(data, index=index, name="objects", dtype=object)
803803

804804

@@ -890,8 +890,8 @@ def int_frame() -> DataFrame:
890890
"""
891891
return DataFrame(
892892
np.ones((30, 4), dtype=np.int64),
893-
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
894-
columns=Index(list("ABCD"), dtype=object),
893+
index=Index([f"foo_{i}" for i in range(30)]),
894+
columns=Index(list("ABCD")),
895895
)
896896

897897

pandas/core/arrays/string_.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from functools import partial
34
import operator
45
from typing import (
56
TYPE_CHECKING,
@@ -64,6 +65,8 @@
6465
from pandas.core.indexers import check_array_indexer
6566
from pandas.core.missing import isna
6667

68+
from pandas.io.formats import printing
69+
6770
if TYPE_CHECKING:
6871
import pyarrow
6972

@@ -391,6 +394,14 @@ def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
391394
raise ValueError
392395
return cls._from_sequence(scalars, dtype=dtype)
393396

397+
def _formatter(self, boxed: bool = False):
398+
formatter = partial(
399+
printing.pprint_thing,
400+
escape_chars=("\t", "\r", "\n"),
401+
quote_strings=not boxed,
402+
)
403+
return formatter
404+
394405
def _str_map(
395406
self,
396407
f,

pandas/core/arrays/string_arrow.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas.compat import (
1818
pa_version_under10p1,
1919
pa_version_under13p0,
20+
pa_version_under16p0,
2021
)
2122
from pandas.util._exceptions import find_stack_level
2223

@@ -71,6 +72,10 @@ def _chk_pyarrow_available() -> None:
7172
raise ImportError(msg)
7273

7374

75+
def _is_string_view(typ):
76+
return not pa_version_under16p0 and pa.types.is_string_view(typ)
77+
78+
7479
# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
7580
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
7681
# fallback for the ones that pyarrow doesn't yet support
@@ -128,11 +133,13 @@ def __init__(self, values) -> None:
128133
_chk_pyarrow_available()
129134
if isinstance(values, (pa.Array, pa.ChunkedArray)) and (
130135
pa.types.is_string(values.type)
136+
or _is_string_view(values.type)
131137
or (
132138
pa.types.is_dictionary(values.type)
133139
and (
134140
pa.types.is_string(values.type.value_type)
135141
or pa.types.is_large_string(values.type.value_type)
142+
or _is_string_view(values.type.value_type)
136143
)
137144
)
138145
):

pandas/core/config_init.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,10 @@ def use_numba_cb(key: str) -> None:
100100
: int
101101
If max_rows is exceeded, switch to truncate view. Depending on
102102
`large_repr`, objects are either centrally truncated or printed as
103-
a summary view. 'None' value means unlimited.
103+
a summary view.
104+
105+
'None' value means unlimited. Beware that printing a large number of rows
106+
could cause your rendering environment (the browser, etc.) to crash.
104107
105108
In case python/IPython is running in a terminal and `large_repr`
106109
equals 'truncate' this can be set to 0 and pandas will auto-detect
@@ -121,7 +124,11 @@ def use_numba_cb(key: str) -> None:
121124
: int
122125
If max_cols is exceeded, switch to truncate view. Depending on
123126
`large_repr`, objects are either centrally truncated or printed as
124-
a summary view. 'None' value means unlimited.
127+
a summary view.
128+
129+
'None' value means unlimited. Beware that printing a large number of
130+
columns could cause your rendering environment (the browser, etc.) to
131+
crash.
125132
126133
In case python/IPython is running in a terminal and `large_repr`
127134
equals 'truncate' this can be set to 0 or None and pandas will auto-detect

pandas/tests/arrays/string_/test_string_arrow.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,20 @@ def test_constructor_valid_string_type_value_dictionary(string_type, chunked):
9999
assert pa.types.is_large_string(arr._pa_array.type)
100100

101101

102+
@pytest.mark.parametrize("chunked", [True, False])
103+
def test_constructor_valid_string_view(chunked):
104+
# requires pyarrow>=18 for casting string_view to string
105+
pa = pytest.importorskip("pyarrow", minversion="18")
106+
107+
arr = pa.array(["1", "2", "3"], pa.string_view())
108+
if chunked:
109+
arr = pa.chunked_array(arr)
110+
111+
arr = ArrowStringArray(arr)
112+
# string_view type gets converted to dense large string array
113+
assert pa.types.is_large_string(arr._pa_array.type)
114+
115+
102116
def test_constructor_from_list():
103117
# GH#27673
104118
pytest.importorskip("pyarrow")

pandas/tests/frame/test_reductions.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,6 @@ def test_sum_bools(self):
10471047
# ----------------------------------------------------------------------
10481048
# Index of max / min
10491049

1050-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
10511050
@pytest.mark.parametrize("axis", [0, 1])
10521051
def test_idxmin(self, float_frame, int_frame, skipna, axis):
10531052
frame = float_frame

pandas/tests/frame/test_repr.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
from pandas._config import using_string_dtype
11-
1210
from pandas import (
1311
NA,
1412
Categorical,
@@ -176,7 +174,6 @@ def test_repr_mixed_big(self):
176174

177175
repr(biggie)
178176

179-
@pytest.mark.xfail(using_string_dtype(), reason="/r in")
180177
def test_repr(self):
181178
# columns but no index
182179
no_index = DataFrame(columns=[0, 1, 3])

pandas/tests/series/indexing/test_setitem.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,6 @@ def test_setitem_with_expansion_type_promotion(self):
545545
expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
546546
tm.assert_series_equal(ser, expected)
547547

548-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
549548
def test_setitem_not_contained(self, string_series):
550549
# set item that's not contained
551550
ser = string_series.copy()

pandas/tests/series/methods/test_reindex.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
def test_reindex(datetime_series, string_series):
2424
identity = string_series.reindex(string_series.index)
2525

26-
assert np.may_share_memory(string_series.index, identity.index)
26+
assert tm.shares_memory(string_series.index, identity.index)
2727

2828
assert identity.index.is_(string_series.index)
2929
assert identity.index.identical(string_series.index)

0 commit comments

Comments
 (0)