Skip to content

Commit e19bf19

Browse files
committed
Merge remote-tracking branch 'upstream/main' into sequence_to_td64ns
2 parents 5c36932 + 7fe140e commit e19bf19

File tree

15 files changed

+107
-28
lines changed

15 files changed

+107
-28
lines changed

pandas/_testing/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,8 @@ def shares_memory(left, right) -> bool:
501501
if isinstance(left, MultiIndex):
502502
return shares_memory(left._codes, right)
503503
if isinstance(left, (Index, Series)):
504+
if isinstance(right, (Index, Series)):
505+
return shares_memory(left._values, right._values)
504506
return shares_memory(left._values, right)
505507

506508
if isinstance(left, NDArrayBackedExtensionArray):

pandas/conftest.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ def multiindex_year_month_day_dataframe_random_data():
600600
"""
601601
tdf = DataFrame(
602602
np.random.default_rng(2).standard_normal((100, 4)),
603-
columns=Index(list("ABCD"), dtype=object),
603+
columns=Index(list("ABCD")),
604604
index=date_range("2000-01-01", periods=100, freq="B"),
605605
)
606606
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
@@ -787,7 +787,7 @@ def string_series() -> Series:
787787
"""
788788
return Series(
789789
np.arange(30, dtype=np.float64) * 1.1,
790-
index=Index([f"i_{i}" for i in range(30)], dtype=object),
790+
index=Index([f"i_{i}" for i in range(30)]),
791791
name="series",
792792
)
793793

@@ -798,7 +798,7 @@ def object_series() -> Series:
798798
Fixture for Series of dtype object with Index of unique strings
799799
"""
800800
data = [f"foo_{i}" for i in range(30)]
801-
index = Index([f"bar_{i}" for i in range(30)], dtype=object)
801+
index = Index([f"bar_{i}" for i in range(30)])
802802
return Series(data, index=index, name="objects", dtype=object)
803803

804804

@@ -890,8 +890,8 @@ def int_frame() -> DataFrame:
890890
"""
891891
return DataFrame(
892892
np.ones((30, 4), dtype=np.int64),
893-
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
894-
columns=Index(list("ABCD"), dtype=object),
893+
index=Index([f"foo_{i}" for i in range(30)]),
894+
columns=Index(list("ABCD")),
895895
)
896896

897897

pandas/core/arrays/string_.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from functools import partial
34
import operator
45
from typing import (
56
TYPE_CHECKING,
@@ -64,6 +65,8 @@
6465
from pandas.core.indexers import check_array_indexer
6566
from pandas.core.missing import isna
6667

68+
from pandas.io.formats import printing
69+
6770
if TYPE_CHECKING:
6871
import pyarrow
6972

@@ -391,6 +394,14 @@ def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
391394
raise ValueError
392395
return cls._from_sequence(scalars, dtype=dtype)
393396

397+
def _formatter(self, boxed: bool = False):
398+
formatter = partial(
399+
printing.pprint_thing,
400+
escape_chars=("\t", "\r", "\n"),
401+
quote_strings=not boxed,
402+
)
403+
return formatter
404+
394405
def _str_map(
395406
self,
396407
f,

pandas/core/arrays/string_arrow.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas.compat import (
1818
pa_version_under10p1,
1919
pa_version_under13p0,
20+
pa_version_under16p0,
2021
)
2122
from pandas.util._exceptions import find_stack_level
2223

@@ -71,6 +72,10 @@ def _chk_pyarrow_available() -> None:
7172
raise ImportError(msg)
7273

7374

75+
def _is_string_view(typ):
76+
return not pa_version_under16p0 and pa.types.is_string_view(typ)
77+
78+
7479
# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
7580
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
7681
# fallback for the ones that pyarrow doesn't yet support
@@ -128,11 +133,13 @@ def __init__(self, values) -> None:
128133
_chk_pyarrow_available()
129134
if isinstance(values, (pa.Array, pa.ChunkedArray)) and (
130135
pa.types.is_string(values.type)
136+
or _is_string_view(values.type)
131137
or (
132138
pa.types.is_dictionary(values.type)
133139
and (
134140
pa.types.is_string(values.type.value_type)
135141
or pa.types.is_large_string(values.type.value_type)
142+
or _is_string_view(values.type.value_type)
136143
)
137144
)
138145
):

pandas/core/config_init.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,10 @@ def use_numba_cb(key: str) -> None:
100100
: int
101101
If max_rows is exceeded, switch to truncate view. Depending on
102102
`large_repr`, objects are either centrally truncated or printed as
103-
a summary view. 'None' value means unlimited.
103+
a summary view.
104+
105+
'None' value means unlimited. Beware that printing a large number of rows
106+
could cause your rendering environment (the browser, etc.) to crash.
104107
105108
In case python/IPython is running in a terminal and `large_repr`
106109
equals 'truncate' this can be set to 0 and pandas will auto-detect
@@ -121,7 +124,11 @@ def use_numba_cb(key: str) -> None:
121124
: int
122125
If max_cols is exceeded, switch to truncate view. Depending on
123126
`large_repr`, objects are either centrally truncated or printed as
124-
a summary view. 'None' value means unlimited.
127+
a summary view.
128+
129+
'None' value means unlimited. Beware that printing a large number of
130+
columns could cause your rendering environment (the browser, etc.) to
131+
crash.
125132
126133
In case python/IPython is running in a terminal and `large_repr`
127134
equals 'truncate' this can be set to 0 or None and pandas will auto-detect

pandas/core/generic.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2014,6 +2014,12 @@ def empty(self) -> bool:
20142014
def __array__(
20152015
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
20162016
) -> np.ndarray:
2017+
if copy is False and not self._mgr.is_single_block and not self.empty:
2018+
# check this manually, otherwise ._values will already return a copy
2019+
# and np.array(values, copy=False) will not raise an error
2020+
raise ValueError(
2021+
"Unable to avoid copy while creating an array as requested."
2022+
)
20172023
values = self._values
20182024
if copy is None:
20192025
# Note: branch avoids `copy=None` for NumPy 1.x support

pandas/tests/arrays/string_/test_string_arrow.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,20 @@ def test_constructor_valid_string_type_value_dictionary(string_type, chunked):
9999
assert pa.types.is_large_string(arr._pa_array.type)
100100

101101

102+
@pytest.mark.parametrize("chunked", [True, False])
103+
def test_constructor_valid_string_view(chunked):
104+
# requires pyarrow>=18 for casting string_view to string
105+
pa = pytest.importorskip("pyarrow", minversion="18")
106+
107+
arr = pa.array(["1", "2", "3"], pa.string_view())
108+
if chunked:
109+
arr = pa.chunked_array(arr)
110+
111+
arr = ArrowStringArray(arr)
112+
# string_view type gets converted to a large string array
113+
assert pa.types.is_large_string(arr._pa_array.type)
114+
115+
102116
def test_constructor_from_list():
103117
# GH#27673
104118
pytest.importorskip("pyarrow")

pandas/tests/copy_view/test_array.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.compat.numpy import np_version_gt2
5+
46
from pandas import (
57
DataFrame,
68
Series,
@@ -15,8 +17,12 @@
1517

1618
@pytest.mark.parametrize(
1719
"method",
18-
[lambda ser: ser.values, lambda ser: np.asarray(ser)],
19-
ids=["values", "asarray"],
20+
[
21+
lambda ser: ser.values,
22+
lambda ser: np.asarray(ser),
23+
lambda ser: np.array(ser, copy=False),
24+
],
25+
ids=["values", "asarray", "array"],
2026
)
2127
def test_series_values(method):
2228
ser = Series([1, 2, 3], name="name")
@@ -40,8 +46,12 @@ def test_series_values(method):
4046

4147
@pytest.mark.parametrize(
4248
"method",
43-
[lambda df: df.values, lambda df: np.asarray(df)],
44-
ids=["values", "asarray"],
49+
[
50+
lambda df: df.values,
51+
lambda df: np.asarray(df),
52+
lambda ser: np.array(ser, copy=False),
53+
],
54+
ids=["values", "asarray", "array"],
4555
)
4656
def test_dataframe_values(method):
4757
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
@@ -82,7 +92,7 @@ def test_series_to_numpy():
8292
ser.iloc[0] = 0
8393
assert ser.values[0] == 0
8494

85-
# specify copy=False gives a writeable array
95+
# specify copy=True gives a writeable array
8696
ser = Series([1, 2, 3], name="name")
8797
arr = ser.to_numpy(copy=True)
8898
assert not np.shares_memory(arr, get_array(ser, "name"))
@@ -130,6 +140,23 @@ def test_dataframe_multiple_numpy_dtypes():
130140
assert not np.shares_memory(arr, get_array(df, "a"))
131141
assert arr.flags.writeable is True
132142

143+
if np_version_gt2:
144+
# copy=False semantics are only supported in NumPy>=2.
145+
146+
with pytest.raises(ValueError, match="Unable to avoid copy while creating"):
147+
arr = np.array(df, copy=False)
148+
149+
arr = np.array(df, copy=True)
150+
assert arr.flags.writeable is True
151+
152+
153+
def test_dataframe_single_block_copy_true():
154+
# the copy=False/None cases are tested above in test_dataframe_values
155+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
156+
arr = np.array(df, copy=True)
157+
assert not np.shares_memory(arr, get_array(df, "a"))
158+
assert arr.flags.writeable is True
159+
133160

134161
def test_values_is_ea():
135162
df = DataFrame({"a": date_range("2012-01-01", periods=3)})

pandas/tests/frame/test_reductions.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,6 @@ def test_sum_bools(self):
10471047
# ----------------------------------------------------------------------
10481048
# Index of max / min
10491049

1050-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
10511050
@pytest.mark.parametrize("axis", [0, 1])
10521051
def test_idxmin(self, float_frame, int_frame, skipna, axis):
10531052
frame = float_frame

pandas/tests/frame/test_repr.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
from pandas._config import using_string_dtype
11-
1210
from pandas import (
1311
NA,
1412
Categorical,
@@ -176,7 +174,6 @@ def test_repr_mixed_big(self):
176174

177175
repr(biggie)
178176

179-
@pytest.mark.xfail(using_string_dtype(), reason="/r in")
180177
def test_repr(self):
181178
# columns but no index
182179
no_index = DataFrame(columns=[0, 1, 3])

0 commit comments

Comments
 (0)