Skip to content

Commit 534f7b1

Browse files
authored
Merge branch 'main' into cp313t-windows-wheels
2 parents d53f99a + 692ea6f commit 534f7b1

26 files changed: +190 additions, -80 deletions (lines changed)

pandas/_testing/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -501,6 +501,8 @@ def shares_memory(left, right) -> bool:
501501
if isinstance(left, MultiIndex):
502502
return shares_memory(left._codes, right)
503503
if isinstance(left, (Index, Series)):
504+
if isinstance(right, (Index, Series)):
505+
return shares_memory(left._values, right._values)
504506
return shares_memory(left._values, right)
505507

506508
if isinstance(left, NDArrayBackedExtensionArray):

pandas/conftest.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -600,7 +600,7 @@ def multiindex_year_month_day_dataframe_random_data():
600600
"""
601601
tdf = DataFrame(
602602
np.random.default_rng(2).standard_normal((100, 4)),
603-
columns=Index(list("ABCD"), dtype=object),
603+
columns=Index(list("ABCD")),
604604
index=date_range("2000-01-01", periods=100, freq="B"),
605605
)
606606
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
@@ -787,7 +787,7 @@ def string_series() -> Series:
787787
"""
788788
return Series(
789789
np.arange(30, dtype=np.float64) * 1.1,
790-
index=Index([f"i_{i}" for i in range(30)], dtype=object),
790+
index=Index([f"i_{i}" for i in range(30)]),
791791
name="series",
792792
)
793793

@@ -798,7 +798,7 @@ def object_series() -> Series:
798798
Fixture for Series of dtype object with Index of unique strings
799799
"""
800800
data = [f"foo_{i}" for i in range(30)]
801-
index = Index([f"bar_{i}" for i in range(30)], dtype=object)
801+
index = Index([f"bar_{i}" for i in range(30)])
802802
return Series(data, index=index, name="objects", dtype=object)
803803

804804

@@ -890,8 +890,8 @@ def int_frame() -> DataFrame:
890890
"""
891891
return DataFrame(
892892
np.ones((30, 4), dtype=np.int64),
893-
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
894-
columns=Index(list("ABCD"), dtype=object),
893+
index=Index([f"foo_{i}" for i in range(30)]),
894+
columns=Index(list("ABCD")),
895895
)
896896

897897

pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1145,7 +1145,7 @@ def fillna(
11451145
try:
11461146
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
11471147
except pa.ArrowTypeError as err:
1148-
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
1148+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
11491149
raise TypeError(msg) from err
11501150

11511151
try:
@@ -2136,7 +2136,7 @@ def _maybe_convert_setitem_value(self, value):
21362136
try:
21372137
value = self._box_pa(value, self._pa_array.type)
21382138
except pa.ArrowTypeError as err:
2139-
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
2139+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
21402140
raise TypeError(msg) from err
21412141
return value
21422142

pandas/core/arrays/masked.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -286,7 +286,7 @@ def _validate_setitem_value(self, value):
286286

287287
# Note: without the "str" here, the f-string rendering raises in
288288
# py38 builds.
289-
raise TypeError(f"Invalid value '{value!s}' for dtype {self.dtype}")
289+
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")
290290

291291
def __setitem__(self, key, value) -> None:
292292
key = check_array_indexer(self, key)

pandas/core/arrays/string_.py

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from functools import partial
34
import operator
45
from typing import (
56
TYPE_CHECKING,
@@ -64,6 +65,8 @@
6465
from pandas.core.indexers import check_array_indexer
6566
from pandas.core.missing import isna
6667

68+
from pandas.io.formats import printing
69+
6770
if TYPE_CHECKING:
6871
import pyarrow
6972

@@ -391,6 +394,14 @@ def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
391394
raise ValueError
392395
return cls._from_sequence(scalars, dtype=dtype)
393396

397+
def _formatter(self, boxed: bool = False):
398+
formatter = partial(
399+
printing.pprint_thing,
400+
escape_chars=("\t", "\r", "\n"),
401+
quote_strings=not boxed,
402+
)
403+
return formatter
404+
394405
def _str_map(
395406
self,
396407
f,
@@ -641,7 +652,8 @@ def _validate_scalar(self, value):
641652
return self.dtype.na_value
642653
elif not isinstance(value, str):
643654
raise TypeError(
644-
f"Cannot set non-string value '{value}' into a string array."
655+
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should be a "
656+
f"string or missing value, got '{type(value).__name__}' instead."
645657
)
646658
return value
647659

@@ -732,7 +744,9 @@ def __setitem__(self, key, value) -> None:
732744
value = self.dtype.na_value
733745
elif not isinstance(value, str):
734746
raise TypeError(
735-
f"Cannot set non-string value '{value}' into a StringArray."
747+
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should "
748+
f"be a string or missing value, got '{type(value).__name__}' "
749+
"instead."
736750
)
737751
else:
738752
if not is_array_like(value):
@@ -742,7 +756,10 @@ def __setitem__(self, key, value) -> None:
742756
# compatible, compatibility with arrow backed strings
743757
value = np.asarray(value)
744758
if len(value) and not lib.is_string_array(value, skipna=True):
745-
raise TypeError("Must provide strings.")
759+
raise TypeError(
760+
"Invalid value for dtype 'str'. Value should be a "
761+
"string or missing value (or array of those)."
762+
)
746763

747764
mask = isna(value)
748765
if mask.any():

pandas/core/arrays/string_arrow.py

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from pandas.compat import (
1818
pa_version_under10p1,
1919
pa_version_under13p0,
20+
pa_version_under16p0,
2021
)
2122
from pandas.util._exceptions import find_stack_level
2223

@@ -71,6 +72,10 @@ def _chk_pyarrow_available() -> None:
7172
raise ImportError(msg)
7273

7374

75+
def _is_string_view(typ):
76+
return not pa_version_under16p0 and pa.types.is_string_view(typ)
77+
78+
7479
# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
7580
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
7681
# fallback for the ones that pyarrow doesn't yet support
@@ -128,11 +133,13 @@ def __init__(self, values) -> None:
128133
_chk_pyarrow_available()
129134
if isinstance(values, (pa.Array, pa.ChunkedArray)) and (
130135
pa.types.is_string(values.type)
136+
or _is_string_view(values.type)
131137
or (
132138
pa.types.is_dictionary(values.type)
133139
and (
134140
pa.types.is_string(values.type.value_type)
135141
or pa.types.is_large_string(values.type.value_type)
142+
or _is_string_view(values.type.value_type)
136143
)
137144
)
138145
):
@@ -216,7 +223,10 @@ def insert(self, loc: int, item) -> ArrowStringArray:
216223
if self.dtype.na_value is np.nan and item is np.nan:
217224
item = libmissing.NA
218225
if not isinstance(item, str) and item is not libmissing.NA:
219-
raise TypeError("Scalar must be NA or str")
226+
raise TypeError(
227+
f"Invalid value '{item}' for dtype 'str'. Value should be a "
228+
f"string or missing value, got '{type(item).__name__}' instead."
229+
)
220230
return super().insert(loc, item)
221231

222232
def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
@@ -248,13 +258,19 @@ def _maybe_convert_setitem_value(self, value):
248258
if isna(value):
249259
value = None
250260
elif not isinstance(value, str):
251-
raise TypeError("Scalar must be NA or str")
261+
raise TypeError(
262+
f"Invalid value '{value}' for dtype 'str'. Value should be a "
263+
f"string or missing value, got '{type(value).__name__}' instead."
264+
)
252265
else:
253266
value = np.array(value, dtype=object, copy=True)
254267
value[isna(value)] = None
255268
for v in value:
256269
if not (v is None or isinstance(v, str)):
257-
raise TypeError("Must provide strings")
270+
raise TypeError(
271+
"Invalid value for dtype 'str'. Value should be a "
272+
"string or missing value (or array of those)."
273+
)
258274
return super()._maybe_convert_setitem_value(value)
259275

260276
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:

pandas/core/config_init.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,10 @@ def use_numba_cb(key: str) -> None:
100100
: int
101101
If max_rows is exceeded, switch to truncate view. Depending on
102102
`large_repr`, objects are either centrally truncated or printed as
103-
a summary view. 'None' value means unlimited.
103+
a summary view.
104+
105+
'None' value means unlimited. Beware that printing a large number of rows
106+
could cause your rendering environment (the browser, etc.) to crash.
104107
105108
In case python/IPython is running in a terminal and `large_repr`
106109
equals 'truncate' this can be set to 0 and pandas will auto-detect
@@ -121,7 +124,11 @@ def use_numba_cb(key: str) -> None:
121124
: int
122125
If max_cols is exceeded, switch to truncate view. Depending on
123126
`large_repr`, objects are either centrally truncated or printed as
124-
a summary view. 'None' value means unlimited.
127+
a summary view.
128+
129+
'None' value means unlimited. Beware that printing a large number of
130+
columns could cause your rendering environment (the browser, etc.) to
131+
crash.
125132
126133
In case python/IPython is running in a terminal and `large_repr`
127134
equals 'truncate' this can be set to 0 or None and pandas will auto-detect

pandas/core/generic.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2014,6 +2014,12 @@ def empty(self) -> bool:
20142014
def __array__(
20152015
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
20162016
) -> np.ndarray:
2017+
if copy is False and not self._mgr.is_single_block and not self.empty:
2018+
# check this manually, otherwise ._values will already return a copy
2019+
# and np.array(values, copy=False) will not raise an error
2020+
raise ValueError(
2021+
"Unable to avoid copy while creating an array as requested."
2022+
)
20172023
values = self._values
20182024
if copy is None:
20192025
# Note: branch avoids `copy=None` for NumPy 1.x support

pandas/core/indexes/base.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5139,7 +5139,9 @@ def _is_memory_usage_qualified(self) -> bool:
51395139
"""
51405140
Return a boolean if we need a qualified .info display.
51415141
"""
5142-
return is_object_dtype(self.dtype)
5142+
return is_object_dtype(self.dtype) or (
5143+
is_string_dtype(self.dtype) and self.dtype.storage == "python" # type: ignore[union-attr]
5144+
)
51435145

51445146
def __contains__(self, key: Any) -> bool:
51455147
"""

pandas/core/indexes/multi.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@
6666
is_list_like,
6767
is_object_dtype,
6868
is_scalar,
69+
is_string_dtype,
6970
pandas_dtype,
7071
)
7172
from pandas.core.dtypes.dtypes import (
@@ -1425,10 +1426,12 @@ def dtype(self) -> np.dtype:
14251426
def _is_memory_usage_qualified(self) -> bool:
14261427
"""return a boolean if we need a qualified .info display"""
14271428

1428-
def f(level) -> bool:
1429-
return "mixed" in level or "string" in level or "unicode" in level
1429+
def f(dtype) -> bool:
1430+
return is_object_dtype(dtype) or (
1431+
is_string_dtype(dtype) and dtype.storage == "python"
1432+
)
14301433

1431-
return any(f(level.inferred_type) for level in self.levels)
1434+
return any(f(level.dtype) for level in self.levels)
14321435

14331436
# Cannot determine type of "memory_usage"
14341437
@doc(Index.memory_usage) # type: ignore[has-type]

0 commit comments

Comments
 (0)