Skip to content

Commit a1000c5

Browse files
committed
Code refactor, bug fixes and test fixes
2 parents 493dd9d + 7acd629 commit a1000c5

File tree

178 files changed

+1004
-240
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

178 files changed

+1004
-240
lines changed

.github/workflows/unit-tests.yml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,6 @@ jobs:
5959
extra_loc: "zh_CN"
6060
- name: "Future infer strings"
6161
env_file: actions-311.yaml
62-
pattern: "not slow and not network and not single_cpu"
6362
pandas_future_infer_string: "1"
6463
- name: "Pypy"
6564
env_file: actions-pypy-39.yaml

ci/code_checks.sh

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7070
--format=actions \
7171
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
73-
-i "pandas.MultiIndex.append PR07,SA01" \
7473
-i "pandas.MultiIndex.copy PR07,RT03,SA01" \
7574
-i "pandas.MultiIndex.get_level_values SA01" \
7675
-i "pandas.MultiIndex.get_loc PR07" \
@@ -223,7 +222,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
223222
-i "pandas.Timestamp.fromordinal SA01" \
224223
-i "pandas.Timestamp.fromtimestamp PR01,SA01" \
225224
-i "pandas.Timestamp.hour GL08" \
226-
-i "pandas.Timestamp.isoweekday SA01" \
227225
-i "pandas.Timestamp.max PR02" \
228226
-i "pandas.Timestamp.microsecond GL08" \
229227
-i "pandas.Timestamp.min PR02" \
@@ -328,7 +326,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
328326
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
329327
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
330328
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
331-
-i "pandas.core.groupby.DataFrameGroupBy.median SA01" \
332329
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
333330
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
334331
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
@@ -347,7 +344,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
347344
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
348345
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
349346
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
350-
-i "pandas.core.groupby.SeriesGroupBy.median SA01" \
351347
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
352348
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
353349
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
@@ -362,7 +358,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
362358
-i "pandas.core.resample.Resampler.indices SA01" \
363359
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
364360
-i "pandas.core.resample.Resampler.mean SA01" \
365-
-i "pandas.core.resample.Resampler.median SA01" \
366361
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
367362
-i "pandas.core.resample.Resampler.ohlc SA01" \
368363
-i "pandas.core.resample.Resampler.prod SA01" \

ci/run_tests.sh

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,5 @@ if [[ "$PATTERN" ]]; then
1616
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
1717
fi
1818

19-
# temporarily let pytest always succeed (many tests are not yet passing in the
20-
# build enabling the future string dtype)
21-
if [[ "$PANDAS_FUTURE_INFER_STRING" == "1" ]]; then
22-
PYTEST_CMD="$PYTEST_CMD || true"
23-
fi
24-
2519
echo $PYTEST_CMD
2620
sh -c "$PYTEST_CMD"

pandas/_libs/lib.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2702,7 +2702,7 @@ def maybe_convert_objects(ndarray[object] objects,
27022702
if using_string_dtype() and is_string_array(objects, skipna=True):
27032703
from pandas.core.arrays.string_ import StringDtype
27042704

2705-
dtype = StringDtype(storage="pyarrow_numpy")
2705+
dtype = StringDtype(storage="pyarrow", na_value=np.nan)
27062706
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27072707

27082708
elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):

pandas/_libs/tslibs/nattype.pyx

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,13 @@ class NaTType(_NaT):
441441
442442
Monday == 1 ... Sunday == 7.
443443
444+
See Also
445+
--------
446+
Timestamp.weekday : Return the day of the week with Monday=0, Sunday=6.
447+
Timestamp.isocalendar : Return a tuple containing ISO year, week number
448+
and weekday.
449+
datetime.date.isoweekday : Equivalent method in datetime module.
450+
444451
Examples
445452
--------
446453
>>> ts = pd.Timestamp('2023-01-01 10:00:00')

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2775,6 +2775,13 @@ default 'raise'
27752775
27762776
Monday == 1 ... Sunday == 7.
27772777
2778+
See Also
2779+
--------
2780+
Timestamp.weekday : Return the day of the week with Monday=0, Sunday=6.
2781+
Timestamp.isocalendar : Return a tuple containing ISO year, week number
2782+
and weekday.
2783+
datetime.date.isoweekday : Equivalent method in datetime module.
2784+
27782785
Examples
27792786
--------
27802787
>>> ts = pd.Timestamp('2023-01-01 10:00:00')

pandas/_testing/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -509,14 +509,14 @@ def shares_memory(left, right) -> bool:
509509
if (
510510
isinstance(left, ExtensionArray)
511511
and is_string_dtype(left.dtype)
512-
and left.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined]
512+
and left.dtype.storage == "pyarrow" # type: ignore[attr-defined]
513513
):
514514
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
515515
left = cast("ArrowExtensionArray", left)
516516
if (
517517
isinstance(right, ExtensionArray)
518518
and is_string_dtype(right.dtype)
519-
and right.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined]
519+
and right.dtype.storage == "pyarrow" # type: ignore[attr-defined]
520520
):
521521
right = cast("ArrowExtensionArray", right)
522522
left_pa_data = left._pa_array

pandas/_testing/asserters.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -578,13 +578,19 @@ def raise_assert_detail(
578578

579579
if isinstance(left, np.ndarray):
580580
left = pprint_thing(left)
581-
elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)):
581+
elif isinstance(left, (CategoricalDtype, NumpyEADtype)):
582582
left = repr(left)
583+
elif isinstance(left, StringDtype):
584+
# TODO(infer_string) this special case could be avoided if we have
585+
# a more informative repr https://github.com/pandas-dev/pandas/issues/59342
586+
left = f"StringDtype(storage={left.storage}, na_value={left.na_value})"
583587

584588
if isinstance(right, np.ndarray):
585589
right = pprint_thing(right)
586-
elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)):
590+
elif isinstance(right, (CategoricalDtype, NumpyEADtype)):
587591
right = repr(right)
592+
elif isinstance(right, StringDtype):
593+
right = f"StringDtype(storage={right.storage}, na_value={right.na_value})"
588594

589595
msg += f"""
590596
[left]: {left}

pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -575,10 +575,8 @@ def __getitem__(self, item: PositionalIndexer):
575575
if isinstance(item, np.ndarray):
576576
if not len(item):
577577
# Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
578-
if self._dtype.name == "string" and self._dtype.storage in (
579-
"pyarrow",
580-
"pyarrow_numpy",
581-
):
578+
if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
579+
# TODO(infer_string) should this be large_string?
582580
pa_dtype = pa.string()
583581
else:
584582
pa_dtype = self._dtype.pyarrow_dtype

pandas/core/arrays/numpy_.py

Lines changed: 28 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from pandas.core.dtypes.dtypes import NumpyEADtype
1818
from pandas.core.dtypes.missing import isna
1919

20+
import pandas as pd
2021
from pandas.core import (
2122
arraylike,
2223
missing,
@@ -248,74 +249,41 @@ def _validate_setitem_value(self, value):
248249
TypeError
249250
"""
250251

251-
kind = self.dtype.kind
252-
253-
if kind == "b":
254-
if lib.is_bool(value) or np.can_cast(type(value), self.dtype.type):
255-
return value
256-
if isinstance(value, NumpyExtensionArray) and (
257-
lib.is_bool_array(value.to_numpy())
258-
):
259-
return value
260-
261-
elif kind == "i":
262-
if lib.is_integer(value) or np.can_cast(type(value), self.dtype.type):
263-
return value
264-
if isinstance(value, NumpyExtensionArray) and lib.is_integer_array(
265-
value.to_numpy()
266-
):
267-
return value
268-
269-
elif kind == "u":
270-
if (lib.is_integer(value) and value > -1) or np.can_cast(
271-
type(value), self.dtype.type
272-
):
273-
return value
274-
275-
elif kind == "c":
276-
if lib.is_complex(value) or np.can_cast(type(value), self.dtype.type):
277-
return value
278-
279-
elif kind == "S":
280-
if isinstance(value, str) or np.can_cast(type(value), self.dtype.type):
281-
return value
282-
if isinstance(value, NumpyExtensionArray) and lib.is_string_array(
283-
value.to_numpy()
284-
):
285-
return value
252+
if type(value) == self.dtype.type:
253+
return value
286254

287-
elif kind == "M":
288-
if isinstance(value, np.datetime64):
289-
return value
290-
if isinstance(value, NumpyExtensionArray) and (
291-
lib.is_date_array(value.to_numpy())
292-
or lib.is_datetime_array(value.to_numpy())
293-
or lib.is_datetime64_array(value.to_numpy())
294-
or lib.is_datetime_with_singletz_array(value.to_numpy())
295-
):
296-
return value
255+
if isinstance(value, NumpyExtensionArray) and value.dtype == self.dtype:
256+
return value
297257

298-
elif kind == "m":
299-
if isinstance(value, np.timedelta64):
300-
return value
301-
if isinstance(value, NumpyExtensionArray) and (
302-
lib.is_timedelta_or_timedelta64_array(value.to_numpy())
303-
or lib.is_time_array(value.to_numpy())
304-
):
258+
if (
259+
isinstance(value, list)
260+
or isinstance(value, NumpyExtensionArray)
261+
or isinstance(value, np.ndarray)
262+
or isinstance(value, pd.Series)
263+
):
264+
try:
265+
_ = pd.array(value, dtype=self.dtype)
305266
return value
267+
except ValueError:
268+
print("Caught the error")
306269

307-
elif kind == "f":
308-
if lib.is_float(value) or np.can_cast(type(value), self.dtype.type):
309-
return value
310-
if isinstance(value, NumpyExtensionArray) and lib.is_float_array(
311-
value.to_numpy()
312-
):
270+
if (
271+
(
272+
(lib.is_integer(value) or lib.is_float(value))
273+
and self.dtype.kind in "iuf"
274+
)
275+
or (isinstance(value, str) and self.dtype.kind in "US")
276+
or (self.dtype.kind == "O")
277+
) and not isna(value):
278+
if self.dtype.type(value) == value: # -> Problem
313279
return value
314280

315-
elif np.can_cast(type(value), self.dtype.type):
281+
if isna(value):
316282
return value
317283

318-
raise TypeError(f"Invalid value '{value!s}' for dtype {self.dtype}")
284+
raise TypeError(
285+
f"Invalid value '{value!s}' with type {type(value)} for dtype {self.dtype}"
286+
)
319287

320288
# Base EA class (and all other EA classes) don't have limit_area keyword
321289
# This can be removed here as well when the interpolate ffill/bfill method

0 commit comments

Comments
 (0)