Skip to content

Commit bd67b72

Browse files
authored
Merge branch 'main' into add-pandas-merge-how-param-validation
2 parents 84e77c9 + 328e79d commit bd67b72

File tree

13 files changed

+75
-24
lines changed

13 files changed

+75
-24
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@ Groupby/resample/rolling
651651
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
652652
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
653653
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
654+
- Bug in :meth:`Series.resample` where it could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
654655

655656
Reshaping
656657
^^^^^^^^^

pandas/_libs/lib.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2699,16 +2699,16 @@ def maybe_convert_objects(ndarray[object] objects,
26992699
seen.object_ = True
27002700

27012701
elif seen.str_:
2702-
if using_string_dtype() and is_string_array(objects, skipna=True):
2702+
if convert_to_nullable_dtype and is_string_array(objects, skipna=True):
27032703
from pandas.core.arrays.string_ import StringDtype
27042704

2705-
dtype = StringDtype(na_value=np.nan)
2705+
dtype = StringDtype()
27062706
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27072707

2708-
elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):
2708+
elif using_string_dtype() and is_string_array(objects, skipna=True):
27092709
from pandas.core.arrays.string_ import StringDtype
27102710

2711-
dtype = StringDtype()
2711+
dtype = StringDtype(na_value=np.nan)
27122712
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27132713

27142714
seen.object_ = True

pandas/core/generic.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,9 +600,10 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
600600
if isinstance(self, ABCSeries):
601601
return {clean_column_name(self.name): self}
602602

603+
dtypes = self.dtypes
603604
return {
604605
clean_column_name(k): Series(
605-
v, copy=False, index=self.index, name=k, dtype=self.dtypes[k]
606+
v, copy=False, index=self.index, name=k, dtype=dtypes[k]
606607
).__finalize__(self)
607608
for k, v in zip(self.columns, self._iter_column_arrays())
608609
if not isinstance(k, int)
@@ -7486,9 +7487,13 @@ def replace(
74867487
if inplace:
74877488
return None
74887489
return self.copy(deep=False)
7489-
74907490
if is_dict_like(to_replace):
74917491
if is_dict_like(value): # {'A' : NA} -> {'A' : 0}
7492+
if isinstance(self, ABCSeries):
7493+
raise ValueError(
7494+
"to_replace and value cannot be dict-like for "
7495+
"Series.replace"
7496+
)
74927497
# Note: Checking below for `in foo.keys()` instead of
74937498
# `in foo` is needed for when we have a Series and not dict
74947499
mapping = {

pandas/core/resample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2466,7 +2466,7 @@ def _get_timestamp_range_edges(
24662466
)
24672467
if isinstance(freq, Day):
24682468
first = first.tz_localize(index_tz)
2469-
last = last.tz_localize(index_tz)
2469+
last = last.tz_localize(index_tz, nonexistent="shift_forward")
24702470
else:
24712471
first = first.normalize()
24722472
last = last.normalize()

pandas/io/parsers/readers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
321321
322322
Note: A fast-path exists for iso8601-formatted dates.
323323
date_format : str or dict of column -> format, optional
324-
Format to use for parsing dates when used in conjunction with ``parse_dates``.
324+
Format to use for parsing dates and/or times when used in conjunction with ``parse_dates``.
325325
The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. See
326326
`strftime documentation
327327
<https://docs.python.org/3/library/datetime.html

pandas/tests/arrays/string_/test_string_arrow.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,8 @@ def test_config(string_storage, using_infer_string):
3636
result = pd.array(["a", "b"])
3737
assert result.dtype.storage == string_storage
3838

39-
dtype = StringDtype(
40-
string_storage, na_value=np.nan if using_infer_string else pd.NA
41-
)
39+
# pd.array(..) by default always returns the NA-variant
40+
dtype = StringDtype(string_storage, na_value=pd.NA)
4241
expected = dtype.construct_array_type()._from_sequence(["a", "b"], dtype=dtype)
4342
tm.assert_equal(result, expected)
4443

pandas/tests/arrays/test_array.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,21 +215,45 @@ def test_dt64_array(dtype_unit):
215215
.construct_array_type()
216216
._from_sequence(["a", None], dtype=pd.StringDtype()),
217217
),
218+
(
219+
["a", None],
220+
"str",
221+
pd.StringDtype(na_value=np.nan)
222+
.construct_array_type()
223+
._from_sequence(["a", None], dtype=pd.StringDtype(na_value=np.nan))
224+
if using_string_dtype()
225+
else NumpyExtensionArray(np.array(["a", "None"])),
226+
),
218227
(
219228
["a", None],
220229
pd.StringDtype(),
221230
pd.StringDtype()
222231
.construct_array_type()
223232
._from_sequence(["a", None], dtype=pd.StringDtype()),
224233
),
234+
(
235+
["a", None],
236+
pd.StringDtype(na_value=np.nan),
237+
pd.StringDtype(na_value=np.nan)
238+
.construct_array_type()
239+
._from_sequence(["a", None], dtype=pd.StringDtype(na_value=np.nan)),
240+
),
225241
(
226242
# numpy array with string dtype
227243
np.array(["a", "b"], dtype=str),
228-
None,
244+
pd.StringDtype(),
229245
pd.StringDtype()
230246
.construct_array_type()
231247
._from_sequence(["a", "b"], dtype=pd.StringDtype()),
232248
),
249+
(
250+
# numpy array with string dtype
251+
np.array(["a", "b"], dtype=str),
252+
pd.StringDtype(na_value=np.nan),
253+
pd.StringDtype(na_value=np.nan)
254+
.construct_array_type()
255+
._from_sequence(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
256+
),
233257
# Boolean
234258
(
235259
[True, None],
@@ -287,7 +311,6 @@ def test_array_copy():
287311
assert tm.shares_memory(a, b)
288312

289313

290-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
291314
@pytest.mark.parametrize(
292315
"data, expected",
293316
[
@@ -387,6 +410,13 @@ def test_array_copy():
387410
.construct_array_type()
388411
._from_sequence(["a", None], dtype=pd.StringDtype()),
389412
),
413+
(
414+
# numpy array with string dtype
415+
np.array(["a", "b"], dtype=str),
416+
pd.StringDtype()
417+
.construct_array_type()
418+
._from_sequence(["a", "b"], dtype=pd.StringDtype()),
419+
),
390420
# Boolean
391421
([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")),
392422
([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")),

pandas/tests/arrays/test_datetimelike.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -297,9 +297,7 @@ def test_searchsorted(self):
297297
assert result == 10
298298

299299
@pytest.mark.parametrize("box", [None, "index", "series"])
300-
def test_searchsorted_castable_strings(
301-
self, arr1d, box, string_storage, using_infer_string
302-
):
300+
def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
303301
arr = arr1d
304302
if box is None:
305303
pass
@@ -335,8 +333,7 @@ def test_searchsorted_castable_strings(
335333
TypeError,
336334
match=re.escape(
337335
f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
338-
"or array of those. Got "
339-
f"{'str' if using_infer_string else 'string'} array instead."
336+
"or array of those. Got string array instead."
340337
),
341338
):
342339
arr.searchsorted([str(arr[1]), "baz"])

pandas/tests/base/test_value_counts.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def test_value_counts_inferred(index_or_series, using_infer_string):
114114
else:
115115
exp = np.unique(np.array(s_values, dtype=np.object_))
116116
if using_infer_string:
117-
exp = array(exp)
117+
exp = array(exp, dtype="str")
118118
tm.assert_equal(s.unique(), exp)
119119

120120
assert s.nunique() == 4
@@ -192,7 +192,7 @@ def test_value_counts_bins(index_or_series, using_infer_string):
192192
else:
193193
exp = np.array(["a", "b", np.nan, "d"], dtype=object)
194194
if using_infer_string:
195-
exp = array(exp)
195+
exp = array(exp, dtype="str")
196196
tm.assert_equal(s.unique(), exp)
197197
assert s.nunique() == 3
198198

pandas/tests/dtypes/cast/test_construct_ndarray.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def test_construct_1d_ndarray_preserving_na(
2121
):
2222
result = sanitize_array(values, index=None, dtype=dtype)
2323
if using_infer_string and expected.dtype == object and dtype is None:
24-
tm.assert_extension_array_equal(result, pd.array(expected))
24+
tm.assert_extension_array_equal(result, pd.array(expected, dtype="str"))
2525
else:
2626
tm.assert_numpy_array_equal(result, expected)
2727

0 commit comments

Comments
 (0)