From f60f84f0f7935c429c9944395bed5963d27d78f7 Mon Sep 17 00:00:00 2001 From: Floura Angel Date: Mon, 6 Oct 2025 14:04:50 -0400 Subject: [PATCH 1/4] fix subclass of str for to_datetime --- doc/source/whatsnew/v3.0.0.rst | 2 ++ pandas/_libs/tslibs/strptime.pyx | 5 +++++ pandas/tests/tools/test_to_datetime.py | 12 ++++++++++++ 3 files changed, 19 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e7d70ebb7b27f..ea8f62f76dc90 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -914,6 +914,8 @@ Datetimelike - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`) - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) +- Bug in :func:`pandas.to_datetime` where passing an ``lxml.etree._ElementUnicodeResult`` together with ``format=...`` raised ``TypeError``. Now subclasses of ``str`` are handled. (:issue:`60933`, :pr:`62604`) + Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index b443aa7bede22..025cd6c04cb69 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -405,6 +405,11 @@ def array_strptime( if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue + elif type(val) is not str: + # GH#60933: normalize string subclasses + # (e.g. lxml.etree._ElementUnicodeResult). 
The downstream Cython + # path expects an exact `str`, so ensure we pass a plain str + val = str(val) elif checknull_with_nat_and_na(val): iresult[i] = NPY_NAT continue diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index cedd9dfe82e5a..5e9de3572264e 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3791,3 +3791,15 @@ def test_to_datetime_wrapped_datetime64_ps(): ["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None ) tm.assert_index_equal(result, expected) + + +def test_to_datetime_lxml_elementunicoderesult_with_format(cache): + pytest.importorskip("lxml") + from lxml import etree # pyright: ignore[reportMissingImports] + + s = "2025-02-05 16:59:57" + node = etree.XML(f"<date>{s}</date>") + val = node.xpath("/date/node()")[0] # _ElementUnicodeResult + + out = to_datetime(Series([val]), format="%Y-%m-%d %H:%M:%S", cache=cache) + tm.assert_equal(out.iloc[0], Timestamp(s)) From bd2b59895fe4a899f8422c0c938f80f2df8c9621 Mon Sep 17 00:00:00 2001 From: FLOURA ANGEL Date: Mon, 6 Oct 2025 19:03:45 -0400 Subject: [PATCH 2/4] Apply suggestion from @mroeschke Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/tools/test_to_datetime.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index e23260e45c135..6e4aa9ebe4cf9 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3793,8 +3793,7 @@ def test_to_datetime_wrapped_datetime64_ps(): def test_to_datetime_lxml_elementunicoderesult_with_format(cache): - pytest.importorskip("lxml") - from lxml import etree # pyright: ignore[reportMissingImports] + etree = pytest.importorskip("lxml.etree") s = "2025-02-05 16:59:57" node = etree.XML(f"<date>{s}</date>") From 88ca67559feeb464ea17818f215c0b087ad2c456 Mon Sep 17 00:00:00 2001 From: FLOURA ANGEL Date: Mon, 6 Oct 
2025 19:03:56 -0400 Subject: [PATCH 3/4] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b2e39a9ea468f..4dd8cf39dd55d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -971,7 +971,7 @@ Datetimelike - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`) - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) -- Bug in :func:`pandas.to_datetime` where passing an ``lxml.etree._ElementUnicodeResult`` together with ``format=...`` raised ``TypeError``. Now subclasses of ``str`` are handled. (:issue:`60933`, :pr:`62604`) +- Bug in :func:`to_datetime` where passing an ``lxml.etree._ElementUnicodeResult`` together with ``format`` raised ``TypeError``. Now subclasses of ``str`` are handled. 
(:issue:`60933`) Timedelta From 215bf1558dc6bdb656961667a462278e34c91e01 Mon Sep 17 00:00:00 2001 From: FLOURA ANGEL Date: Mon, 6 Oct 2025 19:04:02 -0400 Subject: [PATCH 4/4] Update pandas/tests/tools/test_to_datetime.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 6e4aa9ebe4cf9..f59339cacd8d8 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3800,4 +3800,4 @@ def test_to_datetime_lxml_elementunicoderesult_with_format(cache): val = node.xpath("/date/node()")[0] # _ElementUnicodeResult out = to_datetime(Series([val]), format="%Y-%m-%d %H:%M:%S", cache=cache) - tm.assert_equal(out.iloc[0], Timestamp(s)) + assert out.iloc[0] == Timestamp(s)