diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 293f1cb6f5e79..4dd8cf39dd55d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -971,6 +971,8 @@ Datetimelike - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`) - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) +- Bug in :func:`to_datetime` where passing an ``lxml.etree._ElementUnicodeResult`` together with ``format`` raised ``TypeError``. Now subclasses of ``str`` are handled. (:issue:`60933`) + Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index b443aa7bede22..025cd6c04cb69 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -405,6 +405,11 @@ def array_strptime( if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue + elif type(val) is not str: + # GH#60933: normalize string subclasses + # (e.g. lxml.etree._ElementUnicodeResult). 
The downstream Cython + # path expects an exact `str`, so ensure we pass a plain str + val = str(val) elif checknull_with_nat_and_na(val): iresult[i] = NPY_NAT continue diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index eddfeb80967ef..f59339cacd8d8 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3790,3 +3790,14 @@ def test_to_datetime_wrapped_datetime64_ps(): ["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None ) tm.assert_index_equal(result, expected) + + +def test_to_datetime_lxml_elementunicoderesult_with_format(cache): + etree = pytest.importorskip("lxml.etree") + + s = "2025-02-05 16:59:57" + node = etree.XML(f"<date>{s}</date>") + val = node.xpath("/date/node()")[0] # _ElementUnicodeResult + + out = to_datetime(Series([val]), format="%Y-%m-%d %H:%M:%S", cache=cache) + assert out.iloc[0] == Timestamp(s)