Commit 449313e

Merge branch 'pandas-dev:main' into main

2 parents: efbb2ce + a5ac798


49 files changed: +904, -442 lines

.github/workflows/comment-commands.yml

Lines changed: 0 additions & 9 deletions
@@ -9,15 +9,6 @@ permissions:
   pull-requests: write

 jobs:
-  issue_assign:
-    runs-on: ubuntu-24.04
-    if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
-    concurrency:
-      group: ${{ github.actor }}-issue-assign
-    steps:
-      - run: |
-          echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
-          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
   preview_docs:
     runs-on: ubuntu-24.04
     if: github.event.issue.pull_request && github.event.comment.body == '/preview'

.pre-commit-config.yaml

Lines changed: 5 additions & 5 deletions
@@ -19,7 +19,7 @@ ci:
   skip: [pyright, mypy]
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.12.11
+    rev: v0.13.3
     hooks:
       - id: ruff
         args: [--exit-non-zero-on-fix]
@@ -46,7 +46,7 @@ repos:
       - id: codespell
         types_or: [python, rst, markdown, cython, c]
   - repo: https://github.com/MarcoGorelli/cython-lint
-    rev: v0.16.7
+    rev: v0.17.0
     hooks:
       - id: cython-lint
       - id: double-quote-cython-strings
@@ -67,7 +67,7 @@ repos:
       - id: trailing-whitespace
         args: [--markdown-linebreak-ext=md]
   - repo: https://github.com/PyCQA/isort
-    rev: 6.0.1
+    rev: 6.1.0
     hooks:
       - id: isort
   - repo: https://github.com/asottile/pyupgrade
@@ -92,14 +92,14 @@ repos:
       - id: sphinx-lint
         args: ["--enable", "all", "--disable", "line-too-long"]
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v21.1.0
+    rev: v21.1.2
     hooks:
       - id: clang-format
         files: ^pandas/_libs/src|^pandas/_libs/include
         args: [-i]
         types_or: [c, c++]
   - repo: https://github.com/trim21/pre-commit-mirror-meson
-    rev: v1.9.0
+    rev: v1.9.1
     hooks:
       - id: meson-fmt
         args: ['--inplace']

doc/source/development/contributing.rst

Lines changed: 6 additions & 8 deletions
@@ -36,16 +36,14 @@ and `good first issue
 <https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+sort%3Aupdated-desc+label%3A%22good+first+issue%22+no%3Aassignee>`_
 are typically good for newer contributors.

-Once you've found an interesting issue, it's a good idea to assign the issue to yourself,
-so nobody else duplicates the work on it. On the Github issue, a comment with the exact
-text ``take`` to automatically assign you the issue
-(this will take seconds and may require refreshing the page to see it).
+Once you've found an interesting issue, leave a comment with your intention
+to start working on it. If somebody else has
+already commented on issue but they have shown a lack of activity in the issue
+or a pull request in the past 2-3 weeks, you may take it over.

 If for whatever reason you are not able to continue working with the issue, please
-unassign it, so other people know it's available again. You can check the list of
-assigned issues, since people may not be working in them anymore. If you want to work on one
-that is assigned, feel free to kindly ask the current assignee if you can take it
-(please allow at least a week of inactivity before considering work in the issue discontinued).
+leave a comment on an issue, so other people know it's available again. You can check the list of
+assigned issues, since people may not be working in them anymore.

 We have several :ref:`contributor community <community>` communication channels, which you are
 welcome to join, and ask questions as you figure things out. Among them are regular meetings for

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
@@ -948,6 +948,7 @@ Datetimelike
 - Bug in :class:`Timestamp` constructor failing to raise when given a ``np.datetime64`` object with non-standard unit (:issue:`25611`)
 - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
 - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
+- Bug in :func:`to_datetime` where passing an ``lxml.etree._ElementUnicodeResult`` together with ``format`` raised ``TypeError``. Now subclasses of ``str`` are handled. (:issue:`60933`)
 - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
 - Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
 - Bug in :meth:`DataFrame.fillna` raising an ``AssertionError`` instead of ``OutOfBoundsDatetime`` when filling a ``datetime64[ns]`` column with an out-of-bounds timestamp. Now correctly raises ``OutOfBoundsDatetime``. (:issue:`61208`)
@@ -973,6 +974,7 @@ Datetimelike
 - Bug in retaining frequency in :meth:`value_counts` specifically for :meth:`DatetimeIndex` and :meth:`TimedeltaIndex` (:issue:`33830`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

+
 Timedelta
 ^^^^^^^^^
 - Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`)
@@ -1081,6 +1083,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
 - Bug in :meth:`read_csv` with ``engine="c"`` reading big integers as strings. Now reads them as python integers. (:issue:`51295`)
+- Bug in :meth:`read_csv` with ``engine="c"`` reading large float numbers with preceding integers as strings. Now reads them as floats. (:issue:`51295`)
 - Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)

pandas/_libs/parsers.pyx

Lines changed: 56 additions & 0 deletions
@@ -1070,6 +1070,10 @@ cdef class TextReader:
             else:
                 col_res = None
                 for dt in self.dtype_cast_order:
+                    if (dt.kind in "iu" and
+                            self._column_has_float(i, start, end, na_filter, na_hashset)):
+                        continue
+
                     try:
                         col_res, na_count = self._convert_with_dtype(
                             dt, i, start, end, na_filter, 0, na_hashset, na_fset)
@@ -1347,6 +1351,58 @@ cdef class TextReader:
         else:
             return None

+    cdef bint _column_has_float(self, Py_ssize_t col,
+                                int64_t start, int64_t end,
+                                bint na_filter, kh_str_starts_t *na_hashset):
+        """Check if the column contains any float number."""
+        cdef:
+            Py_ssize_t i, j, lines = end - start
+            coliter_t it
+            const char *word = NULL
+            const char *ignored_chars = " +-"
+            const char *digits = "0123456789"
+            const char *float_indicating_chars = "eE"
+            char null_byte = 0
+
+        coliter_setup(&it, self.parser, col, start)
+
+        for i in range(lines):
+            COLITER_NEXT(it, word)
+
+            if na_filter and kh_get_str_starts_item(na_hashset, word):
+                continue
+
+            found_first_digit = False
+            j = 0
+            while word[j] != null_byte:
+                if word[j] == self.parser.decimal:
+                    return True
+                elif not found_first_digit and word[j] in ignored_chars:
+                    # no-op
+                    pass
+                elif not found_first_digit and word[j] not in digits:
+                    # word isn't numeric
+                    return False
+                elif not found_first_digit and word[j] in digits:
+                    found_first_digit = True
+                elif word[j] in float_indicating_chars:
+                    # preceding chars indicates numeric and
+                    # current char indicates float
+                    return True
+                elif word[j] not in digits:
+                    # previous characters indicates numeric
+                    # current character shows otherwise
+                    return False
+                elif word[j] in digits:
+                    # no-op
+                    pass
+                else:
+                    raise AssertionError(
+                        f"Unhandled case {word[j]=} {found_first_digit=}"
+                    )
+                j += 1
+
+        return False

 # Factor out code common to TextReader.__dealloc__ and TextReader.close
 # It cannot be a class method, since calling self.close() in __dealloc__

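A quick, hedged sketch (not part of the commit) of the user-facing behavior the new _column_has_float pre-check targets: with engine="c", a column holding a large float whose leading digits would overflow the integer parsers should now come back as float64 rather than as strings. The sample value below is made up for illustration.

from io import StringIO

import pandas as pd

# Hypothetical data: the digits before the decimal point alone overflow int64.
csv = StringIO("a\n20241231123456789012345.5\n1.25\n")

df = pd.read_csv(csv, engine="c")
print(df["a"].dtype)    # expected: float64 with the GH#51295 fix, not object
print(df["a"].iloc[0])  # parsed as a Python float, not a str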
pandas/_libs/tslibs/strptime.pyx

Lines changed: 5 additions & 0 deletions
@@ -405,6 +405,11 @@ def array_strptime(
             if len(val) == 0 or val in nat_strings:
                 iresult[i] = NPY_NAT
                 continue
+            elif type(val) is not str:
+                # GH#60933: normalize string subclasses
+                # (e.g. lxml.etree._ElementUnicodeResult). The downstream Cython
+                # path expects an exact `str`, so ensure we pass a plain str
+                val = str(val)
         elif checknull_with_nat_and_na(val):
             iresult[i] = NPY_NAT
             continue

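This pairs with the GH#60933 whatsnew entry above. A hedged sketch of the user-facing path, using a toy str subclass as a stand-in for lxml.etree._ElementUnicodeResult so the snippet has no lxml dependency:

import pandas as pd


class TextNode(str):
    # Toy stand-in for lxml.etree._ElementUnicodeResult; any str subclass
    # should now be normalized to a plain str before strptime parsing.
    pass


ts = pd.to_datetime(TextNode("2024-01-31 12:30:00"), format="%Y-%m-%d %H:%M:%S")
print(ts)  # Timestamp('2024-01-31 12:30:00')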
pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 2 deletions
@@ -888,7 +888,7 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
             boxed = self._box_pa(other)
         except pa.lib.ArrowInvalid:
             # e.g. GH#60228 [1, "b"] we have to operate pointwise
-            res_values = [op(x, y) for x, y in zip(self, other)]
+            res_values = [op(x, y) for x, y in zip(self, other, strict=True)]
             result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
         else:
             rtype = boxed.type
@@ -2713,7 +2713,7 @@ def _str_extract(self, pat: str, flags: int = 0, expand: bool = True):
         if expand:
             return {
                 col: self._from_pyarrow_array(pc.struct_field(result, [i]))
-                for col, i in zip(groups, range(result.type.num_fields))
+                for col, i in zip(groups, range(result.type.num_fields), strict=True)
             }
         else:
             return type(self)(pc.struct_field(result, [0]))

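Several files in this commit add strict=True to zip() calls (available since Python 3.10). A small illustration of the difference it makes when the inputs ever disagree in length:

left = [1, 2, 3]
right = ["a", "b"]

# Default zip() silently truncates to the shorter input.
print(list(zip(left, right)))  # [(1, 'a'), (2, 'b')]

# strict=True turns the silent truncation into an explicit error.
try:
    list(zip(left, right, strict=True))
except ValueError as exc:
    print(exc)  # zip() argument 2 is shorter than argument 1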
pandas/core/arrays/base.py

Lines changed: 2 additions & 2 deletions
@@ -2869,7 +2869,7 @@ def convert_values(param):

         # If the operator is not defined for the underlying objects,
         # a TypeError should be raised
-        res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
+        res = [op(a, b) for (a, b) in zip(lvalues, rvalues, strict=True)]

         def _maybe_convert(arr):
             if coerce_to_dtype:
@@ -2885,7 +2885,7 @@ def _maybe_convert(arr):
             return res

         if op.__name__ in {"divmod", "rdivmod"}:
-            a, b = zip(*res)
+            a, b = zip(*res, strict=True)
             return _maybe_convert(a), _maybe_convert(b)

         return _maybe_convert(res)

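The second hunk applies the same guard in the unzip direction: res is a list of (quotient, remainder) pairs from divmod, and zip(*res, strict=True) splits it into two tuples, raising if any pair were ragged. A hedged sketch with toy values:

res = [divmod(a, b) for a, b in [(7, 3), (9, 4), (10, 5)]]

quotients, remainders = zip(*res, strict=True)
print(quotients)   # (2, 2, 2)
print(remainders)  # (1, 1, 0)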
pandas/core/arrays/categorical.py

Lines changed: 7 additions & 3 deletions
@@ -2,6 +2,7 @@

 from csv import QUOTE_NONNUMERIC
 from functools import partial
+import itertools
 import operator
 from shutil import get_terminal_size
 from typing import (
@@ -2429,8 +2430,8 @@ def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]:
             ensure_platform_int(self.codes), categories.size
         )
         counts = ensure_int64(counts).cumsum()
-        _result = (r[start:end] for start, end in zip(counts, counts[1:]))
-        return dict(zip(categories, _result))
+        _result = (r[start:end] for start, end in itertools.pairwise(counts))
+        return dict(zip(categories, _result, strict=True))

     # ------------------------------------------------------------------
     # Reductions
@@ -3165,5 +3166,8 @@ def factorize_from_iterables(iterables) -> tuple[list[np.ndarray], list[Index]]:
         # For consistency, it should return two empty lists.
         return [], []

-    codes, categories = zip(*(factorize_from_iterable(it) for it in iterables))
+    codes, categories = zip(
+        *(factorize_from_iterable(it) for it in iterables),
+        strict=True,
+    )
     return list(codes), list(categories)

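The _reverse_indexer hunk swaps zip(counts, counts[1:]) for itertools.pairwise (Python 3.10+), which yields the same consecutive (start, end) pairs without building the counts[1:] slice. A hedged sketch with toy cumulative counts:

import itertools

counts = [0, 2, 5, 9]  # toy cumulative counts, one boundary per category

print(list(zip(counts, counts[1:])))     # [(0, 2), (2, 5), (5, 9)]
print(list(itertools.pairwise(counts)))  # [(0, 2), (2, 5), (5, 9)]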
pandas/core/arrays/datetimelike.py

Lines changed: 1 addition & 1 deletion
@@ -2374,7 +2374,7 @@ def _concat_same_type(
         to_concat = [x for x in to_concat if len(x)]

         if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
-            pairs = zip(to_concat[:-1], to_concat[1:])
+            pairs = zip(to_concat[:-1], to_concat[1:], strict=True)
             if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
                 new_freq = obj.freq
                 new_obj._freq = new_freq
