pandas-dev
diff --git a/‎.github/actions/build_pandas/action.yml
Lines changed: 7 additions & 0 deletions b/‎.github/actions/build_pandas/action.yml
Lines changed: 7 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 3 additions & 1 deletion b/‎.pre-commit-config.yaml
Lines changed: 3 additions & 1 deletion
diff --git a/‎ci/code_checks.sh
Lines changed: 1 addition & 129 deletions b/‎ci/code_checks.sh
Lines changed: 1 addition & 129 deletions
diff --git a/‎doc/source/development/contributing_codebase.rst
Lines changed: 1 addition & 2 deletions b/‎doc/source/development/contributing_codebase.rst
Lines changed: 1 addition & 2 deletions
diff --git a/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 5 additions & 1 deletion b/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 5 additions & 1 deletion
diff --git a/‎pandas/_libs/arrays.pyx
Lines changed: 4 additions & 0 deletions b/‎pandas/_libs/arrays.pyx
Lines changed: 4 additions & 0 deletions
diff --git a/‎pandas/_libs/hashtable.pyx
Lines changed: 4 additions & 1 deletion b/‎pandas/_libs/hashtable.pyx
Lines changed: 4 additions & 1 deletion
diff --git a/‎pandas/_libs/hashtable_class_helper.pxi.in
Lines changed: 15 additions & 3 deletions b/‎pandas/_libs/hashtable_class_helper.pxi.in
Lines changed: 15 additions & 3 deletions
diff --git a/‎pandas/_libs/lib.pyx
Lines changed: 4 additions & 4 deletions b/‎pandas/_libs/lib.pyx
Lines changed: 4 additions & 4 deletions
diff --git a/‎pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
Lines changed: 29 additions & 19 deletions b/‎pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
Lines changed: 29 additions & 19 deletions
@@ -22,6 +22,13 @@ runs:
         fi
       shell: bash -el {0}
 
+    - name: Uninstall nomkl
+      run: |
+        if conda list nomkl | grep nomkl 1>/dev/null; then
+          conda remove nomkl -y
+        fi
+      shell: bash -el {0}
+
     - name: Build Pandas
       run: |
         if [[ ${{ inputs.editable }} == "true" ]]; then
 
@@ -23,6 +23,7 @@ repos:
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
+        exclude: ^pandas/tests/frame/test_query_eval.py
     -   id: ruff
         # TODO: remove autofixe-only rules when they are checked by ruff
         name: ruff-selected-autofixes
@@ -31,7 +32,7 @@ repos:
         exclude: ^pandas/tests
         args: [--select, "ANN001,ANN2", --fix-only, --exit-non-zero-on-fix]
     -   id: ruff-format
-        exclude: ^scripts
+        exclude: ^scripts|^pandas/tests/frame/test_query_eval.py
 -   repo: https://github.com/jendrikseipp/vulture
     rev: 'v2.11'
     hooks:
@@ -85,6 +86,7 @@ repos:
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
       - id: rst-inline-touching-normal
+        exclude: ^pandas/tests/frame/test_query_eval.py
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
 -   repo: https://github.com/sphinx-contrib/sphinx-lint
 
@@ -762,8 +762,7 @@ install pandas) by typing::
     your installation is probably fine and you can start contributing!
 
 Often it is worth running only a subset of tests first around your changes before running the
-entire suite (tip: you can use the `pandas-coverage app <https://pandas-coverage-12d2130077bc.herokuapp.com/>`_)
-to find out which tests hit the lines of code you've modified, and then run only those).
+entire suite.
 
 The easiest way to do this is with::
 
 
@@ -31,6 +31,7 @@ Other enhancements
 - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
 - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
 - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
+- :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)
 - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
 - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).
 - :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`)
@@ -543,7 +544,7 @@ Datetimelike
 - Bug in :attr:`is_year_start` where a DateTimeIndex constructed via a date_range with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`)
 - Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`)
 - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
-- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
+- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
 - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
 - Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
 - Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`)
@@ -632,6 +633,7 @@ Period
 Plotting
 ^^^^^^^^
 - Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`)
+- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`)
 - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
 - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)
 
@@ -650,6 +652,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
+- Bug in :meth:`Series.resample` could raise when the the date range ended shortly before a non-existent time. (:issue:`58380`)
 
 Reshaping
 ^^^^^^^^^
@@ -685,6 +688,7 @@ Other
 - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
+- Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
 
@@ -67,6 +67,10 @@ cdef class NDArrayBacked:
         """
         Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.
 
+        The returned array has the same dtype as self.
+
+        Caller is responsible for ensuring `values.dtype == self._ndarray.dtype`.
+
         This should round-trip:
             self == self._from_backing_data(self._ndarray)
         """
 
@@ -30,7 +30,10 @@ from pandas._libs.khash cimport (
     kh_python_hash_func,
     khiter_t,
 )
-from pandas._libs.missing cimport checknull
+from pandas._libs.missing cimport (
+    checknull,
+    is_matching_na,
+)
 
 
 def get_hashtable_trace_domain():
 
@@ -1171,11 +1171,13 @@ cdef class StringHashTable(HashTable):
             const char **vecs
             khiter_t k
             bint use_na_value
+            bint non_null_na_value
 
         if return_inverse:
             labels = np.zeros(n, dtype=np.intp)
         uindexer = np.empty(n, dtype=np.int64)
         use_na_value = na_value is not None
+        non_null_na_value = not checknull(na_value)
 
         # assign pointers and pre-filter out missing (if ignore_na)
         vecs = <const char **>malloc(n * sizeof(char *))
@@ -1186,7 +1188,12 @@ cdef class StringHashTable(HashTable):
 
             if (ignore_na
                 and (not isinstance(val, str)
-                     or (use_na_value and val == na_value))):
+                     or (use_na_value and (
+                        (non_null_na_value and val == na_value) or
+                        (not non_null_na_value and is_matching_na(val, na_value)))
+                        )
+                    )
+                ):
                 # if missing values do not count as unique values (i.e. if
                 # ignore_na is True), we can skip the actual value, and
                 # replace the label with na_sentinel directly
@@ -1452,18 +1459,23 @@ cdef class PyObjectHashTable(HashTable):
             object val
             khiter_t k
             bint use_na_value
-
+            bint non_null_na_value
         if return_inverse:
             labels = np.empty(n, dtype=np.intp)
         use_na_value = na_value is not None
+        non_null_na_value = not checknull(na_value)
 
         for i in range(n):
             val = values[i]
             hash(val)
 
             if ignore_na and (
                 checknull(val)
-                or (use_na_value and val == na_value)
+                or (use_na_value and (
+                    (non_null_na_value and val == na_value) or
+                    (not non_null_na_value and is_matching_na(val, na_value))
+                    )
+                )
             ):
                 # if missing values do not count as unique values (i.e. if
                 # ignore_na is True), skip the hashtable entry for them, and
 
@@ -2699,16 +2699,16 @@ def maybe_convert_objects(ndarray[object] objects,
         seen.object_ = True
 
     elif seen.str_:
-        if using_string_dtype() and is_string_array(objects, skipna=True):
+        if convert_to_nullable_dtype and is_string_array(objects, skipna=True):
             from pandas.core.arrays.string_ import StringDtype
 
-            dtype = StringDtype(na_value=np.nan)
+            dtype = StringDtype()
             return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
 
-        elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):
+        elif using_string_dtype() and is_string_array(objects, skipna=True):
             from pandas.core.arrays.string_ import StringDtype
 
-            dtype = StringDtype()
+            dtype = StringDtype(na_value=np.nan)
             return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
 
         seen.object_ = True
 
@@ -20,14 +20,12 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 #endif // NPY_NO_DEPRECATED_API
 
-#include <Python.h>
-
 #include "pandas/vendored/numpy/datetime/np_datetime.h"
-
 #define NO_IMPORT_ARRAY
 #define PY_ARRAY_UNIQUE_SYMBOL PANDAS_DATETIME_NUMPY
 #include <numpy/ndarrayobject.h>
 #include <numpy/npy_common.h>
+#include <stdbool.h>
 
 #if defined(_WIN32)
 #ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS
@@ -58,12 +56,15 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler");
 #endif
 #endif
 
+#define XSTR(a) STR(a)
+#define STR(a) #a
+
 #define PD_CHECK_OVERFLOW(FUNC)                                                \
   do {                                                                         \
     if ((FUNC) != 0) {                                                         \
       PyGILState_STATE gstate = PyGILState_Ensure();                           \
       PyErr_SetString(PyExc_OverflowError,                                     \
-                      "Overflow occurred in npy_datetimestruct_to_datetime");  \
+                      "Overflow occurred at " __FILE__ ":" XSTR(__LINE__));    \
       PyGILState_Release(gstate);                                              \
       return -1;                                                               \
     }                                                                          \
@@ -139,53 +140,53 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) {
   npy_int64 year, days = 0;
   const int *month_lengths;
 
-  year = dts->year - 1970;
-  days = year * 365;
+  PD_CHECK_OVERFLOW(checked_int64_sub(dts->year, 1970, &year));
+  PD_CHECK_OVERFLOW(checked_int64_mul(year, 365, &days));
 
   /* Adjust for leap years */
   if (days >= 0) {
     /*
      * 1968 is the closest leap year before 1970.
      * Exclude the current year, so add 1.
      */
-    year += 1;
+    PD_CHECK_OVERFLOW(checked_int64_add(year, 1, &year));
     /* Add one day for each 4 years */
-    days += year / 4;
+    PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days));
     /* 1900 is the closest previous year divisible by 100 */
-    year += 68;
+    PD_CHECK_OVERFLOW(checked_int64_add(year, 68, &year));
     /* Subtract one day for each 100 years */
-    days -= year / 100;
+    PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days));
     /* 1600 is the closest previous year divisible by 400 */
-    year += 300;
+    PD_CHECK_OVERFLOW(checked_int64_add(year, 300, &year));
     /* Add one day for each 400 years */
-    days += year / 400;
+    PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days));
   } else {
     /*
      * 1972 is the closest later year after 1970.
      * Include the current year, so subtract 2.
      */
-    year -= 2;
+    PD_CHECK_OVERFLOW(checked_int64_sub(year, 2, &year));
     /* Subtract one day for each 4 years */
-    days += year / 4;
+    PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days));
     /* 2000 is the closest later year divisible by 100 */
-    year -= 28;
+    PD_CHECK_OVERFLOW(checked_int64_sub(year, 28, &year));
     /* Add one day for each 100 years */
-    days -= year / 100;
+    PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days));
     /* 2000 is also the closest later year divisible by 400 */
     /* Subtract one day for each 400 years */
-    days += year / 400;
+    PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days));
   }
 
   month_lengths = days_per_month_table[is_leapyear(dts->year)];
   month = dts->month - 1;
 
   /* Add the months */
   for (i = 0; i < month; ++i) {
-    days += month_lengths[i];
+    PD_CHECK_OVERFLOW(checked_int64_add(days, month_lengths[i], &days));
   }
 
   /* Add the days */
-  days += dts->day - 1;
+  PD_CHECK_OVERFLOW(checked_int64_add(days, dts->day - 1, &days));
 
   return days;
 }
@@ -430,6 +431,15 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
   }
 
   const int64_t days = get_datetimestruct_days(dts);
+  if (days == -1) {
+    PyGILState_STATE gstate = PyGILState_Ensure();
+    bool did_error = PyErr_Occurred() == NULL ? false : true;
+    PyGILState_Release(gstate);
+    if (did_error) {
+      return -1;
+    }
+  }
+
   if (base == NPY_FR_D) {
     return days;
   }