Commit 0973086

Merge remote-tracking branch 'upstream/main' into fix/read_csv-large-num

2 parents: 2e0af7a + 8476e0f


69 files changed: +1028 -380 lines. Only a subset of the changed files is shown below.

doc/source/getting_started/comparison/comparison_with_sql.rst

Lines changed: 36 additions & 0 deletions

@@ -270,6 +270,42 @@ column with another DataFrame's index.
     indexed_df2 = df2.set_index("key")
     pd.merge(df1, indexed_df2, left_on="key", right_index=True)
 
+:meth:`~pandas.merge` also supports joining on multiple columns by passing a list of column names.
+
+.. code-block:: sql
+
+    SELECT *
+    FROM df1_multi
+    INNER JOIN df2_multi
+        ON df1_multi.key1 = df2_multi.key1
+        AND df1_multi.key2 = df2_multi.key2;
+
+.. ipython:: python
+
+    df1_multi = pd.DataFrame({
+        "key1": ["A", "B", "C", "D"],
+        "key2": [1, 2, 3, 4],
+        "value": np.random.randn(4)
+    })
+    df2_multi = pd.DataFrame({
+        "key1": ["B", "D", "D", "E"],
+        "key2": [2, 4, 4, 5],
+        "value": np.random.randn(4)
+    })
+    pd.merge(df1_multi, df2_multi, on=["key1", "key2"])
+
+If the columns have different names between DataFrames, ``on`` can be replaced with ``left_on`` and
+``right_on``.
+
+.. ipython:: python
+
+    df2_multi = pd.DataFrame({
+        "key_1": ["B", "D", "D", "E"],
+        "key_2": [2, 4, 4, 5],
+        "value": np.random.randn(4)
+    })
+    pd.merge(df1_multi, df2_multi, left_on=["key1", "key2"], right_on=["key_1", "key_2"])
+
 LEFT OUTER JOIN
 ~~~~~~~~~~~~~~~
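For context, the multi-column join added above runs standalone as below; the random value columns differ per run, so this sketch only checks which key pairs survive the inner join:

    import numpy as np
    import pandas as pd

    df1_multi = pd.DataFrame({"key1": ["A", "B", "C", "D"],
                              "key2": [1, 2, 3, 4],
                              "value": np.random.randn(4)})
    df2_multi = pd.DataFrame({"key1": ["B", "D", "D", "E"],
                              "key2": [2, 4, 4, 5],
                              "value": np.random.randn(4)})

    # Only ("B", 2) and ("D", 4) match on both keys, and ("D", 4) occurs
    # twice on the right side, so the inner join yields three rows.
    merged = pd.merge(df1_multi, df2_multi, on=["key1", "key2"])
    assert len(merged) == 3
    print(merged)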

doc/source/whatsnew/v3.0.0.rst

Lines changed: 5 additions & 1 deletion

@@ -215,6 +215,7 @@ Other enhancements
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
 - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
+- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`)
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`)

@@ -933,6 +934,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
+- Bug in :func:`testing.assert_index_equal` raising ``TypeError`` instead of ``AssertionError`` for incomparable ``CategoricalIndex`` when ``check_categorical=True`` and ``exact=False`` (:issue:`61935`)
 - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)

@@ -1006,8 +1008,8 @@ Conversion
 
 Strings
 ^^^^^^^
+- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
 - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
-
 
 Interval
 ^^^^^^^^

@@ -1077,6 +1079,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list of non-string values. (:issue:`59303`)
+- Bug in :meth:`read_csv` with ``engine="c"`` reading big integers as strings. Now reads them as Python integers. (:issue:`51295`)
 - Bug in :meth:`read_csv` with ``engine="c"`` reading large float numbers with preceding integers as strings. Now reads them as floats. (:issue:`51295`)
 - Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)

@@ -1134,6 +1137,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
 - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
+- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` where the end of window was not indexed correctly. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
 
 Reshaping
 ^^^^^^^^^
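The branch name (fix/read_csv-large-num) points at the two read_csv big-number entries above as the substance of this merge. A minimal reproduction sketch of the described behavior, assuming the fix is present in the installed build:

    import io
    import pandas as pd

    # 2**70 = 1180591620717411303424 overflows int64; per the whatsnew entry
    # above, the C engine now parses it as a Python int instead of a string.
    buf = io.StringIO("x\n1180591620717411303424\n")
    df = pd.read_csv(buf, engine="c")
    value = df["x"].iloc[0]
    print(value, type(value))  # expected: a Python int, not a str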

pandas/_config/config.py

Lines changed: 2 additions & 2 deletions

@@ -271,7 +271,7 @@ def set_option(*args) -> None:
     if not nargs or nargs % 2 != 0:
         raise ValueError("Must provide an even number of non-keyword arguments")
 
-    for k, v in zip(args[::2], args[1::2]):
+    for k, v in zip(args[::2], args[1::2], strict=True):
         key = _get_single_key(k)
 
         opt = _get_registered_option(key)

@@ -502,7 +502,7 @@ def option_context(*args) -> Generator[None]:
             "option_context(pat, val, pat, val...)."
         )
 
-    ops = tuple(zip(args[::2], args[1::2]))
+    ops = tuple(zip(args[::2], args[1::2], strict=True))
     try:
         undo = tuple((pat, get_option(pat)) for pat, val in ops)
         for pat, val in ops:
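For context, the flat pair convention these loops consume; the even-arity check fires before either zip runs, so strict=True here is defensive hardening rather than a user-visible change:

    import pandas as pd

    # set_option consumes flat (pattern, value, pattern, value, ...) pairs.
    pd.set_option("display.max_rows", 50, "display.max_columns", 10)

    # option_context applies the same pairs temporarily.
    with pd.option_context("display.max_rows", 5):
        pass  # previous values are restored on exit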

pandas/_libs/hashing.pyx

Lines changed: 2 additions & 0 deletions

@@ -91,6 +91,8 @@ def hash_object_array(
             hash(val)
             data = <bytes>str(val).encode(encoding)
         else:
+            free(vecs)
+            free(lens)
             raise TypeError(
                 f"{val} of type {type(val)} is not a valid type for hashing, "
                 "must be string or null"

pandas/_libs/parsers.pyx

Lines changed: 38 additions & 3 deletions

@@ -29,6 +29,7 @@ from cpython.exc cimport (
     PyErr_Fetch,
     PyErr_Occurred,
 )
+from cpython.long cimport PyLong_FromString
 from cpython.object cimport PyObject
 from cpython.ref cimport (
     Py_INCREF,

@@ -1085,9 +1086,13 @@ cdef class TextReader:
                         np.dtype("object"), i, start, end, 0,
                         0, na_hashset, na_fset)
                 except OverflowError:
-                    col_res, na_count = self._convert_with_dtype(
-                        np.dtype("object"), i, start, end, na_filter,
-                        0, na_hashset, na_fset)
+                    try:
+                        col_res, na_count = _try_pylong(self.parser, i, start,
+                                                        end, na_filter, na_hashset)
+                    except ValueError:
+                        col_res, na_count = self._convert_with_dtype(
+                            np.dtype("object"), i, start, end, 0,
+                            0, na_hashset, na_fset)
 
                 if col_res is not None:
                     break

@@ -1929,6 +1934,36 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col,
 
     return 0
 
+cdef _try_pylong(parser_t *parser, Py_ssize_t col,
+                 int64_t line_start, int64_t line_end,
+                 bint na_filter, kh_str_starts_t *na_hashset):
+    cdef:
+        int na_count = 0
+        Py_ssize_t lines
+        coliter_t it
+        const char *word = NULL
+        ndarray[object] result
+        object NA = na_values[np.object_]
+
+    lines = line_end - line_start
+    result = np.empty(lines, dtype=object)
+    coliter_setup(&it, parser, col, line_start)
+
+    for i in range(lines):
+        COLITER_NEXT(it, word)
+        if na_filter and kh_get_str_starts_item(na_hashset, word):
+            # in the hash table
+            na_count += 1
+            result[i] = NA
+            continue
+
+        py_int = PyLong_FromString(word, NULL, 10)
+        if py_int is None:
+            raise ValueError("Invalid integer ", word)
+        result[i] = py_int
+
+    return result, na_count
+
 
 # -> tuple[ndarray[bool], int]
 cdef _try_bool_flex(parser_t *parser, int64_t col,
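A rough pure-Python analogue of the new fallback, with hypothetical names, to show the control flow: the int64 path raises OverflowError, _try_pylong then parses each token as an arbitrary-precision integer, and only if that also fails does the column degrade to object-dtype strings:

    def try_pylong_sketch(words, na_values):
        """Hypothetical pure-Python mirror of _try_pylong."""
        result, na_count = [], 0
        for word in words:
            if word in na_values:      # stands in for the na_hashset lookup
                result.append(None)
                na_count += 1
                continue
            # int(word, 10) plays the role of PyLong_FromString and raises
            # ValueError on bad tokens, triggering the object-dtype fallback.
            result.append(int(word, 10))
        return result, na_count

    print(try_pylong_sketch(["9" * 30, "NaN", "42"], {"NaN"}))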

pandas/_libs/tslibs/offsets.pyx

Lines changed: 30 additions & 7 deletions

@@ -5188,6 +5188,27 @@ INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
 _offset_map = {}
 
 
+deprec_to_valid_alias = {
+    "H": "h",
+    "BH": "bh",
+    "CBH": "cbh",
+    "T": "min",
+    "S": "s",
+    "L": "ms",
+    "U": "us",
+    "N": "ns",
+}
+
+
+def raise_invalid_freq(freq: str, extra_message: str | None = None) -> None:
+    msg = f"Invalid frequency: {freq}."
+    if extra_message is not None:
+        msg += f" {extra_message}"
+    if freq in deprec_to_valid_alias:
+        msg += f" Did you mean {deprec_to_valid_alias[freq]}?"
+    raise ValueError(msg)
+
+
 def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
     if name in _lite_rule_alias:
         return name

@@ -5236,7 +5257,7 @@ def _validate_to_offset_alias(alias: str, is_period: bool) -> None:
     if (alias.upper() != alias and
             alias.lower() not in {"s", "ms", "us", "ns"} and
             alias.upper().split("-")[0].endswith(("S", "E"))):
-        raise ValueError(INVALID_FREQ_ERR_MSG.format(alias))
+        raise ValueError(raise_invalid_freq(freq=alias))
     if (
         is_period and
         alias in c_OFFSET_TO_PERIOD_FREQSTR and

@@ -5267,8 +5288,9 @@ def _get_offset(name: str) -> BaseOffset:
             offset = klass._from_name(*split[1:])
         except (ValueError, TypeError, KeyError) as err:
             # bad prefix or suffix
-            raise ValueError(INVALID_FREQ_ERR_MSG.format(
-                f"{name}, failed to parse with error message: {repr(err)}")
+            raise_invalid_freq(
+                freq=name,
+                extra_message=f"Failed to parse with error message: {repr(err)}."
             )
         # cache
         _offset_map[name] = offset

@@ -5399,9 +5421,10 @@ cpdef to_offset(freq, bint is_period=False):
                 else:
                     result = result + offset
         except (ValueError, TypeError) as err:
-            raise ValueError(INVALID_FREQ_ERR_MSG.format(
-                f"{freq}, failed to parse with error message: {repr(err)}")
-            ) from err
+            raise_invalid_freq(
+                freq=freq,
+                extra_message=f"Failed to parse with error message: {repr(err)}"
+            )
 
     # TODO(3.0?) once deprecation of "d" is enforced, the check for it here
     # can be removed

@@ -5417,7 +5440,7 @@ cpdef to_offset(freq, bint is_period=False):
         result = None
 
     if result is None:
-        raise ValueError(INVALID_FREQ_ERR_MSG.format(freq))
+        raise_invalid_freq(freq=freq)
 
     try:
         has_period_dtype_code = hasattr(result, "_period_dtype_code")
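A sketch of the user-visible effect, assuming the deprecated uppercase aliases now raise rather than warn in this version:

    import pandas as pd

    # "T" (minutes) was deprecated in favor of "min"; raise_invalid_freq now
    # appends a suggestion instead of a bare "Invalid frequency: T".
    try:
        pd.date_range("2024-01-01", periods=3, freq="T")
    except ValueError as exc:
        print(exc)  # e.g. "Invalid frequency: T. Did you mean min?"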

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 3 additions & 0 deletions

@@ -2068,6 +2068,9 @@ class Timedelta(_Timedelta):
 
         disallow_ambiguous_unit(unit)
 
+        cdef:
+            int64_t new_value
+
         # GH 30543 if pd.Timedelta already passed, return it
         # check that only value is passed
         if isinstance(value, _Timedelta):
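The added cdef block only declares a typed local for the constructor's value plumbing; construction behavior should be unchanged. A quick sanity check of the paths it sits in front of:

    import pandas as pd

    # Fast path: an existing Timedelta passes straight through; the
    # value/unit path presumably uses the newly typed new_value local.
    td = pd.Timedelta(5, unit="s")
    assert pd.Timedelta(td) == td == pd.Timedelta(seconds=5)
    print(td)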

pandas/_libs/window/aggregations.pyx

Lines changed: 1 addition & 1 deletion

@@ -442,7 +442,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
 
         # Over the first window, observations can only be added
         # never removed
-        if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
+        if i == 0 or not is_monotonic_increasing_bounds or s < end[i]:
 
             prev_value = values[s]
             num_consecutive_same_value = 0
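A quick correctness check of the kind this window-bound fix targets (the issues linked in the whatsnew entry have the exact failing inputs; this just checks roll_var against the definition on a plain window):

    import pandas as pd

    s = pd.Series([1.0, 2.0, 4.0, 8.0, 16.0])
    rolled = s.rolling(window=3).var()

    # The last full window is [4.0, 8.0, 16.0]; compare against Series.var,
    # which uses the same ddof=1 definition.
    manual = s.iloc[2:5].var()
    assert abs(rolled.iloc[-1] - manual) < 1e-12
    print(rolled)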

pandas/_testing/asserters.py

Lines changed: 11 additions & 1 deletion

@@ -325,7 +325,17 @@ def _check_types(left, right, obj: str = "Index") -> None:
     # skip exact index checking when `check_categorical` is False
     elif check_exact and check_categorical:
         if not left.equals(right):
-            mismatch = left._values != right._values
+            # _values compare can raise TypeError (non-comparable
+            # categoricals, GH#61935)
+            try:
+                mismatch = left._values != right._values
+            except TypeError:
+                raise_assert_detail(
+                    obj,
+                    "types are not comparable (non-matching categorical categories)",
+                    left,
+                    right,
+                )
 
             if not isinstance(mismatch, np.ndarray):
                 mismatch = cast("ExtensionArray", mismatch).fillna(True)
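A sketch of the restored behavior per the whatsnew entry for GH#61935: comparing CategoricalIndex objects with incompatible categories should fail the assertion instead of leaking a TypeError:

    import pandas as pd

    left = pd.CategoricalIndex(["a", "b"], categories=["a", "b"])
    right = pd.CategoricalIndex([1, 2], categories=[1, 2])

    try:
        pd.testing.assert_index_equal(left, right, exact=False,
                                      check_categorical=True)
    except AssertionError as exc:  # previously a TypeError escaped here
        print(type(exc).__name__)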

pandas/core/apply.py

Lines changed: 6 additions & 4 deletions

@@ -564,7 +564,7 @@ def compute_dict_like(
             indices = selected_obj.columns.get_indexer_for([key])
             labels = selected_obj.columns.take(indices)
             label_to_indices = defaultdict(list)
-            for index, label in zip(indices, labels):
+            for index, label in zip(indices, labels, strict=True):
                 label_to_indices[label].append(index)
 
             key_data = [

@@ -618,7 +618,9 @@ def wrap_results_dict_like(
         if all(is_ndframe):
             results = [result for result in result_data if not result.empty]
             keys_to_use: Iterable[Hashable]
-            keys_to_use = [k for k, v in zip(result_index, result_data) if not v.empty]
+            keys_to_use = [
+                k for k, v in zip(result_index, result_data, strict=True) if not v.empty
+            ]
             # Have to check, if at least one DataFrame is not empty.
             if keys_to_use == []:
                 keys_to_use = result_index

@@ -1359,7 +1361,7 @@ def series_generator(self) -> Generator[Series]:
                 yield obj._ixs(i, axis=0)
 
         else:
-            for arr, name in zip(values, self.index):
+            for arr, name in zip(values, self.index, strict=True):
                 # GH#35462 re-pin mgr in case setitem changed it
                 ser._mgr = mgr
                 mgr.set_values(arr)

@@ -1913,7 +1915,7 @@ def relabel_result(
     from pandas.core.indexes.base import Index
 
     reordered_indexes = [
-        pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
+        pair[0] for pair in sorted(zip(columns, order, strict=True), key=lambda t: t[1])
     ]
     reordered_result_in_dict: dict[Hashable, Series] = {}
     idx = 0
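These zip(..., strict=True) edits are hardening, not behavior changes: each zips two sequences the surrounding code already builds to equal length, so a future length mismatch fails loudly instead of silently truncating. The Python 3.10+ semantics being relied on:

    # strict=True turns silent truncation into an immediate error.
    assert list(zip([1, 2], ["a", "b"], strict=True)) == [(1, "a"), (2, "b")]

    try:
        list(zip([1, 2, 3], ["a", "b"], strict=True))
    except ValueError as exc:
        print(exc)  # zip() argument 2 is shorter than argument 1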
