
Commit efbb2ce

Merge branch 'main' into main
2 parents 163c0f3 + 8476e0f

88 files changed: +1791 additions, -1063 deletions


doc/source/getting_started/comparison/comparison_with_sql.rst

Lines changed: 36 additions & 0 deletions

@@ -270,6 +270,42 @@ column with another DataFrame's index.

     indexed_df2 = df2.set_index("key")
     pd.merge(df1, indexed_df2, left_on="key", right_index=True)

+:meth:`~pandas.merge` also supports joining on multiple columns by passing a list of column names.
+
+.. code-block:: sql
+
+    SELECT *
+    FROM df1_multi
+    INNER JOIN df2_multi
+        ON df1_multi.key1 = df2_multi.key1
+        AND df1_multi.key2 = df2_multi.key2;
+
+.. ipython:: python
+
+    df1_multi = pd.DataFrame({
+        "key1": ["A", "B", "C", "D"],
+        "key2": [1, 2, 3, 4],
+        "value": np.random.randn(4)
+    })
+    df2_multi = pd.DataFrame({
+        "key1": ["B", "D", "D", "E"],
+        "key2": [2, 4, 4, 5],
+        "value": np.random.randn(4)
+    })
+    pd.merge(df1_multi, df2_multi, on=["key1", "key2"])
+
+If the columns have different names between DataFrames, ``on`` can be replaced with ``left_on`` and
+``right_on``.
+
+.. ipython:: python
+
+    df2_multi = pd.DataFrame({
+        "key_1": ["B", "D", "D", "E"],
+        "key_2": [2, 4, 4, 5],
+        "value": np.random.randn(4)
+    })
+    pd.merge(df1_multi, df2_multi, left_on=["key1", "key2"], right_on=["key_1", "key_2"])
+
 LEFT OUTER JOIN
 ~~~~~~~~~~~~~~~

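The documentation hunk above shows a multi-key INNER JOIN in both SQL and pandas. The semantics can be sketched in plain Python, without pandas: a row pair survives only when every key column agrees on both sides. The row values and the `inner_join` helper below are illustrative, not pandas API.

```python
# Plain-Python sketch of a multi-key INNER JOIN (illustrative data).
df1_multi = [
    {"key1": "A", "key2": 1, "value": 0.5},
    {"key1": "B", "key2": 2, "value": 1.5},
    {"key1": "C", "key2": 3, "value": 2.5},
    {"key1": "D", "key2": 4, "value": 3.5},
]
df2_multi = [
    {"key1": "B", "key2": 2, "value": 10.0},
    {"key1": "D", "key2": 4, "value": 20.0},
    {"key1": "D", "key2": 4, "value": 30.0},
    {"key1": "E", "key2": 5, "value": 40.0},
]

def inner_join(left, right, keys):
    """Return one output row per pair of rows agreeing on every key column."""
    out = []
    for lrow in left:
        for rrow in right:
            if all(lrow[k] == rrow[k] for k in keys):
                # keep the keys plus both value columns, like merge's _x/_y suffixes
                out.append({k: lrow[k] for k in keys}
                           | {"value_x": lrow["value"], "value_y": rrow["value"]})
    return out

joined = inner_join(df1_multi, df2_multi, ["key1", "key2"])
# ("B", 2) matches once; ("D", 4) matches two right-hand rows -> 3 rows total
print(len(joined))  # 3
```

As in SQL, the duplicated `("D", 4)` key on the right produces one output row per match, which is why the result has three rows rather than two.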
doc/source/whatsnew/v3.0.0.rst

Lines changed: 7 additions & 2 deletions

@@ -215,6 +215,7 @@ Other enhancements
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
 - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
+- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`)
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`)

@@ -933,6 +934,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
+- Bug in :func:`testing.assert_index_equal` raising ``TypeError`` instead of ``AssertionError`` for incomparable ``CategoricalIndex`` when ``check_categorical=True`` and ``exact=False`` (:issue:`61935`)
 - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)

@@ -982,7 +984,8 @@ Timezones
 ^^^^^^^^^
 - Bug in :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` changing timezone to UTC when merging two DatetimeIndex objects with the same timezone but different units (:issue:`60080`)
 - Bug in :meth:`Series.dt.tz_localize` with a timezone-aware :class:`ArrowDtype` incorrectly converting to UTC when ``tz=None`` (:issue:`61780`)
-
+- Fixed bug in :func:`date_range` where tz-aware endpoints with calendar offsets (e.g. ``"MS"``) failed on DST fall-back. These now respect ``ambiguous``/``nonexistent``. (:issue:`52908`)
+

 Numeric
 ^^^^^^^

@@ -1006,8 +1009,8 @@ Conversion

 Strings
 ^^^^^^^
+- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
 - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
-

 Interval
 ^^^^^^^^

@@ -1077,6 +1080,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
+- Bug in :meth:`read_csv` with ``engine="c"`` reading big integers as strings. Now reads them as python integers. (:issue:`51295`)
 - Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)

@@ -1133,6 +1137,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
 - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
+- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` where the end of window was not indexed correctly. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)

 Reshaping
 ^^^^^^^^^

pandas/_config/config.py

Lines changed: 2 additions & 2 deletions

@@ -271,7 +271,7 @@ def set_option(*args) -> None:
     if not nargs or nargs % 2 != 0:
         raise ValueError("Must provide an even number of non-keyword arguments")

-    for k, v in zip(args[::2], args[1::2]):
+    for k, v in zip(args[::2], args[1::2], strict=True):
         key = _get_single_key(k)

         opt = _get_registered_option(key)

@@ -502,7 +502,7 @@ def option_context(*args) -> Generator[None]:
             "option_context(pat, val, pat, val...)."
         )

-    ops = tuple(zip(args[::2], args[1::2]))
+    ops = tuple(zip(args[::2], args[1::2], strict=True))
    try:
        undo = tuple((pat, get_option(pat)) for pat, val in ops)
        for pat, val in ops:

pandas/_libs/hashing.pyx

Lines changed: 2 additions & 0 deletions

@@ -91,6 +91,8 @@ def hash_object_array(
             hash(val)
             data = <bytes>str(val).encode(encoding)
         else:
+            free(vecs)
+            free(lens)
             raise TypeError(
                 f"{val} of type {type(val)} is not a valid type for hashing, "
                 "must be string or null"

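The hunk above frees the manually allocated ``vecs``/``lens`` buffers before raising, so the error path no longer leaks them. In pure Python the same guarantee is usually expressed with ``try``/``finally`` rather than explicit frees on each exit path. A minimal sketch (the names and the ``released`` bookkeeping are illustrative, not the pandas implementation):

```python
released = []  # records that the "buffer" was released, for demonstration

def hash_strings(values):
    """Hash a list of strings, releasing the buffer on every exit path."""
    buf = object()  # stands in for the malloc'd vecs/lens buffers
    try:
        return [hash(v) if isinstance(v, str) else _reject(v) for v in values]
    finally:
        # runs on both the success path and the error path,
        # mirroring the free(vecs)/free(lens) added before the raise
        released.append(buf)

def _reject(val):
    raise TypeError(f"{val!r} of type {type(val).__name__} is not a valid "
                    "type for hashing, must be string or null")
```

In C or Cython there is no ``finally`` around the hot loop, so the fix is to free the buffers explicitly on the branch that raises.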
pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion

@@ -838,7 +838,7 @@ cdef class BaseMultiIndexCodesEngine:
             raise KeyError(key)
         try:
             indices = [1 if checknull(v) else lev.get_loc(v) + multiindex_nulls_shift
-                       for lev, v in zip(self.levels, key)]
+                       for lev, v in zip(self.levels, key, strict=True)]
         except KeyError:
             raise KeyError(key)

pandas/_libs/missing.pyx

Lines changed: 1 addition & 1 deletion

@@ -72,7 +72,7 @@ cpdef bint check_na_tuples_nonequal(object left, object right):
     if len(left) != len(right):
         return False

-    for left_element, right_element in zip(left, right):
+    for left_element, right_element in zip(left, right, strict=True):
         if left_element is C_NA and right_element is not C_NA:
             return True
         elif right_element is C_NA and left_element is not C_NA:

pandas/_libs/parsers.pyx

Lines changed: 38 additions & 3 deletions

@@ -29,6 +29,7 @@ from cpython.exc cimport (
     PyErr_Fetch,
     PyErr_Occurred,
 )
+from cpython.long cimport PyLong_FromString
 from cpython.object cimport PyObject
 from cpython.ref cimport (
     Py_INCREF,

@@ -1081,9 +1082,13 @@ cdef class TextReader:
                     np.dtype("object"), i, start, end, 0,
                     0, na_hashset, na_fset)
             except OverflowError:
-                col_res, na_count = self._convert_with_dtype(
-                    np.dtype("object"), i, start, end, na_filter,
-                    0, na_hashset, na_fset)
+                try:
+                    col_res, na_count = _try_pylong(self.parser, i, start,
+                                                    end, na_filter, na_hashset)
+                except ValueError:
+                    col_res, na_count = self._convert_with_dtype(
+                        np.dtype("object"), i, start, end, 0,
+                        0, na_hashset, na_fset)

             if col_res is not None:
                 break

@@ -1873,6 +1878,36 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col,

     return 0

+cdef _try_pylong(parser_t *parser, Py_ssize_t col,
+                 int64_t line_start, int64_t line_end,
+                 bint na_filter, kh_str_starts_t *na_hashset):
+    cdef:
+        int na_count = 0
+        Py_ssize_t lines
+        coliter_t it
+        const char *word = NULL
+        ndarray[object] result
+        object NA = na_values[np.object_]
+
+    lines = line_end - line_start
+    result = np.empty(lines, dtype=object)
+    coliter_setup(&it, parser, col, line_start)
+
+    for i in range(lines):
+        COLITER_NEXT(it, word)
+        if na_filter and kh_get_str_starts_item(na_hashset, word):
+            # in the hash table
+            na_count += 1
+            result[i] = NA
+            continue
+
+        py_int = PyLong_FromString(word, NULL, 10)
+        if py_int is None:
+            raise ValueError("Invalid integer ", word)
+        result[i] = py_int
+
+    return result, na_count
+

 # -> tuple[ndarray[bool], int]
 cdef _try_bool_flex(parser_t *parser, int64_t col,

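The parser change above adds a middle tier to the C engine's cascade: when a column overflows ``int64``, it now tries arbitrary-precision Python ints (via ``PyLong_FromString``) before giving up and falling back to object/string conversion. A pure-Python sketch of that cascade; the function name and the tiny NA set are illustrative, not the pandas internals:

```python
INT64_MAX = 2**63 - 1  # values above this overflow the int64 fast path

def parse_int_column(words, na_values=frozenset({"", "NA"})):
    """NA filter first, then int parsing; mirrors the _try_pylong loop shape."""
    out, na_count = [], 0
    for word in words:
        if word in na_values:
            out.append(None)       # pandas stores its NA sentinel here
            na_count += 1
            continue
        # int(word, 10) is the Python-level analogue of PyLong_FromString
        # with base 10; it raises ValueError on non-integer input, which
        # the caller treats as "fall back to object dtype"
        out.append(int(word, 10))
    return out, na_count

values, nas = parse_int_column(["12", "NA", "123456789012345678901234567890"])
# the last value overflows int64 but is preserved exactly as a Python int
print(values[2] > INT64_MAX)  # True
```

This is why the whatsnew entry says big integers are now read "as python integers" rather than strings: precision is kept exactly instead of being laundered through text.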
pandas/_libs/tslibs/fields.pyx

Lines changed: 1 addition & 1 deletion

@@ -109,7 +109,7 @@ def month_position_check(fields, weekdays) -> str | None:
         int32_t[:] months = fields["M"]
         int32_t[:] days = fields["D"]

-    for y, m, d, wd in zip(years, months, days, weekdays):
+    for y, m, d, wd in zip(years, months, days, weekdays, strict=True):
         if calendar_start:
             calendar_start &= d == 1
         if business_start:

pandas/_libs/tslibs/offsets.pyx

Lines changed: 33 additions & 10 deletions

@@ -2217,7 +2217,7 @@ cdef class BusinessHour(BusinessMixin):
             # Use python string formatting to be faster than strftime
             hours = ",".join(
                 f"{st.hour:02d}:{st.minute:02d}-{en.hour:02d}:{en.minute:02d}"
-                for st, en in zip(self.start, self.end)
+                for st, en in zip(self.start, self.end, strict=True)
             )
             attrs = [f"{self._prefix}={hours}"]
             out += ": " + ", ".join(attrs)

@@ -2414,7 +2414,7 @@ cdef class BusinessHour(BusinessMixin):
         # get total business hours by sec in one business day
         businesshours = sum(
             self._get_business_hours_by_sec(st, en)
-            for st, en in zip(self.start, self.end)
+            for st, en in zip(self.start, self.end, strict=True)
         )

         bd, r = divmod(abs(n * 60), businesshours // 60)

@@ -5188,6 +5188,27 @@ INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
 _offset_map = {}


+deprec_to_valid_alias = {
+    "H": "h",
+    "BH": "bh",
+    "CBH": "cbh",
+    "T": "min",
+    "S": "s",
+    "L": "ms",
+    "U": "us",
+    "N": "ns",
+}
+
+
+def raise_invalid_freq(freq: str, extra_message: str | None = None) -> None:
+    msg = f"Invalid frequency: {freq}."
+    if extra_message is not None:
+        msg += f" {extra_message}"
+    if freq in deprec_to_valid_alias:
+        msg += f" Did you mean {deprec_to_valid_alias[freq]}?"
+    raise ValueError(msg)
+
+
 def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
     if name in _lite_rule_alias:
         return name

@@ -5236,7 +5257,7 @@ def _validate_to_offset_alias(alias: str, is_period: bool) -> None:
     if (alias.upper() != alias and
             alias.lower() not in {"s", "ms", "us", "ns"} and
             alias.upper().split("-")[0].endswith(("S", "E"))):
-        raise ValueError(INVALID_FREQ_ERR_MSG.format(alias))
+        raise ValueError(raise_invalid_freq(freq=alias))
     if (
         is_period and
         alias in c_OFFSET_TO_PERIOD_FREQSTR and

@@ -5267,8 +5288,9 @@ def _get_offset(name: str) -> BaseOffset:
             offset = klass._from_name(*split[1:])
         except (ValueError, TypeError, KeyError) as err:
             # bad prefix or suffix
-            raise ValueError(INVALID_FREQ_ERR_MSG.format(
-                f"{name}, failed to parse with error message: {repr(err)}")
+            raise_invalid_freq(
+                freq=name,
+                extra_message=f"Failed to parse with error message: {repr(err)}."
             )
         # cache
         _offset_map[name] = offset

@@ -5357,7 +5379,7 @@ cpdef to_offset(freq, bint is_period=False):
                 # the last element must be blank
                 raise ValueError("last element must be blank")

-            tups = zip(split[0::4], split[1::4], split[2::4])
+            tups = zip(split[0::4], split[1::4], split[2::4], strict=False)
             for n, (sep, stride, name) in enumerate(tups):
                 name = _warn_about_deprecated_aliases(name, is_period)
                 _validate_to_offset_alias(name, is_period)

@@ -5399,9 +5421,10 @@ cpdef to_offset(freq, bint is_period=False):
                 else:
                     result = result + offset
             except (ValueError, TypeError) as err:
-                raise ValueError(INVALID_FREQ_ERR_MSG.format(
-                    f"{freq}, failed to parse with error message: {repr(err)}")
-                ) from err
+                raise_invalid_freq(
+                    freq=freq,
+                    extra_message=f"Failed to parse with error message: {repr(err)}"
+                )

     # TODO(3.0?) once deprecation of "d" is enforced, the check for it here
     # can be removed

@@ -5417,7 +5440,7 @@ cpdef to_offset(freq, bint is_period=False):
         result = None

     if result is None:
-        raise ValueError(INVALID_FREQ_ERR_MSG.format(freq))
+        raise_invalid_freq(freq=freq)

     try:
         has_period_dtype_code = hasattr(result, "_period_dtype_code")

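The new ``raise_invalid_freq`` helper centralizes the "Invalid frequency" error path and appends a "did you mean" hint when the rejected string is a deprecated alias. The sketch below reproduces the helper in pure Python with the alias table copied from the diff; the behavior shown is my reading of the hunk, not a guarantee about released pandas messages.

```python
# Deprecated frequency aliases and their modern replacements (from the diff).
deprec_to_valid_alias = {
    "H": "h", "BH": "bh", "CBH": "cbh", "T": "min",
    "S": "s", "L": "ms", "U": "us", "N": "ns",
}

def raise_invalid_freq(freq, extra_message=None):
    """Raise ValueError with a uniform message, suggesting the modern alias."""
    msg = f"Invalid frequency: {freq}."
    if extra_message is not None:
        msg += f" {extra_message}"
    if freq in deprec_to_valid_alias:
        msg += f" Did you mean {deprec_to_valid_alias[freq]}?"
    raise ValueError(msg)

try:
    raise_invalid_freq("T")
except ValueError as err:
    print(err)  # Invalid frequency: T. Did you mean min?
```

Funneling every rejection through one helper is what lets the commit replace the scattered ``INVALID_FREQ_ERR_MSG.format(...)`` call sites with a single, hint-aware message.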
pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 3 additions & 0 deletions

@@ -2068,6 +2068,9 @@ class Timedelta(_Timedelta):

         disallow_ambiguous_unit(unit)

+        cdef:
+            int64_t new_value
+
         # GH 30543 if pd.Timedelta already passed, return it
         # check that only value is passed
         if isinstance(value, _Timedelta):
