Merge branch 'main' into #57512-bad-datetime-str-conversion-in-series-ctor

ruimamaral · web-flow · commit 16f6514dcfde · 2024-03-22T14:22:16.000Z
diff --git a/Dockerfile b/Dockerfile
@@ -11,4 +11,5 @@ RUN apt-get install -y libhdf5-dev libgles2-mesa-dev
 RUN python -m pip install --upgrade pip
 COPY requirements-dev.txt /tmp
 RUN python -m pip install -r /tmp/requirements-dev.txt
+RUN git config --global --add safe.directory /home/pandas
 CMD ["/bin/bash"]
diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
@@ -183,7 +183,7 @@ def setup(self):
         self.dt_ts = Series(5, rng3, dtype="datetime64[ns]")
 
     def time_resample(self):
-        self.dt_ts.resample("1S").last()
+        self.dt_ts.resample("1s").last()
 
 
 class AsOf:
diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
@@ -1468,11 +1468,16 @@ or some other non-observed day.  Defined observance rules are:
     :header: "Rule", "Description"
     :widths: 15, 70
 
+    "next_workday", "move Saturday and Sunday to Monday"
+    "previous_workday", "move Saturday and Sunday to Friday"
     "nearest_workday", "move Saturday to Friday and Sunday to Monday"
+    "before_nearest_workday", "apply ``nearest_workday`` and then move to previous workday before that day"
+    "after_nearest_workday", "apply ``nearest_workday`` and then move to next workday after that day"
     "sunday_to_monday", "move Sunday to following Monday"
     "next_monday_or_tuesday", "move Saturday to Monday and Sunday/Monday to Tuesday"
     "previous_friday", "move Saturday and Sunday to previous Friday"
     "next_monday", "move Saturday and Sunday to following Monday"
+    "weekend_to_monday", "same as ``next_monday``"
 
 An example of how holidays and holiday calendars are defined:
 
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -208,6 +208,8 @@ Removal of prior version deprecations/changes
 - Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`)
 - Enforced deprecation of string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`)
 - Enforced deprecation of string ``BA`` denoting frequency in :class:`BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57793`)
+- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`)
+- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`)
 - Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`)
 - Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
 - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd
@@ -183,7 +183,7 @@ cdef class Int64Vector(Vector):
     cdef Int64VectorData data
     cdef ndarray ao
 
-    cdef resize(self)
+    cdef resize(self, Py_ssize_t new_size)
     cpdef ndarray to_array(self)
     cdef void append(self, int64_t x) noexcept
     cdef extend(self, int64_t[:] x)
diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
@@ -7,6 +7,7 @@ from libc.stdlib cimport (
     free,
     malloc,
 )
+from libc.string cimport memcpy
 
 import numpy as np
 
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -163,8 +163,9 @@ ctypedef fused vector_data:
     Complex64VectorData
     StringVectorData
 
-cdef bint needs_resize(vector_data *data) noexcept nogil:
-    return data.size == data.capacity
+
+cdef bint needs_resize(Py_ssize_t nelems, Py_ssize_t capacity) noexcept nogil:
+    return nelems >= capacity
 
 # ----------------------------------------------------------------------
 # Vector
@@ -214,8 +215,8 @@ cdef class {{name}}Vector(Vector):
         self.ao = np.empty(self.data.capacity, dtype=np.{{dtype}})
         self.data.data = <{{c_type}}*>self.ao.data
 
-    cdef resize(self):
-        self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP)
+    cdef resize(self, Py_ssize_t new_size):
+        self.data.capacity = max(new_size, _INIT_VEC_CAP)
         self.ao.resize(self.data.capacity, refcheck=False)
         self.data.data = <{{c_type}}*>self.ao.data
 
@@ -234,17 +235,28 @@ cdef class {{name}}Vector(Vector):
 
     cdef void append(self, {{c_type}} x) noexcept:
 
-        if needs_resize(&self.data):
+        if needs_resize(self.data.size, self.data.capacity):
             if self.external_view_exists:
                 raise ValueError("external reference but "
                                  "Vector.resize() needed")
-            self.resize()
+            self.resize(self.data.capacity * 4)
 
         append_data_{{dtype}}(&self.data, x)
 
     cdef extend(self, const {{c_type}}[:] x):
-        for i in range(len(x)):
-            self.append(x[i])
+        cdef Py_ssize_t x_size = len(x)
+        if x_size == 0:
+            return
+
+        cdef Py_ssize_t needed_size = self.data.size + x_size
+        if needs_resize(needed_size, self.data.capacity):
+            if self.external_view_exists:
+                raise ValueError("external reference but "
+                                 "Vector.resize() needed")
+            self.resize(needed_size)
+
+        memcpy(self.data.data + self.data.size, &x[0], x_size * sizeof({{c_type}}))
+        self.data.size = needed_size
 
 {{endfor}}
 
@@ -260,7 +272,7 @@ cdef class StringVector(Vector):
         if self.data.data is NULL:
             raise MemoryError()
 
-    cdef resize(self):
+    cdef resize(self, Py_ssize_t new_size):
         cdef:
             char **orig_data
             Py_ssize_t i, orig_capacity
@@ -297,8 +309,8 @@ cdef class StringVector(Vector):
 
     cdef void append(self, char *x) noexcept:
 
-        if needs_resize(&self.data):
-            self.resize()
+        if needs_resize(self.data.size, self.data.capacity):
+            self.resize(self.data.capacity * 4)
 
         append_data_string(&self.data, x)
 
@@ -684,18 +696,18 @@ cdef class {{name}}HashTable(HashTable):
                             continue
 
                         seen_na = True
-                        if needs_resize(ud):
+                        if needs_resize(ud.size, ud.capacity):
                             with gil:
                                 if uniques.external_view_exists:
                                     raise ValueError("external reference to "
                                                      "uniques held, but "
                                                      "Vector.resize() needed")
-                                uniques.resize()
+                                uniques.resize(uniques.data.capacity * 4)
                                 if result_mask.external_view_exists:
                                     raise ValueError("external reference to "
                                                      "result_mask held, but "
                                                      "Vector.resize() needed")
-                                result_mask.resize()
+                                result_mask.resize(result_mask.data.capacity * 4)
                         append_data_{{dtype}}(ud, val)
                         append_data_uint8(rmd, 1)
                         continue
@@ -706,19 +718,19 @@ cdef class {{name}}HashTable(HashTable):
                     # k hasn't been seen yet
                     k = kh_put_{{dtype}}(self.table, val, &ret)
 
-                    if needs_resize(ud):
+                    if needs_resize(ud.size, ud.capacity):
                         with gil:
                             if uniques.external_view_exists:
                                 raise ValueError("external reference to "
                                                  "uniques held, but "
                                                  "Vector.resize() needed")
-                            uniques.resize()
+                            uniques.resize(uniques.data.capacity * 4)
                             if use_result_mask:
                                 if result_mask.external_view_exists:
                                     raise ValueError("external reference to "
                                                      "result_mask held, but "
                                                      "Vector.resize() needed")
-                                result_mask.resize()
+                                result_mask.resize(result_mask.data.capacity * 4)
                     append_data_{{dtype}}(ud, val)
                     if use_result_mask:
                         append_data_uint8(rmd, 0)
@@ -849,9 +861,9 @@ cdef class {{name}}HashTable(HashTable):
                     k = kh_put_{{dtype}}(self.table, val, &ret)
                     self.table.vals[k] = count
 
-                    if needs_resize(ud):
+                    if needs_resize(ud.size, ud.capacity):
                         with gil:
-                            uniques.resize()
+                            uniques.resize(uniques.data.capacity * 4)
                     append_data_{{dtype}}(ud, val)
                     labels[i] = count
                     count += 1
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -480,9 +480,9 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray:
         for i in range(n):
             kh_put_{{ttype}}(table, labels[i], &ret)
             if ret != 0:
-                if needs_resize(ud):
+                if needs_resize(ud.size, ud.capacity):
                     with gil:
-                        idx.resize()
+                        idx.resize(idx.data.capacity * 4)
                 append_data_{{ttype}}(ud, i)
 
     kh_destroy_{{ttype}}(table)
diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
@@ -231,3 +231,7 @@ def is_range_indexer(
     left: np.ndarray,
     n: int,  # np.ndarray[np.int64, ndim=1]
 ) -> bool: ...
+def is_sequence_range(
+    sequence: np.ndarray,
+    step: int,  # np.ndarray[np.int64, ndim=1]
+) -> bool: ...
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -678,6 +678,28 @@ def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
     return True
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def is_sequence_range(ndarray[int6432_t, ndim=1] sequence, int64_t step) -> bool:
+    """
+    Check if sequence is equivalent to a range with the specified step.
+    """
+    cdef:
+        Py_ssize_t i, n = len(sequence)
+        int6432_t first_element
+
+    if step == 0:
+        return False
+    if n == 0:
+        return True
+
+    first_element = sequence[0]
+    for i in range(1, n):
+        if sequence[i] != first_element + i * step:
+            return False
+    return True
+
+
 ctypedef fused ndarr_object:
     ndarray[object, ndim=1]
     ndarray[object, ndim=2]
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -1603,7 +1603,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
 
 # -> ndarray[f'|S{width}']
 cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
-                   int64_t line_end, int64_t width) noexcept:
+                   int64_t line_end, int64_t width):
     cdef:
         char *data
         ndarray result
diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx
@@ -313,15 +313,7 @@ cdef dict c_DEPR_ABBREVS = {
     "H": "h",
     "BH": "bh",
     "CBH": "cbh",
-    "T": "min",
-    "t": "min",
     "S": "s",
-    "L": "ms",
-    "l": "ms",
-    "U": "us",
-    "u": "us",
-    "N": "ns",
-    "n": "ns",
 }
 
 
@@ -415,13 +407,17 @@ class Resolution(Enum):
         """
         cdef:
             str abbrev
+        if freq in {"T", "t", "L", "l", "U", "u", "N", "n"}:
+            raise ValueError(
+                f"Frequency \'{freq}\' is no longer supported."
+            )
         try:
             if freq in c_DEPR_ABBREVS:
                 abbrev = c_DEPR_ABBREVS[freq]
                 warnings.warn(
                     f"\'{freq}\' is deprecated and will be removed in a future "
                     f"version. Please use \'{abbrev}\' "
-                    "instead of \'{freq}\'.",
+                    f"instead of \'{freq}\'.",
                     FutureWarning,
                     stacklevel=find_stack_level(),
                 )
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -7169,7 +7169,7 @@ def maybe_sequence_to_range(sequence) -> Any | range:
     -------
     Any : input or range
     """
-    if isinstance(sequence, (ABCSeries, Index)):
+    if isinstance(sequence, (ABCSeries, Index, range)):
         return sequence
     np_sequence = np.asarray(sequence)
     if np_sequence.dtype.kind != "i" or len(np_sequence) == 1:
@@ -7179,13 +7179,7 @@ def maybe_sequence_to_range(sequence) -> Any | range:
     diff = np_sequence[1] - np_sequence[0]
     if diff == 0:
         return sequence
-    elif len(np_sequence) == 2:
-        return range(np_sequence[0], np_sequence[1] + diff, diff)
-    maybe_range_indexer, remainder = np.divmod(np_sequence - np_sequence[0], diff)
-    if (
-        lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer))
-        and not remainder.any()
-    ):
+    elif len(np_sequence) == 2 or lib.is_sequence_range(np_sequence, diff):
         return range(np_sequence[0], np_sequence[-1] + diff, diff)
     else:
         return sequence
diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py
@@ -112,18 +112,17 @@ def to_timedelta(
         * 'W'
         * 'D' / 'days' / 'day'
         * 'hours' / 'hour' / 'hr' / 'h' / 'H'
-        * 'm' / 'minute' / 'min' / 'minutes' / 'T'
+        * 'm' / 'minute' / 'min' / 'minutes'
         * 's' / 'seconds' / 'sec' / 'second' / 'S'
-        * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L'
-        * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U'
-        * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N'
+        * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis'
+        * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros'
+        * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond'
 
         Must not be specified when `arg` contains strings and ``errors="raise"``.
 
         .. deprecated:: 2.2.0
-            Units 'H', 'T', 'S', 'L', 'U' and 'N' are deprecated and will be removed
-            in a future version. Please use 'h', 'min', 's', 'ms', 'us', and 'ns'
-            instead of 'H', 'T', 'S', 'L', 'U' and 'N'.
+            Units 'H' and 'S' are deprecated and will be removed
+            in a future version. Please use 'h' and 's' instead.
 
     errors : {'raise', 'coerce'}, default 'raise'
         - If 'raise', then invalid parsing will raise an exception.
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py
@@ -772,30 +772,11 @@ def test_freq_dateoffset_with_relateivedelta_nanos(self):
         )
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "freq,freq_depr",
-        [
-            ("h", "H"),
-            ("2min", "2T"),
-            ("1s", "1S"),
-            ("2ms", "2L"),
-            ("1us", "1U"),
-            ("2ns", "2N"),
-        ],
-    )
-    def test_frequencies_H_T_S_L_U_N_deprecated(self, freq, freq_depr):
-        # GH#52536
-        freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1]
-        freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
-        msg = (
-            f"'{freq_depr_msg}' is deprecated and will be removed in a future version, "
-        )
-        f"please use '{freq_msg}' instead"
-
-        expected = date_range("1/1/2000", periods=2, freq=freq)
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = date_range("1/1/2000", periods=2, freq=freq_depr)
-        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize("freq", ["2T", "2L", "1l", "1U", "2N", "2n"])
+    def test_frequency_H_T_S_L_U_N_raises(self, freq):
+        msg = f"Invalid frequency: {freq}"
+        with pytest.raises(ValueError, match=msg):
+            date_range("1/1/2000", periods=2, freq=freq)
 
     @pytest.mark.parametrize(
         "freq,freq_depr",
diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py
@@ -60,6 +60,16 @@ def test_period_index_from_datetime_index_invalid_freq(self, freq):
         with pytest.raises(ValueError, match=msg):
             rng.to_period()
 
+    @pytest.mark.parametrize("freq_depr", ["2T", "1l", "2U", "n"])
+    def test_period_index_T_L_U_N_raises(self, freq_depr):
+        # GH#9586
+        msg = f"Invalid frequency: {freq_depr}"
+
+        with pytest.raises(ValueError, match=msg):
+            period_range("2020-01", "2020-05", freq=freq_depr)
+        with pytest.raises(ValueError, match=msg):
+            PeriodIndex(["2020-01", "2020-05"], freq=freq_depr)
+
 
 class TestPeriodIndex:
     def test_from_ordinals(self):
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py
diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py
diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py
diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py
diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py
diff --git a/pandas/tests/tslibs/test_resolution.py b/pandas/tests/tslibs/test_resolution.py
diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py

Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,7 @@ from libc.stdlib cimport (`
`7`	`7`	`free,`
`8`	`8`	`malloc,`
`9`	`9`	`)`
	`10`	`+from libc.string cimport memcpy`
`10`	`11`
`11`	`12`	`import numpy as np`
`12`	`13`