Skip to content

Commit 16f6514

Browse files
authored
Merge branch 'main' into #57512-bad-datetime-str-conversion-in-series-ctor
2 parents e66e9b7 + 41383cf commit 16f6514

File tree

24 files changed

+183
-153
lines changed

24 files changed

+183
-153
lines changed

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ RUN apt-get install -y libhdf5-dev libgles2-mesa-dev
1111
RUN python -m pip install --upgrade pip
1212
COPY requirements-dev.txt /tmp
1313
RUN python -m pip install -r /tmp/requirements-dev.txt
14+
RUN git config --global --add safe.directory /home/pandas
1415
CMD ["/bin/bash"]

asv_bench/benchmarks/timeseries.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def setup(self):
183183
self.dt_ts = Series(5, rng3, dtype="datetime64[ns]")
184184

185185
def time_resample(self):
186-
self.dt_ts.resample("1S").last()
186+
self.dt_ts.resample("1s").last()
187187

188188

189189
class AsOf:

doc/source/user_guide/timeseries.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,11 +1468,16 @@ or some other non-observed day. Defined observance rules are:
14681468
:header: "Rule", "Description"
14691469
:widths: 15, 70
14701470

1471+
"next_workday", "move Saturday and Sunday to Monday"
1472+
"previous_workday", "move Saturday and Sunday to Friday"
14711473
"nearest_workday", "move Saturday to Friday and Sunday to Monday"
1474+
"before_nearest_workday", "apply ``nearest_workday`` and then move to previous workday before that day"
1475+
"after_nearest_workday", "apply ``nearest_workday`` and then move to next workday after that day"
14721476
"sunday_to_monday", "move Sunday to following Monday"
14731477
"next_monday_or_tuesday", "move Saturday to Monday and Sunday/Monday to Tuesday"
14741478
"previous_friday", "move Saturday and Sunday to previous Friday"
14751479
"next_monday", "move Saturday and Sunday to following Monday"
1480+
"weekend_to_monday", "same as ``next_monday``"
14761481

14771482
An example of how holidays and holiday calendars are defined:
14781483

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ Removal of prior version deprecations/changes
208208
- Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`)
209209
- Enforced deprecation of string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`)
210210
- Enforced deprecation of string ``BA`` denoting frequency in :class:`BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57793`)
211+
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`)
212+
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`)
211213
- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`)
212214
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
213215
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)

pandas/_libs/hashtable.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ cdef class Int64Vector(Vector):
183183
cdef Int64VectorData data
184184
cdef ndarray ao
185185

186-
cdef resize(self)
186+
cdef resize(self, Py_ssize_t new_size)
187187
cpdef ndarray to_array(self)
188188
cdef void append(self, int64_t x) noexcept
189189
cdef extend(self, int64_t[:] x)

pandas/_libs/hashtable.pyx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ from libc.stdlib cimport (
77
free,
88
malloc,
99
)
10+
from libc.string cimport memcpy
1011

1112
import numpy as np
1213

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,9 @@ ctypedef fused vector_data:
163163
Complex64VectorData
164164
StringVectorData
165165

166-
cdef bint needs_resize(vector_data *data) noexcept nogil:
167-
return data.size == data.capacity
166+
167+
cdef bint needs_resize(Py_ssize_t nelems, Py_ssize_t capacity) noexcept nogil:
168+
return nelems >= capacity
168169

169170
# ----------------------------------------------------------------------
170171
# Vector
@@ -214,8 +215,8 @@ cdef class {{name}}Vector(Vector):
214215
self.ao = np.empty(self.data.capacity, dtype=np.{{dtype}})
215216
self.data.data = <{{c_type}}*>self.ao.data
216217

217-
cdef resize(self):
218-
self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP)
218+
cdef resize(self, Py_ssize_t new_size):
219+
self.data.capacity = max(new_size, _INIT_VEC_CAP)
219220
self.ao.resize(self.data.capacity, refcheck=False)
220221
self.data.data = <{{c_type}}*>self.ao.data
221222

@@ -234,17 +235,28 @@ cdef class {{name}}Vector(Vector):
234235

235236
cdef void append(self, {{c_type}} x) noexcept:
236237

237-
if needs_resize(&self.data):
238+
if needs_resize(self.data.size, self.data.capacity):
238239
if self.external_view_exists:
239240
raise ValueError("external reference but "
240241
"Vector.resize() needed")
241-
self.resize()
242+
self.resize(self.data.capacity * 4)
242243

243244
append_data_{{dtype}}(&self.data, x)
244245

245246
cdef extend(self, const {{c_type}}[:] x):
246-
for i in range(len(x)):
247-
self.append(x[i])
247+
cdef Py_ssize_t x_size = len(x)
248+
if x_size == 0:
249+
return
250+
251+
cdef Py_ssize_t needed_size = self.data.size + x_size
252+
if needs_resize(needed_size, self.data.capacity):
253+
if self.external_view_exists:
254+
raise ValueError("external reference but "
255+
"Vector.resize() needed")
256+
self.resize(needed_size)
257+
258+
memcpy(self.data.data + self.data.size, &x[0], x_size * sizeof({{c_type}}))
259+
self.data.size = needed_size
248260

249261
{{endfor}}
250262

@@ -260,7 +272,7 @@ cdef class StringVector(Vector):
260272
if self.data.data is NULL:
261273
raise MemoryError()
262274

263-
cdef resize(self):
275+
cdef resize(self, Py_ssize_t new_size):
264276
cdef:
265277
char **orig_data
266278
Py_ssize_t i, orig_capacity
@@ -297,8 +309,8 @@ cdef class StringVector(Vector):
297309

298310
cdef void append(self, char *x) noexcept:
299311

300-
if needs_resize(&self.data):
301-
self.resize()
312+
if needs_resize(self.data.size, self.data.capacity):
313+
self.resize(self.data.capacity * 4)
302314

303315
append_data_string(&self.data, x)
304316

@@ -684,18 +696,18 @@ cdef class {{name}}HashTable(HashTable):
684696
continue
685697

686698
seen_na = True
687-
if needs_resize(ud):
699+
if needs_resize(ud.size, ud.capacity):
688700
with gil:
689701
if uniques.external_view_exists:
690702
raise ValueError("external reference to "
691703
"uniques held, but "
692704
"Vector.resize() needed")
693-
uniques.resize()
705+
uniques.resize(uniques.data.capacity * 4)
694706
if result_mask.external_view_exists:
695707
raise ValueError("external reference to "
696708
"result_mask held, but "
697709
"Vector.resize() needed")
698-
result_mask.resize()
710+
result_mask.resize(result_mask.data.capacity * 4)
699711
append_data_{{dtype}}(ud, val)
700712
append_data_uint8(rmd, 1)
701713
continue
@@ -706,19 +718,19 @@ cdef class {{name}}HashTable(HashTable):
706718
# k hasn't been seen yet
707719
k = kh_put_{{dtype}}(self.table, val, &ret)
708720

709-
if needs_resize(ud):
721+
if needs_resize(ud.size, ud.capacity):
710722
with gil:
711723
if uniques.external_view_exists:
712724
raise ValueError("external reference to "
713725
"uniques held, but "
714726
"Vector.resize() needed")
715-
uniques.resize()
727+
uniques.resize(uniques.data.capacity * 4)
716728
if use_result_mask:
717729
if result_mask.external_view_exists:
718730
raise ValueError("external reference to "
719731
"result_mask held, but "
720732
"Vector.resize() needed")
721-
result_mask.resize()
733+
result_mask.resize(result_mask.data.capacity * 4)
722734
append_data_{{dtype}}(ud, val)
723735
if use_result_mask:
724736
append_data_uint8(rmd, 0)
@@ -849,9 +861,9 @@ cdef class {{name}}HashTable(HashTable):
849861
k = kh_put_{{dtype}}(self.table, val, &ret)
850862
self.table.vals[k] = count
851863

852-
if needs_resize(ud):
864+
if needs_resize(ud.size, ud.capacity):
853865
with gil:
854-
uniques.resize()
866+
uniques.resize(uniques.data.capacity * 4)
855867
append_data_{{dtype}}(ud, val)
856868
labels[i] = count
857869
count += 1

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,9 +480,9 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray:
480480
for i in range(n):
481481
kh_put_{{ttype}}(table, labels[i], &ret)
482482
if ret != 0:
483-
if needs_resize(ud):
483+
if needs_resize(ud.size, ud.capacity):
484484
with gil:
485-
idx.resize()
485+
idx.resize(idx.data.capacity * 4)
486486
append_data_{{ttype}}(ud, i)
487487

488488
kh_destroy_{{ttype}}(table)

pandas/_libs/lib.pyi

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,7 @@ def is_range_indexer(
231231
left: np.ndarray, # np.ndarray[np.int64, ndim=1]
232232
n: int,
233233
) -> bool: ...
234+
def is_sequence_range(
235+
sequence: np.ndarray, # np.ndarray[np.int64, ndim=1]
236+
step: int,
237+
) -> bool: ...

pandas/_libs/lib.pyx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,28 @@ def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
678678
return True
679679

680680

681+
@cython.wraparound(False)
682+
@cython.boundscheck(False)
683+
def is_sequence_range(ndarray[int6432_t, ndim=1] sequence, int64_t step) -> bool:
684+
"""
685+
Check if sequence is equivalent to a range with the specified step.
686+
"""
687+
cdef:
688+
Py_ssize_t i, n = len(sequence)
689+
int6432_t first_element
690+
691+
if step == 0:
692+
return False
693+
if n == 0:
694+
return True
695+
696+
first_element = sequence[0]
697+
for i in range(1, n):
698+
if sequence[i] != first_element + i * step:
699+
return False
700+
return True
701+
702+
681703
ctypedef fused ndarr_object:
682704
ndarray[object, ndim=1]
683705
ndarray[object, ndim=2]

0 commit comments

Comments
 (0)