Skip to content

Commit 94800db

Browse files
authored
Merge branch 'main' into bugfix-spss-kwargs
2 parents cbef35e + 9008ee5 commit 94800db

File tree

27 files changed

+124
-202
lines changed

27 files changed

+124
-202
lines changed

asv_bench/benchmarks/io/csv.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,9 @@ def time_read_stringcsv(self, engine):
408408
def time_read_bytescsv(self, engine):
409409
read_csv(self.data(self.BytesIO_input), engine=engine)
410410

411+
def peakmem_read_csv(self, engine):
412+
read_csv(self.data(self.BytesIO_input), engine=engine)
413+
411414

412415
class ReadCSVCategorical(BaseIO):
413416
fname = "__test__.csv"

doc/source/conf.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,6 @@
460460
"dateutil": ("https://dateutil.readthedocs.io/en/latest/", None),
461461
"matplotlib": ("https://matplotlib.org/stable/", None),
462462
"numpy": ("https://numpy.org/doc/stable/", None),
463-
"py": ("https://pylib.readthedocs.io/en/latest/", None),
464463
"python": ("https://docs.python.org/3/", None),
465464
"scipy": ("https://docs.scipy.org/doc/scipy/", None),
466465
"pyarrow": ("https://arrow.apache.org/docs/", None),

doc/source/getting_started/install.rst

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -277,11 +277,12 @@ Installable with ``pip install "pandas[excel]"``.
277277
========================= ================== =============== =============================================================
278278
Dependency Minimum Version pip extra Notes
279279
========================= ================== =============== =============================================================
280-
xlrd 2.0.1 excel Reading Excel
281-
xlsxwriter 3.0.5 excel Writing Excel
282-
openpyxl 3.1.0 excel Reading / writing for xlsx files
280+
xlrd 2.0.1 excel Reading for xls files
281+
xlsxwriter 3.0.5 excel Writing for xlsx files
282+
openpyxl 3.1.0 excel Reading / writing for Excel 2010 xlsx/xlsm/xltx/xltm files
283283
pyxlsb 1.0.10 excel Reading for xlsb files
284-
python-calamine 0.1.7 excel Reading for xls/xlsx/xlsb/ods files
284+
python-calamine 0.1.7 excel Reading for xls/xlsx/xlsm/xlsb/xla/xlam/ods files
285+
odfpy 1.4.1 excel Reading / writing for OpenDocument 1.2 files
285286
========================= ================== =============== =============================================================
286287

287288
HTML

doc/source/user_guide/io.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ Basic
6161
+++++
6262

6363
filepath_or_buffer : various
64-
Either a path to a file (a :class:`python:str`, :class:`python:pathlib.Path`,
65-
or :class:`py:py._path.local.LocalPath`), URL (including http, ftp, and S3
64+
Either a path to a file (a :class:`python:str` or :class:`python:pathlib.Path`),
65+
URL (including http, ftp, and S3
6666
locations), or any object with a ``read()`` method (such as an open file or
6767
:class:`~python:io.StringIO`).
6868
sep : str, defaults to ``','`` for :func:`read_csv`, ``\t`` for :func:`read_table`

doc/source/whatsnew/v2.2.1.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16+
- Fixed memory leak in :func:`read_csv` (:issue:`57039`)
1617
- Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
1718
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
19+
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
20+
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
1821
- Fixed regression in :meth:`Series.pct_change` raising a ``ValueError`` for an empty :class:`Series` (:issue:`57056`)
1922

2023
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
8484

8585
Other API changes
8686
^^^^^^^^^^^^^^^^^
87-
-
87+
- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
8888
-
8989

9090
.. ---------------------------------------------------------------------------

pandas/_libs/groupby.pyx

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,6 +1771,7 @@ def group_idxmin_idxmax(
17711771
Py_ssize_t i, j, N, K, lab
17721772
numeric_object_t val
17731773
numeric_object_t[:, ::1] group_min_or_max
1774+
uint8_t[:, ::1] seen
17741775
bint uses_mask = mask is not None
17751776
bint isna_entry
17761777
bint compute_max = name == "idxmax"
@@ -1784,13 +1785,10 @@ def group_idxmin_idxmax(
17841785

17851786
if numeric_object_t is object:
17861787
group_min_or_max = np.empty((<object>out).shape, dtype=object)
1788+
seen = np.zeros((<object>out).shape, dtype=np.uint8)
17871789
else:
17881790
group_min_or_max = np.empty_like(out, dtype=values.dtype)
1789-
if N > 0 and K > 0:
1790-
# When N or K is zero, we never use group_min_or_max
1791-
group_min_or_max[:] = _get_min_or_max(
1792-
values[0, 0], compute_max, is_datetimelike
1793-
)
1791+
seen = np.zeros_like(out, dtype=np.uint8)
17941792

17951793
# When using transform, we need a valid value for take in the case
17961794
# a category is not observed; these values will be dropped
@@ -1806,6 +1804,7 @@ def group_idxmin_idxmax(
18061804
if not skipna and out[lab, j] == -1:
18071805
# Once we've hit NA there is no going back
18081806
continue
1807+
18091808
val = values[i, j]
18101809

18111810
if uses_mask:
@@ -1814,10 +1813,14 @@ def group_idxmin_idxmax(
18141813
isna_entry = _treat_as_na(val, is_datetimelike)
18151814

18161815
if isna_entry:
1817-
if not skipna:
1816+
if not skipna or not seen[lab, j]:
18181817
out[lab, j] = -1
18191818
else:
1820-
if compute_max:
1819+
if not seen[lab, j]:
1820+
seen[lab, j] = True
1821+
group_min_or_max[lab, j] = val
1822+
out[lab, j] = i
1823+
elif compute_max:
18211824
if val > group_min_or_max[lab, j]:
18221825
group_min_or_max[lab, j] = val
18231826
out[lab, j] = i

pandas/_libs/src/parser/tokenizer.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ void parser_set_default_options(parser_t *self) {
109109

110110
parser_t *parser_new(void) { return (parser_t *)calloc(1, sizeof(parser_t)); }
111111

112+
static void parser_clear_data_buffers(parser_t *self) {
113+
free_if_not_null((void *)&self->stream);
114+
free_if_not_null((void *)&self->words);
115+
free_if_not_null((void *)&self->word_starts);
116+
free_if_not_null((void *)&self->line_start);
117+
free_if_not_null((void *)&self->line_fields);
118+
}
119+
112120
static void parser_cleanup(parser_t *self) {
113121
// XXX where to put this
114122
free_if_not_null((void *)&self->error_msg);
@@ -119,6 +127,7 @@ static void parser_cleanup(parser_t *self) {
119127
self->skipset = NULL;
120128
}
121129

130+
parser_clear_data_buffers(self);
122131
if (self->cb_cleanup != NULL) {
123132
self->cb_cleanup(self->source);
124133
self->cb_cleanup = NULL;

pandas/_testing/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
Series,
3535
)
3636
from pandas._testing._io import (
37-
round_trip_localpath,
3837
round_trip_pathlib,
3938
round_trip_pickle,
4039
write_to_compressed,
@@ -609,7 +608,6 @@ def shares_memory(left, right) -> bool:
609608
"OBJECT_DTYPES",
610609
"raise_assert_detail",
611610
"raises_chained_assignment_error",
612-
"round_trip_localpath",
613611
"round_trip_pathlib",
614612
"round_trip_pickle",
615613
"setitem",

pandas/_testing/_io.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -89,35 +89,6 @@ def round_trip_pathlib(writer, reader, path: str | None = None):
8989
return obj
9090

9191

92-
def round_trip_localpath(writer, reader, path: str | None = None):
93-
"""
94-
Write an object to file specified by a py.path LocalPath and read it back.
95-
96-
Parameters
97-
----------
98-
writer : callable bound to pandas object
99-
IO writing function (e.g. DataFrame.to_csv )
100-
reader : callable
101-
IO reading function (e.g. pd.read_csv )
102-
path : str, default None
103-
The path where the object is written and then read.
104-
105-
Returns
106-
-------
107-
pandas object
108-
The original object that was serialized and then re-read.
109-
"""
110-
import pytest
111-
112-
LocalPath = pytest.importorskip("py.path").local
113-
if path is None:
114-
path = "___localpath___"
115-
with ensure_clean(path) as path:
116-
writer(LocalPath(path))
117-
obj = reader(LocalPath(path))
118-
return obj
119-
120-
12192
def write_to_compressed(compression, path, data, dest: str = "test") -> None:
12293
"""
12394
Write data to a compressed file.

0 commit comments

Comments
 (0)