Skip to content

Commit 6ec1b33

Browse files
authored
Merge branch 'main' into sainivas-pandas
2 parents aa2e35f + d79f7b0 commit 6ec1b33

File tree

9 files changed

+94
-13
lines changed

9 files changed

+94
-13
lines changed

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ jobs:
153153
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
154154

155155
- name: Build wheels
156-
uses: pypa/cibuildwheel@v2.23.2
156+
uses: pypa/cibuildwheel@v2.23.3
157157
with:
158158
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
159159
env:

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,7 @@ Groupby/resample/rolling
806806
^^^^^^^^^^^^^^^^^^^^^^^^
807807
- Bug in :meth:`.DataFrameGroupBy.__len__` and :meth:`.SeriesGroupBy.__len__` would raise when the grouping contained NA values and ``dropna=False`` (:issue:`58644`)
808808
- Bug in :meth:`.DataFrameGroupBy.any` that returned True for groups where all Timedelta values are NaT. (:issue:`59712`)
809+
- Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` would fail when the groups were :class:`Categorical` with an NA value (:issue:`61356`)
809810
- Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
810811
- Bug in :meth:`.DataFrameGroupBy.median` where ``NaT`` values gave an incorrect result. (:issue:`57926`)
811812
- Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)

pandas/core/generic.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3568,6 +3568,7 @@ def _wrap(x, alt_format_):
35683568
elif formatters is None and float_format is not None:
35693569
formatters_ = partial(_wrap, alt_format_=lambda v: v)
35703570
format_index_ = [index_format_, column_format_]
3571+
format_index_names_ = [index_format_, column_format_]
35713572

35723573
# Deal with hiding indexes and relabelling column names
35733574
hide_: list[dict] = []
@@ -3616,6 +3617,7 @@ def _wrap(x, alt_format_):
36163617
relabel_index=relabel_index_,
36173618
format={"formatter": formatters_, **base_format_},
36183619
format_index=format_index_,
3620+
format_index_names=format_index_names_,
36193621
render_kwargs=render_kwargs_,
36203622
)
36213623

@@ -3628,6 +3630,7 @@ def _to_latex_via_styler(
36283630
relabel_index: dict | list[dict] | None = None,
36293631
format: dict | list[dict] | None = None,
36303632
format_index: dict | list[dict] | None = None,
3633+
format_index_names: dict | list[dict] | None = None,
36313634
render_kwargs: dict | None = None,
36323635
):
36333636
"""
@@ -3672,7 +3675,13 @@ def _to_latex_via_styler(
36723675
self = cast("DataFrame", self)
36733676
styler = Styler(self, uuid="")
36743677

3675-
for kw_name in ["hide", "relabel_index", "format", "format_index"]:
3678+
for kw_name in [
3679+
"hide",
3680+
"relabel_index",
3681+
"format",
3682+
"format_index",
3683+
"format_index_names",
3684+
]:
36763685
kw = vars()[kw_name]
36773686
if isinstance(kw, dict):
36783687
getattr(styler, kw_name)(**kw)

pandas/core/groupby/grouper.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,16 @@
1212

1313
import numpy as np
1414

15+
from pandas._libs import (
16+
algos as libalgos,
17+
)
1518
from pandas._libs.tslibs import OutOfBoundsDatetime
1619
from pandas.errors import InvalidIndexError
1720
from pandas.util._decorators import cache_readonly
1821

1922
from pandas.core.dtypes.common import (
23+
ensure_int64,
24+
ensure_platform_int,
2025
is_list_like,
2126
is_scalar,
2227
)
@@ -38,7 +43,10 @@
3843
)
3944
from pandas.core.series import Series
4045

41-
from pandas.io.formats.printing import pprint_thing
46+
from pandas.io.formats.printing import (
47+
PrettyDict,
48+
pprint_thing,
49+
)
4250

4351
if TYPE_CHECKING:
4452
from collections.abc import (
@@ -668,8 +676,14 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
668676
def groups(self) -> dict[Hashable, Index]:
669677
codes, uniques = self._codes_and_uniques
670678
uniques = Index._with_infer(uniques, name=self.name)
671-
cats = Categorical.from_codes(codes, uniques, validate=False)
672-
return self._index.groupby(cats)
679+
680+
r, counts = libalgos.groupsort_indexer(ensure_platform_int(codes), len(uniques))
681+
counts = ensure_int64(counts).cumsum()
682+
_result = (r[start:end] for start, end in zip(counts, counts[1:]))
683+
# map to the label
684+
result = {k: self._index.take(v) for k, v in zip(uniques, _result)}
685+
686+
return PrettyDict(result)
673687

674688
@property
675689
def observed_grouping(self) -> Grouping:

pandas/core/reshape/encoding.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,15 @@ def get_dummies(
6060
data : array-like, Series, or DataFrame
6161
Data of which to get dummy indicators.
6262
prefix : str, list of str, or dict of str, default None
63-
String to append DataFrame column names.
63+
A string to be prepended to DataFrame column names.
6464
Pass a list with length equal to the number of columns
6565
when calling get_dummies on a DataFrame. Alternatively, `prefix`
6666
can be a dictionary mapping column names to prefixes.
67-
prefix_sep : str, default '_'
68-
If appending prefix, separator/delimiter to use. Or pass a
69-
list or dictionary as with `prefix`.
67+
prefix_sep : str, list of str, or dict of str, default '_'
68+
Should you choose to prepend DataFrame column names with a prefix, this
69+
is the separator/delimiter to use between the two. Alternatively,
70+
`prefix_sep` can be a list with length equal to the number of columns,
71+
or a dictionary mapping column names to separators.
7072
dummy_na : bool, default False
7173
If True, a NaN indicator column will be added even if no NaN values are present.
7274
If False, NA values are encoded as all zero.

pandas/core/sample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def sample(
123123
random_state: np.random.RandomState | np.random.Generator,
124124
) -> np.ndarray:
125125
"""
126-
Randomly sample `size` indices in `np.arange(obj_len)`
126+
Randomly sample `size` indices in `np.arange(obj_len)`.
127127
128128
Parameters
129129
----------

pandas/tests/groupby/test_categorical.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,23 @@ def test_observed_groups(observed):
506506
tm.assert_dict_equal(result, expected)
507507

508508

509+
def test_groups_na_category(dropna, observed):
510+
# https://github.com/pandas-dev/pandas/issues/61356
511+
df = DataFrame(
512+
{"cat": Categorical(["a", np.nan, "a"], categories=list("adb"))},
513+
index=list("xyz"),
514+
)
515+
g = df.groupby("cat", observed=observed, dropna=dropna)
516+
517+
result = g.groups
518+
expected = {"a": Index(["x", "z"])}
519+
if not dropna:
520+
expected |= {np.nan: Index(["y"])}
521+
if not observed:
522+
expected |= {"b": Index([]), "d": Index([])}
523+
tm.assert_dict_equal(result, expected)
524+
525+
509526
@pytest.mark.parametrize(
510527
"keys, expected_values, expected_index_levels",
511528
[

pandas/tests/io/formats/test_to_latex.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,46 @@ def test_to_latex_escape_special_chars(self):
824824
)
825825
assert result == expected
826826

827+
def test_to_latex_escape_special_chars_in_index_names(self):
828+
# https://github.com/pandas-dev/pandas/issues/61309
829+
# https://github.com/pandas-dev/pandas/issues/57362
830+
index = "&%$#_{}}~^\\"
831+
df = DataFrame({index: [1, 2, 3]}).set_index(index)
832+
result = df.to_latex(escape=True)
833+
expected = _dedent(
834+
r"""
835+
\begin{tabular}{l}
836+
\toprule
837+
\&\%\$\#\_\{\}\}\textasciitilde \textasciicircum \textbackslash \\
838+
\midrule
839+
1 \\
840+
2 \\
841+
3 \\
842+
\bottomrule
843+
\end{tabular}
844+
"""
845+
)
846+
assert result == expected
847+
848+
def test_to_latex_escape_special_chars_in_column_name(self):
849+
df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})
850+
df.columns.name = "_^~"
851+
result = df.to_latex(escape=True)
852+
expected = _dedent(
853+
r"""
854+
\begin{tabular}{lrl}
855+
\toprule
856+
\_\textasciicircum \textasciitilde & A & B \\
857+
\midrule
858+
0 & 1 & a \\
859+
1 & 2 & b \\
860+
2 & 3 & c \\
861+
\bottomrule
862+
\end{tabular}
863+
"""
864+
)
865+
assert result == expected
866+
827867
def test_to_latex_specified_header_special_chars_without_escape(self):
828868
# GH 7124
829869
df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})

scripts/cibw_before_build_windows.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ done
88
FREE_THREADED_BUILD="$(python -c"import sysconfig; print(bool(sysconfig.get_config_var('Py_GIL_DISABLED')))")"
99
if [[ $FREE_THREADED_BUILD == "True" ]]; then
1010
python -m pip install -U pip
11-
# python -m pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
12-
# TODO: Remove below and uncomment above once https://github.com/cython/cython/pull/6717 no longer breaks tests
13-
python -m pip install git+https://github.com/cython/cython.git@3276b588720a053c78488e5de788605950f4b136
11+
python -m pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
1412
python -m pip install ninja meson-python versioneer[toml] numpy
1513
fi

0 commit comments

Comments
 (0)