From e6b23dab1d80e1985ef618cac2e1e9aa6c11f25b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Diridollou?= Date: Mon, 29 Sep 2025 21:18:03 -0400 Subject: [PATCH 1/4] GH1383 Add support for creating Series/Index with dtype='category' --- pandas-stubs/core/indexes/base.pyi | 12 ++++++++++++ pandas-stubs/core/series.pyi | 10 ++++++++++ pyproject.toml | 2 +- tests/indexes/test_indexes.py | 7 +++++++ tests/series/test_series.py | 6 ++++++ 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi index d98bb6e15..913c6e26a 100644 --- a/pandas-stubs/core/indexes/base.pyi +++ b/pandas-stubs/core/indexes/base.pyi @@ -42,6 +42,7 @@ from pandas.core.base import ( NumListLike, _ListLike, ) +from pandas.core.indexes.category import CategoricalIndex from pandas.core.strings.accessor import StringMethods from typing_extensions import ( Never, @@ -58,6 +59,7 @@ from pandas._typing import ( AnyAll, ArrayLike, AxesData, + CategoryDtypeArg, DropKeep, Dtype, DtypeArg, @@ -229,6 +231,16 @@ class Index(IndexOpsMixin[S1]): tupleize_cols: bool = ..., ) -> TimedeltaIndex: ... @overload + def __new__( + cls, + data: AxesData, + *, + dtype: CategoryDtypeArg, + copy: bool = ..., + name: Hashable = ..., + tupleize_cols: bool = ..., + ) -> CategoricalIndex: ... + @overload def __new__( cls, data: Sequence[Interval[_OrderableT]] | IndexOpsMixin[Interval[_OrderableT]], diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 5ebd3d407..a3ef4abb2 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -353,6 +353,16 @@ class Series(IndexOpsMixin[S1], NDFrame): copy: bool = ..., ) -> Series[Timestamp]: ... @overload + def __new__( + cls, + data: _ListLike, + index: AxesData | None = ..., + *, + dtype: CategoryDtypeArg, + name: Hashable = ..., + copy: bool = ..., + ) -> Series[CategoricalDtype]: ... + @overload def __new__( cls, data: PeriodIndex | Sequence[Period], diff --git a/pyproject.toml b/pyproject.toml index 237cffdf3..5c655c2e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ scipy = { version = ">=1.9.1", python = "<3.14" } scipy-stubs = ">=1.15.3.0" SQLAlchemy = ">=2.0.39" types-python-dateutil = ">=2.8.19" -beautifulsoup4 = "<=4.13.5" +beautifulsoup4 = ">=4.13.5" html5lib = ">=1.1" python-calamine = ">=0.2.0" diff --git a/tests/indexes/test_indexes.py b/tests/indexes/test_indexes.py index eb6a790b4..01ac64709 100644 --- a/tests/indexes/test_indexes.py +++ b/tests/indexes/test_indexes.py @@ -15,6 +15,7 @@ from pandas.core.arrays import DatetimeArray from pandas.core.arrays.categorical import Categorical from pandas.core.indexes.base import Index +from pandas.core.indexes.category import CategoricalIndex from typing_extensions import ( Never, assert_type, @@ -1366,6 +1367,12 @@ def test_index_factorize() -> None: check(assert_type(idx_uniques, np_1darray | Index | Categorical), pd.Index) +def test_index_categorical() -> None: + """Test creating an index with Categorical type GH1383.""" + sr = pd.Index([1], dtype="category") + check(assert_type(sr, CategoricalIndex), CategoricalIndex) + + def test_disallow_empty_index() -> None: # From GH 826 if TYPE_CHECKING_INVALID_USAGE: diff --git a/tests/series/test_series.py b/tests/series/test_series.py index bfa0c16ce..0452e1d28 100644 --- a/tests/series/test_series.py +++ b/tests/series/test_series.py @@ -50,6 +50,8 @@ Scalar, ) +from pandas.core.dtypes.dtypes import CategoricalDtype + from tests import ( PD_LTE_23, TYPE_CHECKING_INVALID_USAGE, @@ -1824,6 +1826,10 @@ def test_categorical_codes(): cat = pd.Categorical(["a", "b", "a"]) check(assert_type(cat.codes, np_1darray[np.signedinteger]), np_1darray[np.int8]) + # GH1383 + sr = pd.Series([1], dtype="category") + check(assert_type(sr, "pd.Series[CategoricalDtype]"), pd.Series, np.integer) + def test_relops() -> None: # GH 175 From edc6ec5128ed094357d2563dcb2f0bb2f65ffa3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Diridollou?= Date: Mon, 29 Sep 2025 21:27:49 -0400 Subject: [PATCH 2/4] GH1383 Add support for creating Series/Index with dtype='category' --- tests/series/test_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/series/test_series.py b/tests/series/test_series.py index 0452e1d28..4999a931a 100644 --- a/tests/series/test_series.py +++ b/tests/series/test_series.py @@ -50,7 +50,7 @@ Scalar, ) -from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.dtypes import CategoricalDtype # noqa F401 from tests import ( PD_LTE_23, From 3b17e7eacd6e27ad881e4b395b5b3cfa5bc5faab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Diridollou?= Date: Tue, 30 Sep 2025 17:31:50 -0400 Subject: [PATCH 3/4] GH1383 Add support for creating Series/Index with dtype='category' --- tests/series/test_properties.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/series/test_properties.py b/tests/series/test_properties.py index 72cf432a6..f69f21a4d 100644 --- a/tests/series/test_properties.py +++ b/tests/series/test_properties.py @@ -1,6 +1,5 @@ from typing import ( TYPE_CHECKING, - cast, ) import numpy as np @@ -57,9 +56,7 @@ def test_array_property() -> None: # casting due to pandas-dev/pandas-stubs#1383 check( assert_type( - cast( - "pd.Series[pd.CategoricalDtype]", pd.Series([1], dtype="category") - ).array, + pd.Series([1], dtype="category").array, pd.Categorical, ), pd.Categorical, From b1a7e6b27d548413b7a4b508f54b7cb90e9c1d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Diridollou?= Date: Tue, 30 Sep 2025 17:48:00 -0400 Subject: [PATCH 4/4] GH1383 Add support for creating Series/Index with dtype='category' --- tests/series/test_properties.py | 1 - tests/series/test_series.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/tests/series/test_properties.py b/tests/series/test_properties.py index f69f21a4d..2b76e50db 100644 --- a/tests/series/test_properties.py +++ b/tests/series/test_properties.py @@ -53,7 +53,6 @@ def test_dt_property() -> None: def test_array_property() -> None: """Test that Series.array returns ExtensionArray and its subclasses""" - # casting due to pandas-dev/pandas-stubs#1383 check( assert_type( pd.Series([1], dtype="category").array, diff --git a/tests/series/test_series.py b/tests/series/test_series.py index 2950c0397..9079708c1 100644 --- a/tests/series/test_series.py +++ b/tests/series/test_series.py @@ -2914,8 +2914,6 @@ def test_astype_categorical(cast_arg: CategoryDtypeArg, target_type: type) -> No # pandas category assert_type(s.astype(pd.CategoricalDtype()), "pd.Series[pd.CategoricalDtype]") assert_type(s.astype(cast_arg), "pd.Series[pd.CategoricalDtype]") - # pyarrow dictionary - # assert_type(s.astype("dictionary[pyarrow]"), "pd.Series[Categorical]") @pytest.mark.parametrize("cast_arg, target_type", ASTYPE_OBJECT_ARGS, ids=repr)