From b84c8393fe8ed132d7d87801a6509162bf9a3152 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 3 Jul 2024 15:30:50 -0700
Subject: [PATCH 1/2] PERF: Don't create a CategoricalIndex._engine in __contains__ if categories are RangeIndex

---
 pandas/core/indexes/category.py                   | 7 +++++--
 pandas/tests/indexes/categorical/test_category.py | 7 +++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 3b04d95cb7cbd..947f52529190e 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -377,8 +377,11 @@ def __contains__(self, key: Any) -> bool:
         # if key is a NaN, check if any NaN is in self.
         if is_valid_na_for_dtype(key, self.categories.dtype):
             return self.hasnans
-
-        return contains(self, key, container=self._engine)
+        if self.categories._typ == "rangeindex":
+            container = self.categories
+        else:
+            container = self._engine
+        return contains(self, key, container=container)
 
     def reindex(
         self, target, method=None, level=None, limit: int | None = None, tolerance=None
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
index 03a298a13dc2b..f4967e1f4cc10 100644
--- a/pandas/tests/indexes/categorical/test_category.py
+++ b/pandas/tests/indexes/categorical/test_category.py
@@ -392,3 +392,10 @@ def test_remove_maintains_order(self):
                 ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
             ),
         )
+
+
+def test_contains_rangeindex_categories_no_engine():
+    ci = CategoricalIndex(range(3))
+    assert 2 in ci
+    assert 5 not in ci
+    assert "_engine" not in ci._cache

From 6a622f4c522e7454e92b2b2d52a4c72f0bd66734 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:33:59 -0700
Subject: [PATCH 2/2] Fix typing

---
 pandas/core/indexes/category.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 947f52529190e..312219eb7b91a 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -378,7 +378,9 @@ def __contains__(self, key: Any) -> bool:
         if is_valid_na_for_dtype(key, self.categories.dtype):
             return self.hasnans
         if self.categories._typ == "rangeindex":
-            container = self.categories
+            container: Index | libindex.IndexEngine | libindex.ExtensionEngine = (
+                self.categories
+            )
         else:
             container = self._engine
         return contains(self, key, container=container)