pandas-dev · niruta25 · Jun 24, 2025 · Jun 24, 2025 · Jun 25, 2025 · Jul 7, 2025
diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
@@ -1178,3 +1178,40 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categorica
     This also happens in some cases when you supply a NumPy array instead of a ``Categorical``:
     using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using
     a string array (e.g. ``np.array(["a","b","c","a"])``) will not.
+
+.. note::
+
+    When constructing a :class:`pandas.Categorical` from a pandas :class:`Series` or
+    :class:`Index` with ``dtype='object'``, the categories keep ``object`` dtype
+    unless all categories are strings. When all categories are strings, or when
+    constructing from a NumPy array with ``dtype='object'`` or a raw Python sequence,
+    pandas infers the most specific dtype for the categories (for example, ``string``).
+
+.. ipython:: python
+
+    with pd.option_context("future.infer_string", True):
+        ser = pd.Series(["foo", "bar", "baz"], dtype="object")
+        idx = pd.Index(["foo", "bar", "baz"], dtype="object")
+        arr = np.array(["foo", "bar", "baz"], dtype="object")
+        pylist = ["foo", "bar", "baz"]
+
+        cat_from_ser = pd.Categorical(ser)
+        cat_from_idx = pd.Categorical(idx)
+        cat_from_arr = pd.Categorical(arr)
+        cat_from_list = pd.Categorical(pylist)
+
+        # Series/Index with object dtype: infer string dtype
+        assert cat_from_ser.categories.inferred_type == "string"
+        assert cat_from_idx.categories.inferred_type == "string"
+
+        # Numpy array or list: infer string dtype
+        assert cat_from_arr.categories.inferred_type == "string"
+        assert cat_from_list.categories.inferred_type == "string"
+
+        # Mixed types: preserve object dtype
+        ser_mixed = pd.Series(["foo", 1, None], dtype="object")
+        idx_mixed = pd.Index(["foo", 1, None], dtype="object")
+        cat_mixed_ser = pd.Categorical(ser_mixed)
+        cat_mixed_idx = pd.Categorical(idx_mixed)
+        assert cat_mixed_ser.categories.dtype == "object"
+        assert cat_mixed_idx.categories.dtype == "object"
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -861,7 +861,7 @@ Categorical
 - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
--
+- Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; the ``object`` dtype is now preserved for these inputs when the categories are not all strings, while object-dtype NumPy arrays and plain Python sequences continue to infer the most specific dtype (for example, ``str`` if all elements are strings).
 
 Datetimelike
 ^^^^^^^^^^^^

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -454,6 +454,11 @@ def __init__(
                 codes = arr.indices.to_numpy()
                 dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered)
             else:
+                # Check for a pandas Series/Index with object dtype
+                preserve_object_dtpe = False
+                if isinstance(values, (ABCSeries, ABCIndex)):
+                    if getattr(values.dtype, "name", None) == "object":
+                        preserve_object_dtpe = True
                 if not isinstance(values, ABCIndex):
                     # in particular RangeIndex xref test_index_equal_range_categories
                     values = sanitize_array(values, None)
@@ -470,7 +475,13 @@ def __init__(
                             "by passing in a categories argument."
                         ) from err
 
-                # we're inferring from values
+                # If we should preserve object dtype, force categories to object dtype
+                if preserve_object_dtpe:
+                    # Only preserve object dtype if not all elements are strings
+                    if not all(isinstance(x, str) for x in categories):
+                        from pandas import Index
+
+                        categories = Index(categories, dtype=object, copy=False)
                 dtype = CategoricalDtype(categories, dtype.ordered)
 
         elif isinstance(values.dtype, CategoricalDtype):

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -786,3 +786,32 @@ def test_range_values_preserves_rangeindex_categories(self, values, categories):
         result = Categorical(values=values, categories=categories).categories
         expected = RangeIndex(range(5))
         tm.assert_index_equal(result, expected, exact=True)
+
+    def test_categorical_preserve_object_dtype_from_pandas(self):
+        with pd.option_context("future.infer_string", True):
+            ser = Series(["foo", "bar", "baz"], dtype="object")
+            idx = Index(["foo", "bar", "baz"], dtype="object")
+            arr = np.array(["foo", "bar", "baz"], dtype="object")
+            pylist = ["foo", "bar", "baz"]
+
+            cat_from_ser = Categorical(ser)
+            cat_from_idx = Categorical(idx)
+            cat_from_arr = Categorical(arr)
+            cat_from_list = Categorical(pylist)
+
+            # Series/Index with object dtype: infer string
+            # dtype if all elements are strings
+            assert cat_from_ser.categories.inferred_type == "string"
+            assert cat_from_idx.categories.inferred_type == "string"
+
+            # Numpy array or list: infer string dtype
+            assert cat_from_arr.categories.inferred_type == "string"
+            assert cat_from_list.categories.inferred_type == "string"
+
+            # Mixed types: preserve object dtype
+            ser_mixed = Series(["foo", 1, None], dtype="object")
+            idx_mixed = Index(["foo", 1, None], dtype="object")
+            cat_mixed_ser = Categorical(ser_mixed)
+            cat_mixed_idx = Categorical(idx_mixed)
+            assert cat_mixed_ser.categories.dtype == "object"
+            assert cat_mixed_idx.categories.dtype == "object"