From 0c940d81172745f938cbbcdbdbbd6593e7b96af8 Mon Sep 17 00:00:00 2001
From: ajpotts <ajpotts@users.noreply.github.com>
Date: Wed, 24 Dec 2025 07:29:14 -0500
Subject: [PATCH 1/2] Expand indexer handling in Arkouda extension arrays

---
 arkouda/pandas/extension/_arkouda_array.py    | 198 ++++++++++++++--
 .../extension/_arkouda_categorical_array.py   | 210 ++++++++++------
 .../pandas/extension/_arkouda_string_array.py | 224 ++++++++----------
 3 files changed, 409 insertions(+), 223 deletions(-)

diff --git a/arkouda/pandas/extension/_arkouda_array.py b/arkouda/pandas/extension/_arkouda_array.py
index 56eabafc226..3535f06670e 100644
--- a/arkouda/pandas/extension/_arkouda_array.py
+++ b/arkouda/pandas/extension/_arkouda_array.py
@@ -120,48 +120,206 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         # If scalars is already a numpy array, we can preserve its dtype
         return cls(ak_array(scalars, dtype=dtype, copy=copy))
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: Any) -> Any:
+        """
+        Retrieve one or more values using a pandas/NumPy-style indexer.
+
+        Parameters
+        ----------
+        key : Any
+            A valid indexer for 1D array-like data. This may be:
+            - A scalar integer position (e.g. ``1``)
+            - A Python ``slice`` (e.g. ``1:3``)
+            - A list-like of integer positions
+            - A boolean mask (NumPy array, pandas Series, or Arkouda ``pdarray``)
+            - A NumPy array, pandas Index/Series, or Arkouda ``pdarray``/``Strings``.
+
+        Returns
+        -------
+        Any
+            A scalar value for scalar indexers, or an ``ArkoudaArray`` for sequence-like
+            indexers.
+
+        Raises
+        ------
+        TypeError
+            If ``key`` is not a supported indexer type, or if a NumPy array or
+            list-like indexer has an unsupported dtype.
+        NotImplementedError
+            If a list-like indexer contains mixed element dtypes (e.g. a mixture
+            of booleans and integers), which is not supported.
+
+        Examples
+        --------
+        >>> import arkouda as ak
+        >>> from arkouda.pandas.extension import ArkoudaArray
+        >>> data = ak.arange(5)
+        >>> arr = ArkoudaArray(data)
+
+        Scalar integer index returns a Python scalar:
+
+        >>> arr[1]
+        np.int64(1)
+
+        Slicing returns another ArkoudaArray:
+
+        >>> arr[1:4]
+        ArkoudaArray([1 2 3])
+
+        List-like integer positions:
+
+        >>> arr[[0, 2, 4]]
+        ArkoudaArray([0 2 4])
+
+        Boolean mask (NumPy array):
+
+        >>> import numpy as np
+        >>> mask = np.array([True, False, True, False, True])
+        >>> arr[mask]
+        ArkoudaArray([0 2 4])
+        """
         from arkouda.numpy.pdarrayclass import pdarray
         from arkouda.numpy.pdarraycreation import array as ak_array
 
-        # Convert numpy boolean mask to arkouda pdarray
+        # Normalize NumPy ndarray indexers
         if isinstance(key, np.ndarray):
-            if key.dtype == bool:
-                key = ak_array(key)
-            elif key.dtype.kind in {"i"}:
+            if key.dtype == bool or key.dtype == np.bool_:
+                key = ak_array(key, dtype=bool)
+            elif np.issubdtype(key.dtype, np.integer):
                 key = ak_array(key, dtype="int64")
-            elif key.dtype.kind in {"u"}:
+            elif np.issubdtype(key.dtype, np.unsignedinteger):
                 key = ak_array(key, dtype="uint64")
             else:
-                raise TypeError(f"Unsupported numpy index type {key.dtype}")
+                raise TypeError(f"Unsupported NumPy index type {key.dtype}")
+
+        # Normalize Python lists
+        elif isinstance(key, list):
+            if len(key) == 0:
+                # Empty selection -> empty ArkoudaArray of same dtype
+                empty = ak_array([], dtype=self._data.dtype)
+                return self.__class__(empty)
+
+            first = key[0]
+            first_dtype = ak_dtype(first)
+            for item in key:
+                item_dtype = ak_dtype(item)
+                if first_dtype != item_dtype:
+                    raise NotImplementedError(
+                        f"Mixed dtypes are not supported: {item_dtype} vs {first_dtype}"
+                    )
+
+            if isinstance(first, (bool, np.bool_)):
+                key = ak_array(np.array(key, dtype=bool))
+            elif isinstance(first, (int, np.integer)):
+                key = ak_array(np.array(key, dtype=np.int64))
+            else:
+                raise TypeError(f"Unsupported list index type: {type(first)}")
 
+        # Perform the indexing operation
         result = self._data[key]
+
+        # Scalar key → return Python scalar
         if np.isscalar(key):
-            if isinstance(result, pdarray):
+            # If server returned a pdarray of length 1, extract scalar
+            if isinstance(result, pdarray) and result.size == 1:
                 return result[0]
-            else:
-                return result
+            return result
+
+        # All other cases → wrap result in same class
         return self.__class__(result)
 
-    #   TODO:  Simplify to use underlying array setter
-    def __setitem__(self, key, value):
-        from arkouda.numpy.dtypes import isSupportedInt
+    def __setitem__(self, key: Any, value: Any) -> None:
+        """
+        Assign one or more values to the underlying Arkouda array in-place.
+
+        Parameters
+        ----------
+        key : Any
+            A valid positional indexer for the array. This may be a scalar integer,
+            slice, list-like of integers, boolean mask, NumPy array, pandas Index/Series,
+            or Arkouda ``pdarray``.
+        value : Any
+            A scalar value broadcast to the selected positions, or an array-like
+            (NumPy array, Arkouda ``pdarray``, or ``ArkoudaArray``) that is
+            aligned with ``key``.
+
+        Notes
+        -----
+        This operation mutates the underlying server-side array in-place.
+
+        Examples
+        --------
+        Basic scalar assignment by position:
+
+        >>> import arkouda as ak
+        >>> from arkouda.pandas.extension import ArkoudaArray
+        >>> data = ak.arange(5)
+        >>> arr = ArkoudaArray(data)
+        >>> arr[0] = 42
+        >>> arr
+        ArkoudaArray([42 1 2 3 4])
+
+        Using a NumPy boolean mask:
+
+        >>> data = ak.arange(5)
+        >>> arr = ArkoudaArray(data)
+        >>> mask = arr.to_ndarray() % 2 == 0  # even positions
+        >>> arr[mask] = -1
+        >>> arr
+        ArkoudaArray([-1 1 -1 3 -1])
+
+        Using a NumPy integer indexer:
+
+        >>> data = ak.arange(5)
+        >>> arr = ArkoudaArray(data)
+        >>> idx = np.array([1, 3], dtype=np.int64)
+        >>> arr[idx] = 99
+        >>> arr
+        ArkoudaArray([0 99 2 99 4])
+
+        Assigning from another ArkoudaArray:
+
+        >>> data = ak.arange(5)
+        >>> arr = ArkoudaArray(data)
+        >>> other = ArkoudaArray(ak.arange(10, 15))
+        >>> idx = [1, 3, 4]
+        >>> arr[idx] = other[idx]
+        >>> arr
+        ArkoudaArray([0 11 2 13 14])
+        """
         from arkouda.numpy.pdarrayclass import pdarray
         from arkouda.numpy.pdarraycreation import array as ak_array
 
-        # Convert numpy mask to pdarray if necessary
-        if isinstance(key, np.ndarray) and key.dtype == bool:
-            key = ak_array(key)
-        elif isinstance(key, np.ndarray) and isSupportedInt(key.dtype):
-            key = ak_array(key)
+        # Normalize NumPy / Python indexers into Arkouda pdarrays where needed
+        if isinstance(key, np.ndarray):
+            # NumPy bool mask or integer indexer
+            if key.dtype == bool or key.dtype == np.bool_ or np.issubdtype(key.dtype, np.integer):
+                key = ak_array(key)
+        elif isinstance(key, list):
+            # Python list of bools or ints - convert to NumPy then to pdarray
+            if key and isinstance(key[0], (bool, np.bool_)):
+                key = ak_array(np.array(key, dtype=bool))
+            elif key and isinstance(key[0], (int, np.integer)):
+                key = ak_array(np.array(key, dtype=np.int64))
+
+        if isinstance(key, Sequence) and not isinstance(key, (str, bytes)):
+            #   Cannot set empty index, nothing to do
+            return
+
+        # Normalize the value into something the underlying pdarray understands
         if isinstance(value, ArkoudaArray):
             value = value._data
         elif isinstance(value, pdarray):
+            # already an Arkouda pdarray; nothing to do
             pass
-        elif isinstance(value, (int, float, bool)):  # Add scalar check
-            self._data[key] = value  # assign scalar to scalar position
+        elif np.isscalar(value):
+            # Fast path for scalar assignment
+
+            self._data[key] = value
             return
         else:
+            # Convert generic array-likes (Python lists, NumPy arrays, etc.)
+            # into Arkouda pdarrays.
             value = ak_array(value)
 
         self._data[key] = value
diff --git a/arkouda/pandas/extension/_arkouda_categorical_array.py b/arkouda/pandas/extension/_arkouda_categorical_array.py
index 8e10e427a67..3a2f8153e6c 100644
--- a/arkouda/pandas/extension/_arkouda_categorical_array.py
+++ b/arkouda/pandas/extension/_arkouda_categorical_array.py
@@ -7,7 +7,8 @@
 from numpy import ndarray
 from pandas.api.extensions import ExtensionArray
 
-import arkouda as ak
+from arkouda.numpy.dtypes import bool_
+from arkouda.numpy.pdarrayclass import pdarray
 
 from ._arkouda_array import ArkoudaArray
 from ._arkouda_extension_array import ArkoudaExtensionArray
@@ -70,6 +71,134 @@ def __init__(self, data: Categorical | "ArkoudaCategoricalArray" | ndarray | Seq
 
         self._data = data
 
+    def __getitem__(self, key: Any) -> Any:
+        """
+        Retrieve one or more categorical values.
+
+        Parameters
+        ----------
+        key : Any
+            Location(s) to retrieve. Supported forms include:
+
+            * scalar integer index
+            * slice objects (e.g. ``1:3``)
+            * NumPy integer array (any integer dtype)
+            * NumPy boolean mask with the same length as the array
+            * Python list of integers or booleans
+            * Arkouda ``pdarray`` of integers or booleans
+
+        Returns
+        -------
+        Any
+            A Python scalar for scalar access, or a new
+            :class:`ArkoudaCategoricalArray` for non-scalar indexers.
+
+        Raises
+        ------
+        TypeError
+            If a NumPy indexer with an unsupported dtype is provided.
+
+        Examples
+        --------
+        >>> import numpy as np
+        >>> import arkouda as ak
+        >>> from arkouda.pandas.extension import ArkoudaCategoricalArray
+        >>> data = ak.Categorical(ak.array(["a", "b", "c", "d"]))
+        >>> arr = ArkoudaCategoricalArray(data)
+
+        Scalar access returns a Python string-like scalar:
+
+        >>> arr[1]
+        np.str_('b')
+
+        Negative indexing:
+
+        >>> arr[-1]
+        np.str_('d')
+
+        Slice indexing returns a new ArkoudaCategoricalArray:
+
+        >>> result = arr[1:3]
+        >>> type(result)
+        <class 'arkouda.pandas.extension._arkouda_categorical_array.ArkoudaCategoricalArray'>
+
+        NumPy integer array indexing:
+
+        >>> idx = np.array([0, 2], dtype=np.int64)
+        >>> sliced = arr[idx]
+        >>> isinstance(sliced, ArkoudaCategoricalArray)
+        True
+
+        NumPy boolean mask:
+
+        >>> mask = np.array([True, False, True, False])
+        >>> masked = arr[mask]
+        >>> isinstance(masked, ArkoudaCategoricalArray)
+        True
+
+        Empty integer indexer returns an empty ArkoudaCategoricalArray:
+
+        >>> empty_idx = np.array([], dtype=np.int64)
+        >>> empty = arr[empty_idx]
+        >>> len(empty)
+        0
+        """
+        import numpy as np
+
+        from arkouda.numpy.pdarraycreation import array as ak_array
+        from arkouda.pandas.categorical import Categorical
+
+        # Handle empty indexer (list / tuple / ndarray of length 0)
+        if isinstance(key, (list, tuple, np.ndarray)) and len(key) == 0:
+            empty_strings = ak_array([], dtype="str_")
+            return ArkoudaCategoricalArray(Categorical(empty_strings))
+
+        # Scalar integers and slices: delegate directly to the underlying Categorical
+        if isinstance(key, (int, np.integer, slice)):
+            result = self._data[key]
+            # For scalar keys, just return the underlying scalar
+            if isinstance(key, (int, np.integer)):
+                return result
+            # For slices, underlying arkouda.Categorical returns a Categorical
+            return ArkoudaCategoricalArray(result)
+
+        # NumPy array indexers: normalize to Arkouda pdarrays
+        if isinstance(key, np.ndarray):
+            if key.dtype == bool:
+                key = ak_array(key)
+            elif np.issubdtype(key.dtype, np.signedinteger):
+                key = ak_array(key, dtype="int64")
+            elif np.issubdtype(key.dtype, np.unsignedinteger):
+                key = ak_array(key, dtype="uint64")
+            else:
+                raise TypeError(f"Unsupported numpy index type {key.dtype}")
+        elif not isinstance(key, (pdarray, Categorical)):
+            # Convert generic indexers (e.g. Python lists of ints/bools) to an Arkouda pdarray
+            key = ak_array(key)
+
+        # Delegate to underlying arkouda.Categorical
+        result = self._data[key]
+
+        # Scalar result: just return the underlying scalar
+        if isinstance(key, pdarray) and key.size == 1:
+            # Categorical.__getitem__ will generally still give a Categorical here;
+            # we normalize to a Python scalar by going through categories[codes].
+
+            codes = result.codes if isinstance(result, Categorical) else result
+            cats = self._data.categories
+            # codes is length-1, so this is length-1 Strings
+            labels = cats[codes]
+            # Return a Python scalar string
+            return labels[0]
+
+        # Non-scalar: wrap Categorical in ArkoudaCategoricalArray
+        if isinstance(result, Categorical):
+            return ArkoudaCategoricalArray(result)
+
+        # Fallback: if Categorical returned something array-like but not Categorical,
+        # rebuild a Categorical from it.
+        return ArkoudaCategoricalArray(Categorical(result))
+
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
         from arkouda import Categorical, array
@@ -79,16 +208,13 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
             scalars = Categorical(array(scalars))
         return cls(scalars)
 
-    def __getitem__(self, idx):
-        if isinstance(idx, int):
-            return self._data[idx]
-        return ArkoudaCategoricalArray(self._data[idx])
-
     def astype(self, x, dtype):
         raise NotImplementedError("array_api.astype is not implemented in Arkouda yet")
 
     def isna(self):
-        return ak.zeros(self._data.size, dtype=ak.bool)
+        from arkouda.numpy.pdarraycreation import zeros
+
+        return zeros(self._data.size, dtype=bool_)
 
     @property
     def dtype(self):
@@ -130,73 +256,3 @@ def __eq__(self, other):
 
     def __repr__(self):
         return f"ArkoudaCategoricalArray({self._data})"
-
-    def _not_implemented(self, name: str):
-        raise NotImplementedError(f"`{name}` is not implemented for ArkoudaCategoricalArray yet.")
-
-    def add_categories(self, *args, **kwargs):
-        self._not_implemented("add_categories")
-
-    def as_ordered(self, *args, **kwargs):
-        self._not_implemented("as_ordered")
-
-    def as_unordered(self, *args, **kwargs):
-        self._not_implemented("as_unordered")
-
-    def check_for_ordered(self, *args, **kwargs):
-        self._not_implemented("check_for_ordered")
-
-    def describe(self, *args, **kwargs):
-        self._not_implemented("describe")
-
-    @classmethod
-    def from_codes(cls, *args, **kwargs):
-        raise NotImplementedError("`from_codes` is not implemented for ArkoudaCategoricalArray yet.")
-
-    def isnull(self, *args, **kwargs):
-        self._not_implemented("isnull")
-
-    def max(self, *args, **kwargs):
-        self._not_implemented("max")
-
-    def memory_usage(self, *args, **kwargs):
-        self._not_implemented("memory_usage")
-
-    def min(self, *args, **kwargs):
-        self._not_implemented("min")
-
-    def notna(self, *args, **kwargs):
-        self._not_implemented("notna")
-
-    def notnull(self, *args, **kwargs):
-        self._not_implemented("notnull")
-
-    def remove_categories(self, *args, **kwargs):
-        self._not_implemented("remove_categories")
-
-    def remove_unused_categories(self, *args, **kwargs):
-        self._not_implemented("remove_unused_categories")
-
-    def rename_categories(self, *args, **kwargs):
-        self._not_implemented("rename_categories")
-
-    def reorder_categories(self, *args, **kwargs):
-        self._not_implemented("reorder_categories")
-
-    def set_categories(self, *args, **kwargs):
-        self._not_implemented("set_categories")
-
-    def set_ordered(self, *args, **kwargs):
-        self._not_implemented("set_ordered")
-
-    def sort_values(self, *args, **kwargs):
-        self._not_implemented("sort_values")
-
-    def swapaxes(self, *args, **kwargs):
-        self._not_implemented("swapaxes")
-
-    def to_list(self, *args, **kwargs):
-        self._not_implemented("to_list")
-
-    def value_counts(self, *args, **kwargs):
-        self._not_implemented("value_counts")
diff --git a/arkouda/pandas/extension/_arkouda_string_array.py b/arkouda/pandas/extension/_arkouda_string_array.py
index 1bdf4bef020..e80138dda8b 100644
--- a/arkouda/pandas/extension/_arkouda_string_array.py
+++ b/arkouda/pandas/extension/_arkouda_string_array.py
@@ -77,14 +77,106 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
 
         return cls(ak_array(scalars))
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: Any) -> Any:
+        """
+        Retrieve one or more string values.
+
+        Parameters
+        ----------
+        key : Any
+            Positional indexer. Supports:
+            * scalar integer positions
+            * slice objects
+            * NumPy integer arrays (signed/unsigned)
+            * NumPy boolean masks
+            * Python lists of integers / booleans
+            * Arkouda pdarray indexers (int / uint / bool)
+
+        Returns
+        -------
+        Any
+            A Python string for scalar access, or a new ArkoudaStringArray
+            for non-scalar indexers.
+
+        Raises
+        ------
+        TypeError
+            If ``key`` is a NumPy array with an unsupported dtype (for example,
+            a floating point or object dtype).
+
+        Examples
+        --------
+        Basic scalar access:
+
+        >>> import arkouda as ak
+        >>> from arkouda.pandas.extension import ArkoudaStringArray
+        >>> arr = ArkoudaStringArray(ak.array(["a", "b", "c", "d"]))
+        >>> arr[1]
+        np.str_('b')
+
+        Negative indexing:
+
+        >>> arr[-1]
+        np.str_('d')
+
+        Slice indexing (returns a new ArkoudaStringArray):
+
+        >>> arr[1:3]
+        ArkoudaStringArray(['b', 'c'])
+
+        NumPy integer array indexing:
+
+        >>> idx = np.array([0, 2], dtype=np.int64)
+        >>> arr[idx]
+        ArkoudaStringArray(['a', 'c'])
+
+        NumPy boolean mask:
+
+        >>> mask = np.array([True, False, True, False])
+        >>> arr[mask]
+        ArkoudaStringArray(['a', 'c'])
+
+        Arkouda integer indexer:
+
+        >>> ak_idx = ak.array([3, 1])
+        >>> arr[ak_idx]
+        ArkoudaStringArray(['d', 'b'])
+
+        Empty indexer returns an empty ArkoudaStringArray:
+
+        >>> empty_idx = np.array([], dtype=np.int64)
+        >>> arr[empty_idx]
+        ArkoudaStringArray([])
+        """
+        from arkouda.numpy.pdarraycreation import array as ak_array
+        from arkouda.numpy.strings import Strings
+
+        # Normalize NumPy indexers to Arkouda pdarrays, mirroring ArkoudaArray.__getitem__
+        if isinstance(key, np.ndarray):
+            if key.dtype == bool:
+                key = ak_array(key)
+            elif key.dtype.kind in {"i"}:
+                # signed integer
+                key = ak_array(key, dtype="int64")
+            elif key.dtype.kind in {"u"}:
+                # unsigned integer
+                key = ak_array(key, dtype="uint64")
+            else:
+                raise TypeError(f"Unsupported numpy index type {key.dtype}")
+
         result = self._data[key]
+
+        # Scalar access: return a plain Python str (or scalar) instead of a Strings object
         if np.isscalar(key):
-            if hasattr(result, "to_ndarray"):
-                return result.to_ndarray()[()]
-            else:
-                return result
-        return ArkoudaStringArray(result)
+            return result
+
+        # Non-scalar: expect an Arkouda Strings, wrap it
+        if isinstance(result, Strings):
+            return ArkoudaStringArray(result)
+
+        # Fallback: if Arkouda returned something array-like but not Strings,
+        # materialize via ak.array and wrap again as Strings.
+        return ArkoudaStringArray(ak_array(result))
 
     def astype(self, dtype, copy: bool = False):
         if dtype in (object, np.object_, "object", np.dtype("O")):
@@ -135,123 +227,3 @@ def __eq__(self, other):
 
     def __repr__(self):
         return f"ArkoudaStringArray({self._data})"
-
-    def _not_implemented(self, name: str):
-        raise NotImplementedError(f"`{name}` is not implemented for Arkouda-backed arrays yet.")
-
-    def all(self, *args, **kwargs):
-        self._not_implemented("all")
-
-    def any(self, *args, **kwargs):
-        self._not_implemented("any")
-
-    def argpartition(self, *args, **kwargs):
-        self._not_implemented("argpartition")
-
-    def byteswap(self, *args, **kwargs):
-        self._not_implemented("byteswap")
-
-    def choose(self, *args, **kwargs):
-        self._not_implemented("choose")
-
-    def clip(self, *args, **kwargs):
-        self._not_implemented("clip")
-
-    def compress(self, *args, **kwargs):
-        self._not_implemented("compress")
-
-    def conj(self, *args, **kwargs):
-        self._not_implemented("conj")
-
-    def conjugate(self, *args, **kwargs):
-        self._not_implemented("conjugate")
-
-    def cumprod(self, *args, **kwargs):
-        self._not_implemented("cumprod")
-
-    def cumsum(self, *args, **kwargs):
-        self._not_implemented("cumsum")
-
-    def diagonal(self, *args, **kwargs):
-        self._not_implemented("diagonal")
-
-    def dot(self, *args, **kwargs):
-        self._not_implemented("dot")
-
-    def dump(self, *args, **kwargs):
-        self._not_implemented("dump")
-
-    def dumps(self, *args, **kwargs):
-        self._not_implemented("dumps")
-
-    def fill(self, *args, **kwargs):
-        self._not_implemented("fill")
-
-    def flatten(self, *args, **kwargs):
-        self._not_implemented("flatten")
-
-    def getfield(self, *args, **kwargs):
-        self._not_implemented("getfield")
-
-    def item(self, *args, **kwargs):
-        self._not_implemented("item")
-
-    def max(self, *args, **kwargs):
-        self._not_implemented("max")
-
-    def mean(self, *args, **kwargs):
-        self._not_implemented("mean")
-
-    def min(self, *args, **kwargs):
-        self._not_implemented("min")
-
-    def nonzero(self, *args, **kwargs):
-        self._not_implemented("nonzero")
-
-    def partition(self, *args, **kwargs):
-        self._not_implemented("partition")
-
-    def prod(self, *args, **kwargs):
-        self._not_implemented("prod")
-
-    def put(self, *args, **kwargs):
-        self._not_implemented("put")
-
-    def resize(self, *args, **kwargs):
-        self._not_implemented("resize")
-
-    def round(self, *args, **kwargs):
-        self._not_implemented("round")
-
-    def setfield(self, *args, **kwargs):
-        self._not_implemented("setfield")
-
-    def setflags(self, *args, **kwargs):
-        self._not_implemented("setflags")
-
-    def sort(self, *args, **kwargs):
-        self._not_implemented("sort")
-
-    def std(self, *args, **kwargs):
-        self._not_implemented("std")
-
-    def sum(self, *args, **kwargs):
-        self._not_implemented("sum")
-
-    def swapaxes(self, *args, **kwargs):
-        self._not_implemented("swapaxes")
-
-    def to_device(self, *args, **kwargs):
-        self._not_implemented("to_device")
-
-    def tobytes(self, *args, **kwargs):
-        self._not_implemented("tobytes")
-
-    def tofile(self, *args, **kwargs):
-        self._not_implemented("tofile")
-
-    def trace(self, *args, **kwargs):
-        self._not_implemented("trace")
-
-    def var(self, *args, **kwargs):
-        self._not_implemented("var")

From 937d368e03cc1b58e1730ba2de2113dae2e06721 Mon Sep 17 00:00:00 2001
From: ajpotts <ajpotts@users.noreply.github.com>
Date: Mon, 5 Jan 2026 14:03:04 -0500
Subject: [PATCH 2/2] Closes #5228: remove type: ignore from factorize in
 extension module

---
 arkouda/pandas/extension/_arkouda_array.py    | 198 ++--------------
 .../extension/_arkouda_categorical_array.py   | 210 ++++++----------
 .../extension/_arkouda_extension_array.py     | 153 ++++++------
 .../pandas/extension/_arkouda_string_array.py | 224 ++++++++++--------
 tests/pandas/extension/arkouda_extension.py   | 129 ++++------
 tests/pandas/extension/dataframe_accessor.py  |   2 +-
 6 files changed, 351 insertions(+), 565 deletions(-)

diff --git a/arkouda/pandas/extension/_arkouda_array.py b/arkouda/pandas/extension/_arkouda_array.py
index 3535f06670e..56eabafc226 100644
--- a/arkouda/pandas/extension/_arkouda_array.py
+++ b/arkouda/pandas/extension/_arkouda_array.py
@@ -120,206 +120,48 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         # If scalars is already a numpy array, we can preserve its dtype
         return cls(ak_array(scalars, dtype=dtype, copy=copy))
 
-    def __getitem__(self, key: Any) -> Any:
-        """
-        Retrieve one or more values using a pandas/NumPy-style indexer.
-
-        Parameters
-        ----------
-        key : Any
-            A valid indexer for 1D array-like data. This may be:
-            - A scalar integer position (e.g. ``1``)
-            - A Python ``slice`` (e.g. ``1:3``)
-            - A list-like of integer positions
-            - A boolean mask (NumPy array, pandas Series, or Arkouda ``pdarray``)
-            - A NumPy array, pandas Index/Series, or Arkouda ``pdarray``/``Strings``.
-
-        Returns
-        -------
-        Any
-            A scalar value for scalar indexers, or an ``ArkoudaArray`` for sequence-like
-            indexers.
-
-        Raises
-        ------
-        TypeError
-            If ``key`` is not a supported indexer type, or if a NumPy array or
-            list-like indexer has an unsupported dtype.
-        NotImplementedError
-            If a list-like indexer contains mixed element dtypes (e.g. a mixture
-            of booleans and integers), which is not supported.
-
-        Examples
-        --------
-        >>> import arkouda as ak
-        >>> from arkouda.pandas.extension import ArkoudaArray
-        >>> data = ak.arange(5)
-        >>> arr = ArkoudaArray(data)
-
-        Scalar integer index returns a Python scalar:
-
-        >>> arr[1]
-        np.int64(1)
-
-        Slicing returns another ArkoudaArray:
-
-        >>> arr[1:4]
-        ArkoudaArray([1 2 3])
-
-        List-like integer positions:
-
-        >>> arr[[0, 2, 4]]
-        ArkoudaArray([0 2 4])
-
-        Boolean mask (NumPy array):
-
-        >>> import numpy as np
-        >>> mask = np.array([True, False, True, False, True])
-        >>> arr[mask]
-        ArkoudaArray([0 2 4])
-        """
+    def __getitem__(self, key):
         from arkouda.numpy.pdarrayclass import pdarray
         from arkouda.numpy.pdarraycreation import array as ak_array
 
-        # Normalize NumPy ndarray indexers
+        # Convert numpy boolean mask to arkouda pdarray
         if isinstance(key, np.ndarray):
-            if key.dtype == bool or key.dtype == np.bool_:
-                key = ak_array(key, dtype=bool)
-            elif np.issubdtype(key.dtype, np.integer):
+            if key.dtype == bool:
+                key = ak_array(key)
+            elif key.dtype.kind in {"i"}:
                 key = ak_array(key, dtype="int64")
-            elif np.issubdtype(key.dtype, np.unsignedinteger):
+            elif key.dtype.kind in {"u"}:
                 key = ak_array(key, dtype="uint64")
             else:
-                raise TypeError(f"Unsupported NumPy index type {key.dtype}")
-
-        # Normalize Python lists
-        elif isinstance(key, list):
-            if len(key) == 0:
-                # Empty selection -> empty ArkoudaArray of same dtype
-                empty = ak_array([], dtype=self._data.dtype)
-                return self.__class__(empty)
-
-            first = key[0]
-            first_dtype = ak_dtype(first)
-            for item in key:
-                item_dtype = ak_dtype(item)
-                if first_dtype != item_dtype:
-                    raise NotImplementedError(
-                        f"Mixed dtypes are not supported: {item_dtype} vs {first_dtype}"
-                    )
-
-            if isinstance(first, (bool, np.bool_)):
-                key = ak_array(np.array(key, dtype=bool))
-            elif isinstance(first, (int, np.integer)):
-                key = ak_array(np.array(key, dtype=np.int64))
-            else:
-                raise TypeError(f"Unsupported list index type: {type(first)}")
+                raise TypeError(f"Unsupported numpy index type {key.dtype}")
 
-        # Perform the indexing operation
         result = self._data[key]
-
-        # Scalar key → return Python scalar
         if np.isscalar(key):
-            # If server returned a pdarray of length 1, extract scalar
-            if isinstance(result, pdarray) and result.size == 1:
+            if isinstance(result, pdarray):
                 return result[0]
-            return result
-
-        # All other cases → wrap result in same class
+            else:
+                return result
         return self.__class__(result)
 
-    def __setitem__(self, key: Any, value: Any) -> None:
-        """
-        Assign one or more values to the underlying Arkouda array in-place.
-
-        Parameters
-        ----------
-        key : Any
-            A valid positional indexer for the array. This may be a scalar integer,
-            slice, list-like of integers, boolean mask, NumPy array, pandas Index/Series,
-            or Arkouda ``pdarray``.
-        value : Any
-            A scalar value broadcast to the selected positions, or an array-like
-            (NumPy array, Arkouda ``pdarray``, or ``ArkoudaArray``) that is
-            aligned with ``key``.
-
-        Notes
-        -----
-        This operation mutates the underlying server-side array in-place.
-
-        Examples
-        --------
-        Basic scalar assignment by position:
-
-        >>> import arkouda as ak
-        >>> from arkouda.pandas.extension import ArkoudaArray
-        >>> data = ak.arange(5)
-        >>> arr = ArkoudaArray(data)
-        >>> arr[0] = 42
-        >>> arr
-        ArkoudaArray([42 1 2 3 4])
-
-        Using a NumPy boolean mask:
-
-        >>> data = ak.arange(5)
-        >>> arr = ArkoudaArray(data)
-        >>> mask = arr.to_ndarray() % 2 == 0  # even positions
-        >>> arr[mask] = -1
-        >>> arr
-        ArkoudaArray([-1 1 -1 3 -1])
-
-        Using a NumPy integer indexer:
-
-        >>> data = ak.arange(5)
-        >>> arr = ArkoudaArray(data)
-        >>> idx = np.array([1, 3], dtype=np.int64)
-        >>> arr[idx] = 99
-        >>> arr
-        ArkoudaArray([0 99 2 99 4])
-
-        Assigning from another ArkoudaArray:
-
-        >>> data = ak.arange(5)
-        >>> arr = ArkoudaArray(data)
-        >>> other = ArkoudaArray(ak.arange(10, 15))
-        >>> idx = [1, 3, 4]
-        >>> arr[idx] = other[idx]
-        >>> arr
-        ArkoudaArray([0 11 2 13 14])
-        """
+    #   TODO:  Simplify to use underlying array setter
+    def __setitem__(self, key, value):
+        from arkouda.numpy.dtypes import isSupportedInt
         from arkouda.numpy.pdarrayclass import pdarray
         from arkouda.numpy.pdarraycreation import array as ak_array
 
-        # Normalize NumPy / Python indexers into Arkouda pdarrays where needed
-        if isinstance(key, np.ndarray):
-            # NumPy bool mask or integer indexer
-            if key.dtype == bool or key.dtype == np.bool_ or np.issubdtype(key.dtype, np.integer):
-                key = ak_array(key)
-        elif isinstance(key, list):
-            # Python list of bools or ints - convert to NumPy then to pdarray
-            if key and isinstance(key[0], (bool, np.bool_)):
-                key = ak_array(np.array(key, dtype=bool))
-            elif key and isinstance(key[0], (int, np.integer)):
-                key = ak_array(np.array(key, dtype=np.int64))
-
-        if isinstance(key, Sequence) and not isinstance(key, (str, bytes)):
-            #   Cannot set empty index, nothing to do
-            return
-
-        # Normalize the value into something the underlying pdarray understands
+        # Convert numpy mask to pdarray if necessary
+        if isinstance(key, np.ndarray) and key.dtype == bool:
+            key = ak_array(key)
+        elif isinstance(key, np.ndarray) and isSupportedInt(key.dtype):
+            key = ak_array(key)
         if isinstance(value, ArkoudaArray):
             value = value._data
         elif isinstance(value, pdarray):
-            # already an Arkouda pdarray; nothing to do
             pass
-        elif np.isscalar(value):
-            # Fast path for scalar assignment
-
-            self._data[key] = value
+        elif isinstance(value, (int, float, bool)):  # Add scalar check
+            self._data[key] = value  # assign scalar to scalar position
             return
         else:
-            # Convert generic array-likes (Python lists, NumPy arrays, etc.)
-            # into Arkouda pdarrays.
             value = ak_array(value)
 
         self._data[key] = value
diff --git a/arkouda/pandas/extension/_arkouda_categorical_array.py b/arkouda/pandas/extension/_arkouda_categorical_array.py
index 3a2f8153e6c..8e10e427a67 100644
--- a/arkouda/pandas/extension/_arkouda_categorical_array.py
+++ b/arkouda/pandas/extension/_arkouda_categorical_array.py
@@ -7,8 +7,7 @@
 from numpy import ndarray
 from pandas.api.extensions import ExtensionArray
 
-from arkouda.numpy.dtypes import bool_
-from arkouda.numpy.pdarrayclass import pdarray
+import arkouda as ak
 
 from ._arkouda_array import ArkoudaArray
 from ._arkouda_extension_array import ArkoudaExtensionArray
@@ -71,134 +70,6 @@ def __init__(self, data: Categorical | "ArkoudaCategoricalArray" | ndarray | Seq
 
         self._data = data
 
-    def __getitem__(self, key: Any) -> Any:
-        """
-        Retrieve one or more categorical values.
-
-        Parameters
-        ----------
-        key : Any
-            Location(s) to retrieve. Supported forms include:
-
-            * scalar integer index
-            * slice objects (e.g. ``1:3``)
-            * NumPy integer array (any integer dtype)
-            * NumPy boolean mask with the same length as the array
-            * Python list of integers or booleans
-            * Arkouda ``pdarray`` of integers or booleans
-
-        Returns
-        -------
-        Any
-            A Python scalar for scalar access, or a new
-            :class:`ArkoudaCategoricalArray` for non-scalar indexers.
-
-        Raises
-        ------
-        TypeError
-            If a NumPy indexer with an unsupported dtype is provided.
-
-        Examples
-        --------
-        >>> import numpy as np
-        >>> import arkouda as ak
-        >>> from arkouda.pandas.extension import ArkoudaCategoricalArray
-        >>> data = ak.Categorical(ak.array(["a", "b", "c", "d"]))
-        >>> arr = ArkoudaCategoricalArray(data)
-
-        Scalar access returns a Python string-like scalar:
-
-        >>> arr[1]
-        np.str_('b')
-
-        Negative indexing:
-
-        >>> arr[-1]
-        np.str_('d')
-
-        Slice indexing returns a new ArkoudaCategoricalArray:
-
-        >>> result = arr[1:3]
-        >>> type(result)
-        <class 'arkouda.pandas.extension._arkouda_categorical_array.ArkoudaCategoricalArray'>
-
-        NumPy integer array indexing:
-
-        >>> idx = np.array([0, 2], dtype=np.int64)
-        >>> sliced = arr[idx]
-        >>> isinstance(sliced, ArkoudaCategoricalArray)
-        True
-
-        NumPy boolean mask:
-
-        >>> mask = np.array([True, False, True, False])
-        >>> masked = arr[mask]
-        >>> isinstance(masked, ArkoudaCategoricalArray)
-        True
-
-        Empty integer indexer returns an empty ArkoudaCategoricalArray:
-
-        >>> empty_idx = np.array([], dtype=np.int64)
-        >>> empty = arr[empty_idx]
-        >>> len(empty)
-        0
-        """
-        import numpy as np
-
-        from arkouda.numpy.pdarraycreation import array as ak_array
-        from arkouda.pandas.categorical import Categorical
-
-        # Handle empty indexer (list / tuple / ndarray of length 0)
-        if isinstance(key, (list, tuple, np.ndarray)) and len(key) == 0:
-            empty_strings = ak_array([], dtype="str_")
-            return ArkoudaCategoricalArray(Categorical(empty_strings))
-
-        # Scalar integers and slices: delegate directly to the underlying Categorical
-        if isinstance(key, (int, np.integer, slice)):
-            result = self._data[key]
-            # For scalar keys, just return the underlying scalar
-            if isinstance(key, (int, np.integer)):
-                return result
-            # For slices, underlying arkouda.Categorical returns a Categorical
-            return ArkoudaCategoricalArray(result)
-
-        # NumPy array indexers: normalize to Arkouda pdarrays
-        if isinstance(key, np.ndarray):
-            if key.dtype == bool:
-                key = ak_array(key)
-            elif np.issubdtype(key.dtype, np.signedinteger):
-                key = ak_array(key, dtype="int64")
-            elif np.issubdtype(key.dtype, np.unsignedinteger):
-                key = ak_array(key, dtype="uint64")
-            else:
-                raise TypeError(f"Unsupported numpy index type {key.dtype}")
-        elif not isinstance(key, (pdarray, Categorical)):
-            # Convert generic indexers (e.g. Python lists of ints/bools) to an Arkouda pdarray
-            key = ak_array(key)
-
-        # Delegate to underlying arkouda.Categorical
-        result = self._data[key]
-
-        # Scalar result: just return the underlying scalar
-        if isinstance(key, pdarray) and key.size == 1:
-            # Categorical.__getitem__ will generally still give a Categorical here;
-            # we normalize to a Python scalar by going through categories[codes].
-
-            codes = result.codes if isinstance(result, Categorical) else result
-            cats = self._data.categories
-            # codes is length-1, so this is length-1 Strings
-            labels = cats[codes]
-            # Return a Python scalar string
-            return labels[0]
-
-        # Non-scalar: wrap Categorical in ArkoudaCategoricalArray
-        if isinstance(result, Categorical):
-            return ArkoudaCategoricalArray(result)
-
-        # Fallback: if Categorical returned something array-like but not Categorical,
-        # rebuild a Categorical from it.
-        return ArkoudaCategoricalArray(Categorical(result))
-
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
         from arkouda import Categorical, array
@@ -208,13 +79,16 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
             scalars = Categorical(array(scalars))
         return cls(scalars)
 
+    def __getitem__(self, idx):
+        idx = ak.array(idx) if isinstance(idx, ndarray) else idx  # NumPy indexer -> pdarray
+        result = self._data[idx]  # may be a scalar label rather than a Categorical
+        return ArkoudaCategoricalArray(result) if isinstance(result, ak.Categorical) else result
+
     def astype(self, x, dtype):
         raise NotImplementedError("array_api.astype is not implemented in Arkouda yet")
 
     def isna(self):
-        from arkouda.numpy.pdarraycreation import zeros
-
-        return zeros(self._data.size, dtype=bool_)
+        return ak.zeros(self._data.size, dtype=ak.bool)
 
     @property
     def dtype(self):
@@ -256,3 +130,73 @@ def __eq__(self, other):
 
     def __repr__(self):
         return f"ArkoudaCategoricalArray({self._data})"
+
+    def _not_implemented(self, name: str):
+        raise NotImplementedError(f"`{name}` is not implemented for ArkoudaCategoricalArray yet.")
+
+    def add_categories(self, *args, **kwargs):
+        self._not_implemented("add_categories")
+
+    def as_ordered(self, *args, **kwargs):
+        self._not_implemented("as_ordered")
+
+    def as_unordered(self, *args, **kwargs):
+        self._not_implemented("as_unordered")
+
+    def check_for_ordered(self, *args, **kwargs):
+        self._not_implemented("check_for_ordered")
+
+    def describe(self, *args, **kwargs):
+        self._not_implemented("describe")
+
+    @classmethod
+    def from_codes(cls, *args, **kwargs):
+        raise NotImplementedError("`from_codes` is not implemented for ArkoudaCategoricalArray yet.")
+
+    def isnull(self, *args, **kwargs):
+        self._not_implemented("isnull")
+
+    def max(self, *args, **kwargs):
+        self._not_implemented("max")
+
+    def memory_usage(self, *args, **kwargs):
+        self._not_implemented("memory_usage")
+
+    def min(self, *args, **kwargs):
+        self._not_implemented("min")
+
+    def notna(self, *args, **kwargs):
+        self._not_implemented("notna")
+
+    def notnull(self, *args, **kwargs):
+        self._not_implemented("notnull")
+
+    def remove_categories(self, *args, **kwargs):
+        self._not_implemented("remove_categories")
+
+    def remove_unused_categories(self, *args, **kwargs):
+        self._not_implemented("remove_unused_categories")
+
+    def rename_categories(self, *args, **kwargs):
+        self._not_implemented("rename_categories")
+
+    def reorder_categories(self, *args, **kwargs):
+        self._not_implemented("reorder_categories")
+
+    def set_categories(self, *args, **kwargs):
+        self._not_implemented("set_categories")
+
+    def set_ordered(self, *args, **kwargs):
+        self._not_implemented("set_ordered")
+
+    def sort_values(self, *args, **kwargs):
+        self._not_implemented("sort_values")
+
+    def swapaxes(self, *args, **kwargs):
+        self._not_implemented("swapaxes")
+
+    def to_list(self, *args, **kwargs):
+        self._not_implemented("to_list")
+
+    def value_counts(self, *args, **kwargs):
+        self._not_implemented("value_counts")
diff --git a/arkouda/pandas/extension/_arkouda_extension_array.py b/arkouda/pandas/extension/_arkouda_extension_array.py
index 5cda4787a36..313095b4a55 100644
--- a/arkouda/pandas/extension/_arkouda_extension_array.py
+++ b/arkouda/pandas/extension/_arkouda_extension_array.py
@@ -48,6 +48,7 @@
 
 import numpy as np
 
+from numpy.typing import NDArray
 from pandas.api.extensions import ExtensionArray
 
 from arkouda.numpy.dtypes import all_scalars
@@ -349,45 +350,47 @@ def take(self, indexer, fill_value=None, allow_fill=False):
         gathered = ak.where(mask, fv, self._data[idx_fix])
         return type(self)(gathered)
 
-    def factorize(  # type: ignore[override]
-        self, use_na_sentinel=True, sort=False, **kwargs
-    ) -> Tuple["ArkoudaExtensionArray", "ArkoudaExtensionArray"]:
+    def factorize(self, use_na_sentinel=True) -> Tuple[NDArray[np.intp], "ArkoudaExtensionArray"]:
         """
-        Encode the values of this array as integer codes and uniques,
-        similar to :func:`pandas.factorize`, but implemented with Arkouda.
+        Encode the values of this array as integer codes and unique values.
+
+        This is similar to :func:`pandas.factorize`, but the grouping/factorization
+        work is performed in Arkouda. The returned ``codes`` are a NumPy array for
+        pandas compatibility, while ``uniques`` are returned as an ExtensionArray
+        of the same type as ``self``.
 
         Each distinct non-missing value is assigned a unique integer code.
-        Missing values (NaN in floating dtypes) are encoded as -1 by default.
+        For floating dtypes, ``NaN`` is treated as missing; for all other dtypes,
+        no values are considered missing.
 
         Parameters
         ----------
         use_na_sentinel : bool, default True
-            If True, missing values are encoded as -1 in the codes array.
-            If False, missing values are assigned a valid code equal to
-            ``len(uniques)``.
-        sort : bool, default False
-            Whether to sort the unique values. If False, the unique values
-            appear in the order of first appearance in the array. If True,
-            the unique values are sorted, and codes are assigned accordingly.
-        **kwargs
-            Ignored for compatibility.
+            If True, missing values are encoded as ``-1`` in the returned codes.
+            If False, missing values are assigned the code ``len(uniques)``.
+            (Missingness is only detected for floating dtypes via ``NaN``.)
 
         Returns
         -------
-        Tuple[pdarray, ArkoudaExtensionArray]
+        (numpy.ndarray, ExtensionArray)
             A pair ``(codes, uniques)`` where:
-            - ``codes`` is a NumPy ``int64`` array of factor labels, one per element.
-              Missing values are ``-1`` if ``use_na_sentinel=True``; otherwise they
-              receive the code ``len(uniques)``.
-            - ``uniques`` is a NumPy array of the unique values.
+
+            * ``codes`` is a 1D NumPy array of dtype ``np.intp`` with the same length
+              as this array, containing the factor codes for each element.
+            * ``uniques`` is an ExtensionArray containing the unique (non-missing)
+              values, with the same extension type as ``self``.
+
+            If ``use_na_sentinel=True``, missing values in ``codes`` are ``-1``.
+            Otherwise they receive the code ``len(uniques)``.
 
         Notes
         -----
         * Only floating-point dtypes treat ``NaN`` as missing; for other dtypes,
-          no values are considered missing.
-        * This method executes all grouping and factorization in Arkouda,
-          returning results as NumPy arrays for compatibility with pandas.
-        * Unlike pandas, string/None/null handling is not yet unified.
+          all values are treated as non-missing.
+        * ``uniques`` are constructed from Arkouda's unique keys and returned as
+          ``type(self)(uniques_ak)`` so that pandas internals (e.g. ``groupby``)
+          can treat them as an ExtensionArray.
+        * String/None/null missing-value behavior is not yet unified with pandas.
 
         Examples
         --------
@@ -396,7 +399,7 @@ def factorize(  # type: ignore[override]
         >>> arr = ArkoudaArray(ak.array([1, 2, 1, 3]))
         >>> codes, uniques = arr.factorize()
         >>> codes
-        ArkoudaArray([0 1 0 2])
+        array([0, 1, 0, 2])
         >>> uniques
         ArkoudaArray([1 2 3])
         """
@@ -407,7 +410,6 @@ def factorize(  # type: ignore[override]
         from arkouda.numpy.pdarraycreation import array as ak_array
         from arkouda.numpy.sorting import argsort
         from arkouda.numpy.strings import Strings
-        from arkouda.pandas.extension import ArkoudaArray
         from arkouda.pandas.groupbyclass import GroupBy
 
         # Arkouda array backing
@@ -425,7 +427,7 @@ def factorize(  # type: ignore[override]
             sent = -1 if use_na_sentinel else 0
             from arkouda.numpy.pdarraycreation import full as ak_full
 
-            return ArkoudaArray(ak_full(n, sent, dtype=int64)), type(self)(
+            return ak_full(n, sent, dtype=int64).to_ndarray(), type(self)(
                 ak_array([], dtype=self.to_numpy().dtype)
             )
 
@@ -437,28 +439,16 @@ def factorize(  # type: ignore[override]
 
             uniques_ak = concatenate(uniques_ak)
 
-        if sort:
-            # Keys already sorted; group id -> 0..k-1
-            groupid_to_code = arange(uniques_ak.size, dtype=int64)
-
-            # Work around to account GroupBy not sorting Categorical properly
-            if isinstance(arr, Categorical):
-                perm = uniques_ak.argsort()
-                #   Inverse argsort:
-                groupid_to_code[perm] = arange(uniques_ak.size, dtype=int64)
-                uniques_ak = uniques_ak[perm]
-
-        else:
-            # First-appearance order
-            _keys, first_idx_per_group = g.min(arange(arr_nn.size, dtype=int64))
-            order = argsort(first_idx_per_group)
+        # First-appearance order
+        _keys, first_idx_per_group = g.min(arange(arr_nn.size, dtype=int64))
+        order = argsort(first_idx_per_group)
 
-            # Reorder uniques by first appearance
-            uniques_ak = uniques_ak[order]
+        # Reorder uniques by first appearance
+        uniques_ak = uniques_ak[order]
 
-            # Map group_id -> code in first-appearance order
-            groupid_to_code = zeros(order.size, dtype=int64)
-            groupid_to_code[order] = arange(order.size, dtype=int64)
+        # Map group_id -> code in first-appearance order
+        groupid_to_code = zeros(order.size, dtype=int64)
+        groupid_to_code[order] = arange(order.size, dtype=int64)
 
         # Per-element codes on the non-NA slice
         codes_nn = g.broadcast(groupid_to_code)
@@ -468,7 +458,9 @@ def factorize(  # type: ignore[override]
         codes_ak = full(n, sentinel, dtype=int64)
         codes_ak[non_na] = codes_nn
 
-        return ArkoudaArray(codes_ak), type(self)(uniques_ak)
+        codes_np = codes_ak.to_ndarray().astype(np.intp, copy=False)
+
+        return codes_np, type(self)(uniques_ak)
 
     # In each EA
     def _values_for_factorize(self):
@@ -527,42 +519,45 @@ def to_ndarray(self) -> np.ndarray:
         """
         return self._data.to_ndarray()
 
-    def argsort(  # type: ignore[override]
+    def argsort(
         self,
         *,
         ascending: bool = True,
-        kind="quicksort",
-        na_position: str = "last",
-        **kwargs,
-    ) -> pdarray:
+        kind: str = "quicksort",
+        **kwargs: object,
+    ) -> NDArray[np.intp]:
         """
         Return the indices that would sort the array.
 
-        This method computes the permutation indices that would sort the
-        underlying Arkouda data. It aligns with the pandas ``ExtensionArray``
-        contract, returning a 1-D ``pdarray`` of integer indices suitable for
-        reordering the array via ``take`` or ``iloc``. NaN values are placed
-        either at the beginning or end of the result depending on
-        ``na_position``.
+        This method computes the permutation indices that would sort the underlying
+        Arkouda data and returns them as a NumPy array, in accordance with the
+        pandas ``ExtensionArray`` contract. The indices can be used to reorder the
+        array via ``take`` or ``iloc``.
+
+        For floating-point data, ``NaN`` values are handled according to the
+        ``na_position`` keyword argument.
 
         Parameters
         ----------
         ascending : bool, default True
-            If True, sort values in ascending order. If False, sort in
-            descending order.
+            If True, sort values in ascending order. If False, sort in descending
+            order.
         kind : str, default "quicksort"
-            Sorting algorithm. Present for API compatibility with NumPy and
-            pandas but currently ignored.
-        na_position : {"first", "last"}, default "last"
-            Where to place NaN values in the sorted result.  Currently only implemented for pdarray.
-            For Strings and Categorical will have no effect.
-        **kwargs : Any
-            Additional keyword arguments for compatibility; ignored.
+            Sorting algorithm. Present for API compatibility with NumPy and pandas
+            but currently ignored.
+        **kwargs
+            Additional keyword arguments for compatibility. Supported keyword:
+
+            * ``na_position`` : {"first", "last"}, default "last"
+              Where to place ``NaN`` values in the sorted result. This option is
+              currently only applied for floating-point ``pdarray`` data; for
+              ``Strings`` and ``Categorical`` data it has no effect.
 
         Returns
         -------
-        pdarray
-            Integer indices (``int64``) that would sort the array.
+        numpy.ndarray
+            A 1D NumPy array of dtype ``np.intp`` containing the indices that would
+            sort the array.
 
         Raises
         ------
@@ -573,11 +568,12 @@ def argsort(  # type: ignore[override]
 
         Notes
         -----
-        - Supports Arkouda ``pdarray``, ``Strings``, and ``Categorical`` data.
-        - Floating-point arrays have NaNs repositioned according to
+        * Supports Arkouda ``pdarray``, ``Strings``, and ``Categorical`` data.
+        * For floating-point arrays, ``NaN`` values are repositioned according to
           ``na_position``.
-        - This method does not move data to the client; the computation
-          occurs on the Arkouda server.
+        * The sorting computation occurs on the Arkouda server, but the resulting
+          permutation indices are materialized on the client as a NumPy array, as
+          required by pandas internals.
 
         Examples
         --------
@@ -585,9 +581,9 @@ def argsort(  # type: ignore[override]
         >>> from arkouda.pandas.extension import ArkoudaArray
         >>> a = ArkoudaArray(ak.array([3.0, float("nan"), 1.0]))
         >>> a.argsort() # NA last by default
-        array([2 0 1])
+        array([2, 0, 1])
         >>> a.argsort(na_position="first")
-        array([1 2 0])
+        array([1, 2, 0])
         """
         from arkouda.numpy import argsort
         from arkouda.numpy.numeric import isnan as ak_isnan
@@ -596,6 +592,9 @@ def argsort(  # type: ignore[override]
         from arkouda.numpy.util import is_float
         from arkouda.pandas.categorical import Categorical
 
+        # Extract na_position from kwargs
+        na_position = kwargs.pop("na_position", "last")
+
         if na_position not in {"first", "last"}:
             raise ValueError("na_position must be 'first' or 'last'.")
 
@@ -613,7 +612,7 @@ def argsort(  # type: ignore[override]
         else:
             raise TypeError(f"Unsupported argsort dtype: {type(self._data)}")
 
-        return perm
+        return perm.to_ndarray().astype(np.intp, copy=False)  # documented dtype is np.intp
 
     def broadcast_arrays(self, *arrays):
         raise NotImplementedError(
diff --git a/arkouda/pandas/extension/_arkouda_string_array.py b/arkouda/pandas/extension/_arkouda_string_array.py
index e80138dda8b..1bdf4bef020 100644
--- a/arkouda/pandas/extension/_arkouda_string_array.py
+++ b/arkouda/pandas/extension/_arkouda_string_array.py
@@ -77,106 +77,14 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
 
         return cls(ak_array(scalars))
 
-    def __getitem__(self, key: Any) -> Any:
-        """
-        Retrieve one or more string values.
-
-        Parameters
-        ----------
-        key : Any
-            Positional indexer. Supports:
-            * scalar integer positions
-            * slice objects
-            * NumPy integer arrays (signed/unsigned)
-            * NumPy boolean masks
-            * Python lists of integers / booleans
-            * Arkouda pdarray indexers (int / uint / bool)
-
-        Returns
-        -------
-        Any
-            A Python string for scalar access, or a new ArkoudaStringArray
-            for non-scalar indexers.
-
-        Raises
-        ------
-        TypeError
-            If ``key`` is a NumPy array with an unsupported dtype (for example,
-            a floating point or object dtype).
-
-        Examples
-        --------
-        Basic scalar access:
-
-        >>> import arkouda as ak
-        >>> from arkouda.pandas.extension import ArkoudaStringArray
-        >>> arr = ArkoudaStringArray(ak.array(["a", "b", "c", "d"]))
-        >>> arr[1]
-        np.str_('b')
-
-        Negative indexing:
-
-        >>> arr[-1]
-        np.str_('d')
-
-        Slice indexing (returns a new ArkoudaStringArray):
-
-        >>> arr[1:3]
-        ArkoudaStringArray(['b', 'c'])
-
-        NumPy integer array indexing:
-
-        >>> idx = np.array([0, 2], dtype=np.int64)
-        >>> arr[idx]
-        ArkoudaStringArray(['a', 'c'])
-
-        NumPy boolean mask:
-
-        >>> mask = np.array([True, False, True, False])
-        >>> arr[mask]
-        ArkoudaStringArray(['a', 'c'])
-
-        Arkouda integer indexer:
-
-        >>> ak_idx = ak.array([3, 1])
-        >>> arr[ak_idx]
-        ArkoudaStringArray(['d', 'b'])
-
-        Empty indexer returns an empty ArkoudaStringArray:
-
-        >>> empty_idx = np.array([], dtype=np.int64)
-        >>> arr[empty_idx]
-        ArkoudaStringArray([])
-        """
-        from arkouda.numpy.pdarraycreation import array as ak_array
-        from arkouda.numpy.strings import Strings
-
-        # Normalize NumPy indexers to Arkouda pdarrays, mirroring ArkoudaArray.__getitem__
-        if isinstance(key, np.ndarray):
-            if key.dtype == bool:
-                key = ak_array(key)
-            elif key.dtype.kind in {"i"}:
-                # signed integer
-                key = ak_array(key, dtype="int64")
-            elif key.dtype.kind in {"u"}:
-                # unsigned integer
-                key = ak_array(key, dtype="uint64")
-            else:
-                raise TypeError(f"Unsupported numpy index type {key.dtype}")
-
+    def __getitem__(self, key):
+        if isinstance(key, np.ndarray):  # NumPy masks / positions must become pdarrays first
+            from arkouda.numpy.pdarraycreation import array as ak_array
+            key = ak_array(key, dtype={"i": "int64", "u": "uint64"}.get(key.dtype.kind))
         result = self._data[key]
-
-        # Scalar access: return a plain Python str (or scalar) instead of a Strings object
         if np.isscalar(key):
-            return result
-
-        # Non-scalar: expect an Arkouda Strings, wrap it
-        if isinstance(result, Strings):
-            return ArkoudaStringArray(result)
-
-        # Fallback: if Arkouda returned something array-like but not Strings,
-        # materialize via ak.array and wrap again as Strings.
-        return ArkoudaStringArray(ak_array(result))
+            return result  # scalar position -> return the element itself, unwrapped
+        return ArkoudaStringArray(result)
 
     def astype(self, dtype, copy: bool = False):
         if dtype in (object, np.object_, "object", np.dtype("O")):
@@ -227,3 +135,123 @@ def __eq__(self, other):
 
     def __repr__(self):
         return f"ArkoudaStringArray({self._data})"
+
+    def _not_implemented(self, name: str):
+        raise NotImplementedError(f"`{name}` is not implemented for Arkouda-backed arrays yet.")
+
+    def all(self, *args, **kwargs):
+        self._not_implemented("all")
+
+    def any(self, *args, **kwargs):
+        self._not_implemented("any")
+
+    def argpartition(self, *args, **kwargs):
+        self._not_implemented("argpartition")
+
+    def byteswap(self, *args, **kwargs):
+        self._not_implemented("byteswap")
+
+    def choose(self, *args, **kwargs):
+        self._not_implemented("choose")
+
+    def clip(self, *args, **kwargs):
+        self._not_implemented("clip")
+
+    def compress(self, *args, **kwargs):
+        self._not_implemented("compress")
+
+    def conj(self, *args, **kwargs):
+        self._not_implemented("conj")
+
+    def conjugate(self, *args, **kwargs):
+        self._not_implemented("conjugate")
+
+    def cumprod(self, *args, **kwargs):
+        self._not_implemented("cumprod")
+
+    def cumsum(self, *args, **kwargs):
+        self._not_implemented("cumsum")
+
+    def diagonal(self, *args, **kwargs):
+        self._not_implemented("diagonal")
+
+    def dot(self, *args, **kwargs):
+        self._not_implemented("dot")
+
+    def dump(self, *args, **kwargs):
+        self._not_implemented("dump")
+
+    def dumps(self, *args, **kwargs):
+        self._not_implemented("dumps")
+
+    def fill(self, *args, **kwargs):
+        self._not_implemented("fill")
+
+    def flatten(self, *args, **kwargs):
+        self._not_implemented("flatten")
+
+    def getfield(self, *args, **kwargs):
+        self._not_implemented("getfield")
+
+    def item(self, *args, **kwargs):
+        self._not_implemented("item")
+
+    def max(self, *args, **kwargs):
+        self._not_implemented("max")
+
+    def mean(self, *args, **kwargs):
+        self._not_implemented("mean")
+
+    def min(self, *args, **kwargs):
+        self._not_implemented("min")
+
+    def nonzero(self, *args, **kwargs):
+        self._not_implemented("nonzero")
+
+    def partition(self, *args, **kwargs):
+        self._not_implemented("partition")
+
+    def prod(self, *args, **kwargs):
+        self._not_implemented("prod")
+
+    def put(self, *args, **kwargs):
+        self._not_implemented("put")
+
+    def resize(self, *args, **kwargs):
+        self._not_implemented("resize")
+
+    def round(self, *args, **kwargs):
+        self._not_implemented("round")
+
+    def setfield(self, *args, **kwargs):
+        self._not_implemented("setfield")
+
+    def setflags(self, *args, **kwargs):
+        self._not_implemented("setflags")
+
+    def sort(self, *args, **kwargs):
+        self._not_implemented("sort")
+
+    def std(self, *args, **kwargs):
+        self._not_implemented("std")
+
+    def sum(self, *args, **kwargs):
+        self._not_implemented("sum")
+
+    def swapaxes(self, *args, **kwargs):
+        self._not_implemented("swapaxes")
+
+    def to_device(self, *args, **kwargs):
+        self._not_implemented("to_device")
+
+    def tobytes(self, *args, **kwargs):
+        self._not_implemented("tobytes")
+
+    def tofile(self, *args, **kwargs):
+        self._not_implemented("tofile")
+
+    def trace(self, *args, **kwargs):
+        self._not_implemented("trace")
+
+    def var(self, *args, **kwargs):
+        self._not_implemented("var")
diff --git a/tests/pandas/extension/arkouda_extension.py b/tests/pandas/extension/arkouda_extension.py
index 4e643595608..1503938eef0 100644
--- a/tests/pandas/extension/arkouda_extension.py
+++ b/tests/pandas/extension/arkouda_extension.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from numpy.testing import assert_equal as np_assert_equal
+
 import arkouda as ak
 
 from arkouda.numpy.strings import Strings
@@ -16,6 +18,16 @@
 
 
 class TestArkoudaExtensionArray:
+    def test_extension_docstrings(self):
+        import doctest
+
+        from arkouda.pandas.extension import _arkouda_extension_array
+
+        result = doctest.testmod(
+            _arkouda_extension_array, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
+        )
+        assert result.failed == 0, f"Doctest failed: {result.failed} failures"
+
     @pytest.fixture(params=["numeric", "strings", "categorical"])
     def ea(self, request):
         """
@@ -47,16 +59,6 @@ def ea(self, request):
         arr._test_kind = kind
         return arr
 
-    def test_extension_docstrings(self):
-        import doctest
-
-        from arkouda.pandas.extension import _arkouda_extension_array
-
-        result = doctest.testmod(
-            _arkouda_extension_array, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
-        )
-        assert result.failed == 0, f"Doctest failed: {result.failed} failures"
-
     def base_objs(self):
         """Provide canonical Arkouda objects for reuse in tests."""
         nums = ak.array([1, 2, 3, 4])  # pdarray[int64]
@@ -358,13 +360,13 @@ def test_concat_largeish_segments_length_only_smoke(self):
         assert out[1000] == 1000
         assert out[-1] == 1499
 
-    def assert_indices(self, perm: pdarray, expected_py_indices):
+    def assert_indices(self, perm: np.ndarray, expected_py_indices):
         """Compare returned indices to expected Python list, staying server-side where possible."""
-        assert isinstance(perm, pdarray)
+        assert isinstance(perm, np.ndarray)
         # Arkouda uses int64 for indices; accept any int dtype but verify it's integer
         assert np.issubdtype(perm.dtype, np.integer)
-        exp = ak.array(expected_py_indices)
-        assert ak.all(perm == exp)
+        exp = np.array(expected_py_indices)
+        assert np.all(perm == exp)
 
     # ---------- pdarray (float) with NaN handling ----------
 
@@ -450,116 +452,87 @@ class Dummy:
             # noinspection PyUnresolvedReferences
             ea.argsort()
 
-    @pytest.mark.parametrize("sort", [False, True])
     @pytest.mark.parametrize("use_na_sentinel", [True, False])
-    def test_factorize_int_basic(self, sort, use_na_sentinel):
+    def test_factorize_int_basic(self, use_na_sentinel):
         """
         Int array has no NAs; first-appearance order vs sorted uniques;
         NA sentinel only affects behavior if there are NAs (there aren't here).
         """
         a = ArkoudaArray(ak.array([1, 2, 1, 3]))
-        codes, uniques = a.factorize(sort=sort, use_na_sentinel=use_na_sentinel)
-
-        if not sort:
-            # First appearance: uniques [1, 2, 3]
-            assert_equal(uniques._data, ak.array([1, 2, 3]))
-            assert_equal(codes, ArkoudaArray(ak.array([0, 1, 0, 2])))
-        else:
-            # Sorted: uniques [1, 2, 3] (same here, but codes recomputed from sorted order)
-            assert_equal(uniques._data, ak.array([1, 2, 3]))
-            assert_equal(codes, ArkoudaArray(ak.array([0, 1, 0, 2])))
-
-    @pytest.mark.parametrize("sort", [False, True])
-    def test_factorize_float_with_nan_default_sentinel(self, sort):
+        codes, uniques = a.factorize(use_na_sentinel=use_na_sentinel)
+
+        # First appearance: uniques [1, 2, 3]
+        assert_equal(uniques._data, ak.array([1, 2, 3]))
+        np_assert_equal(codes, np.array([0, 1, 0, 2]))
+
+    def test_factorize_float_with_nan_default_sentinel(self):
         """Float array treats NaN as missing -> -1 sentinel by default."""
         a = ArkoudaArray(ak.array([1.0, np.nan, 1.0, 2.0]))
-        codes, uniques = a.factorize(sort=sort)
+        codes, uniques = a.factorize()
 
-        if not sort:
-            # First appearance uniques: [1.0, 2.0]
-            assert_arkouda_array_equal(uniques._data, ak.array([1.0, 2.0]))
-            assert_arkouda_array_equal(codes._data, ak.array([0, -1, 0, 1]))
-        else:
-            # Sorted uniques: [1.0, 2.0] (same set)
-            assert_arkouda_array_equal(uniques._data, ak.array([1.0, 2.0]))
-            assert_arkouda_array_equal(codes._data, ak.array([0, -1, 0, 1]))
+        # First appearance uniques: [1.0, 2.0]
+        assert_arkouda_array_equal(uniques._data, ak.array([1.0, 2.0]))
+        np_assert_equal(codes, np.array([0, -1, 0, 1]))
 
     def test_factorize_float_with_nan_no_sentinel(self):
         """With use_na_sentinel=False, NaNs get a valid code == len(uniques)."""
         a = ArkoudaArray(ak.array([1.0, np.nan, 1.0, 2.0]))
-        codes, uniques = a.factorize(sort=False, use_na_sentinel=False)
+        codes, uniques = a.factorize(use_na_sentinel=False)
         # uniques from first appearance: [1.0, 2.0]; NaN code == 2
         assert_arkouda_array_equal(uniques._data, ak.array([1.0, 2.0]))
-        assert_arkouda_array_equal(codes._data, ak.array([0, 2, 0, 1]))
+        np_assert_equal(codes, np.array([0, 2, 0, 1]))
 
     def test_factorize_float_all_nan(self):
         """Edge case: all values are NaN -> codes all sentinel, uniques empty."""
         a = ArkoudaArray(ak.array([np.nan, np.nan]))
         codes, uniques = a.factorize()
         assert_arkouda_array_equal(uniques._data, ak.array([], dtype=float))
-        assert_arkouda_array_equal(codes._data, ak.array([-1, -1], dtype=np.int64))
+        np_assert_equal(codes, np.array([-1, -1], dtype=np.int64))
 
-    @pytest.mark.parametrize("sort", [False, True])
-    def test_factorize_strings_basic(self, sort):
+    def test_factorize_strings_basic(self):
         """Strings: no NA handling; empty strings are treated as normal values."""
         s = ak.array(["a", "b", "a", "c"])
         a = ArkoudaStringArray(s)
-        codes, uniques = a.factorize(sort=sort)
+        codes, uniques = a.factorize()
 
-        if not sort:
-            assert_arkouda_array_equal(uniques._data, ak.array(["a", "b", "c"]))
-            assert_arkouda_array_equal(codes._data, ak.array([0, 1, 0, 2]))
-        else:
-            # Sorted: ["a", "b", "c"] -> same result for this set
-            assert_arkouda_array_equal(uniques._data, ak.array(["a", "b", "c"]))
-            assert_arkouda_array_equal(codes._data, ak.array([0, 1, 0, 2]))
+        # First appearance: ["a", "b", "c"]
+        assert_arkouda_array_equal(uniques._data, ak.array(["a", "b", "c"]))
+        np_assert_equal(codes, np.array([0, 1, 0, 2]))
 
     def test_factorize_strings_with_empty_string(self):
         """Explicitly ensure "" is treated as a normal value (not missing)."""
         s = ak.array(["", "x", "", "y"])
         a = ArkoudaStringArray(s)
-        codes, uniques = a.factorize(sort=False)
+        codes, uniques = a.factorize()
         assert_arkouda_array_equal(uniques._data, ak.array(["", "x", "y"]))
-        assert_arkouda_array_equal(codes._data, ak.array([0, 1, 0, 2]))
+        np_assert_equal(codes, np.array([0, 1, 0, 2]))
 
-    @pytest.mark.parametrize("sort", [False, True])
-    def test_factorize_categorical_basic(self, sort):
+    def test_factorize_categorical_basic(self):
         """
         Categorical: factorization operates over observed values (not categories table),
-        honoring first-appearance vs sorted order semantics of the observed data.
+        honoring first-appearance semantics of the observed data.
         """
         s = ak.array(["red", "blue", "red", "green"])
         cat = ak.Categorical(s)  # construct from Strings
         a = ArkoudaCategoricalArray(cat)
-        codes, uniques = a.factorize(sort=sort)
-
-        if not sort:
-            # first appearance uniques: ["red", "blue", "green"]
-            assert_arkouda_array_equal(uniques._data, ak.Categorical(ak.array(["red", "blue", "green"])))
-            assert_arkouda_array_equal(codes._data, ak.array([0, 1, 0, 2]))
-        else:
-            # sorted uniques: ["blue", "green", "red"]
-            assert_equal(uniques._data, ak.Categorical(ak.array(["blue", "green", "red"])))
-            # remapped codes according to sorted order:
-            # red->2, blue->0, green->1
-            assert_arkouda_array_equal(codes._data, ak.array([2, 0, 2, 1]))
-
-    def test_factorize_stability_first_appearance_vs_sorted(self):
+        codes, uniques = a.factorize()
+
+        # order of first-appearance: ["red", "blue", "green"]
+        assert_equal(uniques._data, ak.Categorical(ak.array(["red", "blue", "green"])))
+        # codes index into the first-appearance uniques:
+        # red->0, blue->1, green->2
+        np_assert_equal(codes, np.array([0, 1, 0, 2]))
+
+    def test_factorize_stability_first_appearance(self):
         """Sanity check that switching sort changes code assignments consistently."""
         x = ak.array([2, 1, 3, 2])
         a = ArkoudaArray(x)
 
-        codes_unsorted, uniques_unsorted = a.factorize(sort=False)
-        codes_sorted, uniques_sorted = a.factorize(sort=True)
+        codes_unsorted, uniques_unsorted = a.factorize()
 
         # First appearance uniques: [2, 1, 3]
         assert_arkouda_array_equal(uniques_unsorted._data, ak.array([2, 1, 3]))
-        assert_arkouda_array_equal(codes_unsorted._data, ak.array([0, 1, 2, 0]))
-
-        # Sorted uniques: [1, 2, 3]
-        assert_arkouda_array_equal(uniques_sorted._data, ak.array([1, 2, 3]))
-        # mapping old->new: 2->1, 1->0, 3->2  => [1,0,2,1]
-        assert_arkouda_array_equal(codes_sorted._data, ak.array([1, 0, 2, 1]))
+        np_assert_equal(codes_unsorted, np.array([0, 1, 2, 0]))
 
     def test_from_sequence_dispatches_to_correct_subclass(self):
         """
diff --git a/tests/pandas/extension/dataframe_accessor.py b/tests/pandas/extension/dataframe_accessor.py
index 7b7bc46d177..be5469d0606 100644
--- a/tests/pandas/extension/dataframe_accessor.py
+++ b/tests/pandas/extension/dataframe_accessor.py
@@ -28,7 +28,7 @@
 
 
 class TestDataFrameAccessorInternals:
-    def test_extension_docstrings(self):
+    def test_dataframe_extension_docstrings(self):
         import doctest
 
         from arkouda.pandas.extension import _dataframe_accessor