Skip to content

remove type: ignore from factorize in extension module #5228

@ajpotts

Description

@ajpotts

Remove the `type: ignore` from the `factorize` method in the extension module.

Returning a type other than what pandas expects from `factorize` causes errors like this:

----> 1 t  = tcp.head().groupby("Destination Port").size()

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/groupby.py:3050, in GroupBy.size(self)
   2992 @final
   2993 @Substitution(name="groupby")
   2994 @Substitution(see_also=_common_see_also)
   2995 def size(self) -> DataFrame | Series:
   2996     """
   2997     Compute group sizes.
   2998 
   (...)   3048     Freq: MS, dtype: int64
   3049     """
-> 3050     result = self._grouper.size()
   3051     dtype_backend: None | Literal["pyarrow", "numpy_nullable"] = None
   3052     if isinstance(self.obj, Series):

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/ops.py:714, in BaseGrouper.size(self)
    709 @final
    710 def size(self) -> Series:
    711     """
    712     Compute group sizes.
    713     """
--> 714     ids, _, ngroups = self.group_info
    715     out: np.ndarray | list
    716     if ngroups:

File pandas/_libs/properties.pyx:36, in pandas._libs.properties.CachedProperty.__get__()

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/ops.py:754, in BaseGrouper.group_info(self)
    752 @cache_readonly
    753 def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
--> 754     comp_ids, obs_group_ids = self._get_compressed_codes()
    756     ngroups = len(obs_group_ids)
    757     comp_ids = ensure_platform_int(comp_ids)

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/ops.py:778, in BaseGrouper._get_compressed_codes(self)
    775     # FIXME: compress_group_index's second return value is int64, not intp
    777 ping = self.groupings[0]
--> 778 return ping.codes, np.arange(len(ping._group_index), dtype=np.intp)

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/grouper.py:691, in Grouping.codes(self)
    689 @property
    690 def codes(self) -> npt.NDArray[np.signedinteger]:
--> 691     return self._codes_and_uniques[0]

File pandas/_libs/properties.pyx:36, in pandas._libs.properties.CachedProperty.__get__()

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/grouper.py:835, in Grouping._codes_and_uniques(self)
    830     uniques = self._uniques
    831 else:
    832     # GH35667, replace dropna=False with use_na_sentinel=False
    833     # error: Incompatible types in assignment (expression has type "Union[
    834     # ndarray[Any, Any], Index]", variable has type "Categorical")
--> 835     codes, uniques = algorithms.factorize(  # type: ignore[assignment]
    836         self.grouping_vector, sort=self._sort, use_na_sentinel=self._dropna
    837     )
    838 return codes, uniques

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/algorithms.py:802, in factorize(values, sort, use_na_sentinel, size_hint)
    795     codes, uniques = factorize_array(
    796         values,
    797         use_na_sentinel=use_na_sentinel,
    798         size_hint=size_hint,
    799     )
    801 if sort and len(uniques) > 0:
--> 802     uniques, codes = safe_sort(
    803         uniques,
    804         codes,
    805         use_na_sentinel=use_na_sentinel,
    806         assume_unique=True,
    807         verify=False,
    808     )
    810 uniques = _reconstruct_data(uniques, original.dtype, original)
    812 return codes, uniques

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/algorithms.py:1563, in safe_sort(values, codes, use_na_sentinel, assume_unique, verify)
   1561     else:
   1562         mask = None
-> 1563     new_codes = take_nd(order2, codes, fill_value=-1)
   1564 else:
   1565     reverse_indexer = np.empty(len(sorter), dtype=int)

File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/array_algos/take.py:110, in take_nd(arr, indexer, axis, fill_value, allow_fill)
    107     if not is_1d_only_ea_dtype(arr.dtype):
    108         # i.e. DatetimeArray, TimedeltaArray
    109         arr = cast("NDArrayBackedExtensionArray", arr)
--> 110         return arr.take(
    111             indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
    112         )
    114     return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
    116 arr = np.asarray(arr)

TypeError: pdarray.take() got an unexpected keyword argument 'fill_value'

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions