-
Notifications
You must be signed in to change notification settings - Fork 97
Closed
Milestone
Description
Remove the `# type: ignore` comment from the `factorize` method in the extension module.
Returning a type other than what pandas expects causes errors like this:
----> 1 t = tcp.head().groupby("Destination Port").size()
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/groupby.py:3050, in GroupBy.size(self)
2992 @final
2993 @Substitution(name="groupby")
2994 @Substitution(see_also=_common_see_also)
2995 def size(self) -> DataFrame | Series:
2996 """
2997 Compute group sizes.
2998
(...) 3048 Freq: MS, dtype: int64
3049 """
-> 3050 result = self._grouper.size()
3051 dtype_backend: None | Literal["pyarrow", "numpy_nullable"] = None
3052 if isinstance(self.obj, Series):
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/ops.py:714, in BaseGrouper.size(self)
709 @final
710 def size(self) -> Series:
711 """
712 Compute group sizes.
713 """
--> 714 ids, _, ngroups = self.group_info
715 out: np.ndarray | list
716 if ngroups:
File pandas/_libs/properties.pyx:36, in pandas._libs.properties.CachedProperty.__get__()
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/ops.py:754, in BaseGrouper.group_info(self)
752 @cache_readonly
753 def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
--> 754 comp_ids, obs_group_ids = self._get_compressed_codes()
756 ngroups = len(obs_group_ids)
757 comp_ids = ensure_platform_int(comp_ids)
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/ops.py:778, in BaseGrouper._get_compressed_codes(self)
775 # FIXME: compress_group_index's second return value is int64, not intp
777 ping = self.groupings[0]
--> 778 return ping.codes, np.arange(len(ping._group_index), dtype=np.intp)
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/grouper.py:691, in Grouping.codes(self)
689 @property
690 def codes(self) -> npt.NDArray[np.signedinteger]:
--> 691 return self._codes_and_uniques[0]
File pandas/_libs/properties.pyx:36, in pandas._libs.properties.CachedProperty.__get__()
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/groupby/grouper.py:835, in Grouping._codes_and_uniques(self)
830 uniques = self._uniques
831 else:
832 # GH35667, replace dropna=False with use_na_sentinel=False
833 # error: Incompatible types in assignment (expression has type "Union[
834 # ndarray[Any, Any], Index]", variable has type "Categorical")
--> 835 codes, uniques = algorithms.factorize( # type: ignore[assignment]
836 self.grouping_vector, sort=self._sort, use_na_sentinel=self._dropna
837 )
838 return codes, uniques
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/algorithms.py:802, in factorize(values, sort, use_na_sentinel, size_hint)
795 codes, uniques = factorize_array(
796 values,
797 use_na_sentinel=use_na_sentinel,
798 size_hint=size_hint,
799 )
801 if sort and len(uniques) > 0:
--> 802 uniques, codes = safe_sort(
803 uniques,
804 codes,
805 use_na_sentinel=use_na_sentinel,
806 assume_unique=True,
807 verify=False,
808 )
810 uniques = _reconstruct_data(uniques, original.dtype, original)
812 return codes, uniques
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/algorithms.py:1563, in safe_sort(values, codes, use_na_sentinel, assume_unique, verify)
1561 else:
1562 mask = None
-> 1563 new_codes = take_nd(order2, codes, fill_value=-1)
1564 else:
1565 reverse_indexer = np.empty(len(sorter), dtype=int)
File ~/anaconda3/envs/arkouda-dev/lib/python3.13/site-packages/pandas/core/array_algos/take.py:110, in take_nd(arr, indexer, axis, fill_value, allow_fill)
107 if not is_1d_only_ea_dtype(arr.dtype):
108 # i.e. DatetimeArray, TimedeltaArray
109 arr = cast("NDArrayBackedExtensionArray", arr)
--> 110 return arr.take(
111 indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
112 )
114 return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
116 arr = np.asarray(arr)
TypeError: pdarray.take() got an unexpected keyword argument 'fill_value'
Metadata
Metadata
Assignees
Labels
No labels