99 TYPE_CHECKING ,
1010 final ,
1111)
12+ import warnings
1213
1314import numpy as np
1415
16+ from pandas ._libs import lib
1517from pandas ._libs .tslibs import OutOfBoundsDatetime
16- from pandas .errors import InvalidIndexError
18+ from pandas .errors import (
19+ InvalidIndexError ,
20+ NullKeyWarning ,
21+ )
1722from pandas .util ._decorators import cache_readonly
23+ from pandas .util ._exceptions import find_stack_level
1824
1925from pandas .core .dtypes .common import (
2026 is_list_like ,
5561 from pandas .core .generic import NDFrame
5662
5763
# Text of the ``NullKeyWarning`` emitted when ``dropna`` was left unspecified and
# the grouping keys contain nulls. ``dropna=True`` drops null keys (the implicit
# default); ``dropna=False`` keeps them, so the advice must point users who want
# to keep null keys at ``dropna=False``.
_NULL_KEY_MESSAGE = (
    "`dropna` is not specified but grouper encountered null group keys. These keys "
    "will be dropped from the result by default. To keep null keys, set `dropna=False`, "
    "or to hide this warning and drop null keys, set `dropna=True`."
)
69+
70+
5871class Grouper :
5972 """
6073 A Grouper allows the user to specify a groupby instruction for an object.
@@ -246,7 +259,7 @@ class Grouper:
246259 """
247260
248261 sort : bool
249- dropna : bool
262+ dropna : bool | lib . NoDefault
250263 _grouper : Index | None
251264
252265 _attributes : tuple [str , ...] = ("key" , "level" , "freq" , "sort" , "dropna" )
@@ -264,7 +277,7 @@ def __init__(
264277 level = None ,
265278 freq = None ,
266279 sort : bool = False ,
267- dropna : bool = True ,
280+ dropna : bool | lib . NoDefault = lib . no_default ,
268281 ) -> None :
269282 self .key = key
270283 self .level = level
@@ -442,7 +455,7 @@ def __init__(
442455 sort : bool = True ,
443456 observed : bool = False ,
444457 in_axis : bool = False ,
445- dropna : bool = True ,
458+ dropna : bool | lib . NoDefault = lib . no_default ,
446459 uniques : ArrayLike | None = None ,
447460 ) -> None :
448461 self .level = level
@@ -599,6 +612,12 @@ def codes(self) -> npt.NDArray[np.signedinteger]:
def uniques(self) -> ArrayLike:
    """Return the second element of ``self._codes_and_uniques``."""
    codes_and_uniques = self._codes_and_uniques
    return codes_and_uniques[1]
601614
@property
def dropna(self) -> bool:
    """
    Whether null group keys are dropped.

    An unspecified ``dropna`` (``lib.no_default``) is treated as ``True``;
    otherwise the stored ``_dropna`` value is returned unchanged.
    """
    requested = self._dropna
    return True if requested is lib.no_default else requested
620+
602621 @cache_readonly
603622 def _codes_and_uniques (self ) -> tuple [npt .NDArray [np .signedinteger ], ArrayLike ]:
604623 uniques : ArrayLike
@@ -617,11 +636,11 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
617636 else :
618637 ucodes = np .arange (len (categories ))
619638
620- has_dropped_na = False
621- if not self . _dropna :
622- na_mask = cat . isna ()
623- if np . any ( na_mask ):
624- has_dropped_na = True
639+ has_na_values = False
640+ na_mask = cat . isna ()
641+ if np . any ( na_mask ):
642+ has_na_values = True
643+ if not self . dropna :
625644 if self ._sort :
626645 # NA goes at the end, gets `largest non-NA code + 1`
627646 na_code = len (categories )
@@ -637,11 +656,18 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
637656 )
638657 codes = cat .codes
639658
640- if has_dropped_na :
641- if not self ._sort :
642- # NA code is based on first appearance, increment higher codes
643- codes = np .where (codes >= na_code , codes + 1 , codes )
644- codes = np .where (na_mask , na_code , codes )
659+ if has_na_values :
660+ if not self .dropna :
661+ if not self ._sort :
662+ # NA code is based on first appearance, increment higher codes
663+ codes = np .where (codes >= na_code , codes + 1 , codes )
664+ codes = np .where (na_mask , na_code , codes )
665+ elif self ._dropna is lib .no_default :
666+ warnings .warn (
667+ _NULL_KEY_MESSAGE ,
668+ NullKeyWarning ,
669+ stacklevel = find_stack_level (),
670+ )
645671
646672 return codes , uniques
647673
@@ -660,8 +686,16 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
660686 # error: Incompatible types in assignment (expression has type "Union[
661687 # ndarray[Any, Any], Index]", variable has type "Categorical")
662688 codes , uniques = algorithms .factorize ( # type: ignore[assignment]
663- self .grouping_vector , sort = self ._sort , use_na_sentinel = self ._dropna
689+ self .grouping_vector , sort = self ._sort , use_na_sentinel = self .dropna
664690 )
691+ # TODO: Is `min(codes)` or `-1 in codes` faster?
692+ if self ._dropna is lib .no_default and (codes == - 1 ).any ():
693+ warnings .warn (
694+ _NULL_KEY_MESSAGE ,
695+ NullKeyWarning ,
696+ stacklevel = find_stack_level (),
697+ )
698+
665699 return codes , uniques
666700
667701 @cache_readonly
0 commit comments