@@ -1815,14 +1815,31 @@ def _set_name(
1815
1815
Parrot 30.0
1816
1816
Parrot 20.0
1817
1817
Name: Max Speed, dtype: float64
1818
+
1819
+ We can pass a list of values (here: ["a", "b", "a", "b"]) to
1820
+ group the Series data by custom labels:
1821
+
1818
1822
>>> ser.groupby(["a", "b", "a", "b"]).mean()
1819
1823
a 210.0
1820
1824
b 185.0
1821
1825
Name: Max Speed, dtype: float64
1826
+
1827
+ Grouping by numeric labels (here: [0, 1, 0, 1]) yields similar results:
1828
+
1829
+ >>> ser.groupby([0, 1, 0, 1]).mean()
1830
+ 0 210.0
1831
+ 1 185.0
1832
+ Name: Max Speed, dtype: float64
1833
+
1834
+ We can group by a level of the index:
1835
+
1822
1836
>>> ser.groupby(level=0).mean()
1823
1837
Falcon 370.0
1824
1838
Parrot 25.0
1825
1839
Name: Max Speed, dtype: float64
1840
+
1841
+ We can group by a condition applied to the Series values:
1842
+
1826
1843
>>> ser.groupby(ser > 100).mean()
1827
1844
Max Speed
1828
1845
False 25.0
@@ -1845,11 +1862,16 @@ def _set_name(
1845
1862
Parrot Captive 30.0
1846
1863
Wild 20.0
1847
1864
Name: Max Speed, dtype: float64
1865
+
1848
1866
>>> ser.groupby(level=0).mean()
1849
1867
Animal
1850
1868
Falcon 370.0
1851
1869
Parrot 25.0
1852
1870
Name: Max Speed, dtype: float64
1871
+
1872
+ We can also group by the 'Type' level of the hierarchical index
1873
+ to get the mean speed for each type:
1874
+
1853
1875
>>> ser.groupby(level="Type").mean()
1854
1876
Type
1855
1877
Captive 210.0
@@ -1865,12 +1887,17 @@ def _set_name(
1865
1887
b 3
1866
1888
dtype: int64
1867
1889
1890
+ To include `NA` values in the group keys, set `dropna=False`:
1891
+
1868
1892
>>> ser.groupby(level=0, dropna=False).sum()
1869
1893
a 3
1870
1894
b 3
1871
1895
NaN 3
1872
1896
dtype: int64
1873
1897
1898
+ We can also group by a custom list with NaN values to handle
1899
+ missing group labels (here: ["a", "b", "a", np.nan]):
1900
+
1874
1901
>>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot']
1875
1902
>>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed")
1876
1903
>>> ser.groupby(["a", "b", "a", np.nan]).mean()
0 commit comments