@@ -1815,14 +1815,30 @@ def _set_name(
1815
1815
Parrot 30.0
1816
1816
Parrot 20.0
1817
1817
Name: Max Speed, dtype: float64
1818
+
1819
+ We can pass a list of values to group the Series data by custom labels:
1820
+
1818
1821
>>> ser.groupby(["a", "b", "a", "b"]).mean()
1819
1822
a 210.0
1820
1823
b 185.0
1821
1824
Name: Max Speed, dtype: float64
1825
+
1826
+ Grouping by numeric labels yields similar results:
1827
+
1828
+ >>> ser.groupby([0, 1, 0, 1]).mean()
1829
+ 0 210.0
1830
+ 1 185.0
1831
+ Name: Max Speed, dtype: float64
1832
+
1833
+ We can group by a level of the index:
1834
+
1822
1835
>>> ser.groupby(level=0).mean()
1823
1836
Falcon 370.0
1824
1837
Parrot 25.0
1825
1838
Name: Max Speed, dtype: float64
1839
+
1840
+ We can group by a condition applied to the Series values:
1841
+
1826
1842
>>> ser.groupby(ser > 100).mean()
1827
1843
Max Speed
1828
1844
False 25.0
@@ -1831,7 +1847,7 @@ def _set_name(
1831
1847
1832
1848
**Grouping by Indexes**
1833
1849
1834
- We can groupb different levels of a hierarchical index
1850
+ We can group by different levels of a hierarchical index
1835
1851
using the `level` parameter:
1836
1852
1837
1853
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
@@ -1845,11 +1861,16 @@ def _set_name(
1845
1861
Parrot Captive 30.0
1846
1862
Wild 20.0
1847
1863
Name: Max Speed, dtype: float64
1864
+
1848
1865
>>> ser.groupby(level=0).mean()
1849
1866
Animal
1850
1867
Falcon 370.0
1851
1868
Parrot 25.0
1852
1869
Name: Max Speed, dtype: float64
1870
+
1871
+ We can also group by the 'Type' level of the hierarchical index
1872
+ to get the mean speed for each type:
1873
+
1853
1874
>>> ser.groupby(level="Type").mean()
1854
1875
Type
1855
1876
Captive 210.0
@@ -1865,12 +1886,17 @@ def _set_name(
1865
1886
b 3
1866
1887
dtype: int64
1867
1888
1889
+ To include `NA` values in the group keys, set `dropna=False`:
1890
+
1868
1891
>>> ser.groupby(level=0, dropna=False).sum()
1869
1892
a 3
1870
1893
b 3
1871
1894
NaN 3
1872
1895
dtype: int64
1873
1896
1897
+ We can also group by a custom list that contains NaN values, which
1898
+ represent missing group labels:
1899
+
1874
1900
>>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot']
1875
1901
>>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed")
1876
1902
>>> ser.groupby(["a", "b", "a", np.nan]).mean()
0 commit comments