Skip to content

Commit 009097f

Browse files
committed
DOC: update cut/qcut docstrings
* Add some clarification about the usage of bins/labels and that they end up as categories. * Update the examples to produce the current output with the new categoricals.
1 parent 66cdd66 commit 009097f

File tree

1 file changed

+29
-11
lines changed

1 file changed

+29
-11
lines changed

pandas/tools/tile.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
3434
right == True (the default), then the bins [1,2,3,4] indicate
3535
(1,2], (2,3], (3,4].
3636
labels : array or boolean, default None
37-
Labels to use for bins, or False to return integer bin labels.
37+
Used as labels for the resulting bins. Must be of the same length as the resulting
38+
bins. If False, return only integer indicators of the bins.
3839
retbins : bool, optional
3940
Whether to return the bins or not. Can be useful if bins is given
4041
as a scalar.
@@ -47,7 +48,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
4748
-------
4849
out : Categorical or Series or array of integers if labels is False
4950
The return type (Categorical or Series) depends on the input: a Series of type category if
50-
input is a Series else Categorical.
51+
input is a Series else Categorical. Bins are represented as categories when categorical
52+
data is returned.
5153
bins : ndarray of floats
5254
Returned only if `retbins` is True.
5355
@@ -63,12 +65,15 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
6365
6466
Examples
6567
--------
66-
>>> cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True)
67-
(array([(0.191, 3.367], (0.191, 3.367], (0.191, 3.367], (3.367, 6.533],
68-
(6.533, 9.7], (0.191, 3.367]], dtype=object),
69-
array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ]))
70-
>>> cut(np.ones(5), 4, labels=False)
71-
array([2, 2, 2, 2, 2])
68+
>>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True)
69+
([(0.191, 3.367], (0.191, 3.367], (0.191, 3.367], (3.367, 6.533], (6.533, 9.7], (0.191, 3.367]]
70+
Categories (3, object): [(0.191, 3.367] < (3.367, 6.533] < (6.533, 9.7]],
71+
array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ]))
72+
>>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, labels=["good","medium","bad"])
73+
[good, good, good, medium, bad, good]
74+
Categories (3, object): [good < medium < bad]
75+
>>> pd.cut(np.ones(5), 4, labels=False)
76+
array([1, 1, 1, 1, 1], dtype=int64)
7277
"""
7378
# NOTE: this binning code is changed a bit from histogram for var(x) == 0
7479
if not np.iterable(bins):
@@ -126,7 +131,8 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
126131
Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
127132
array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles
128133
labels : array or boolean, default None
129-
Labels to use for bin edges, or False to return integer bin labels
134+
Used as labels for the resulting bins. Must be of the same length as the resulting
135+
bins. If False, return only integer indicators of the bins.
130136
retbins : bool, optional
131137
Whether to return the bins or not. Can be useful if bins is given
132138
as a scalar.
@@ -135,15 +141,27 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
135141
136142
Returns
137143
-------
138-
cat : Categorical or Series
139-
Returns a Series of type category if input is a Series else Categorical.
144+
out : Categorical or Series or array of integers if labels is False
145+
The return type (Categorical or Series) depends on the input: a Series of type category if
146+
input is a Series else Categorical. Bins are represented as categories when categorical
147+
data is returned.
148+
bins : ndarray of floats
149+
Returned only if `retbins` is True.
140150
141151
Notes
142152
-----
143153
Out of bounds values will be NA in the resulting Categorical object
144154
145155
Examples
146156
--------
157+
>>> pd.qcut(range(5), 4)
158+
[[0, 1], [0, 1], (1, 2], (2, 3], (3, 4]]
159+
Categories (4, object): [[0, 1] < (1, 2] < (2, 3] < (3, 4]]
160+
>>> pd.qcut(range(5), 3, labels=["good","medium","bad"])
161+
[good, good, medium, bad, bad]
162+
Categories (3, object): [good < medium < bad]
163+
>>> pd.qcut(range(5), 4, labels=False)
164+
array([0, 0, 1, 2, 3], dtype=int64)
147165
"""
148166
if com.is_integer(q):
149167
quantiles = np.linspace(0, 1, q + 1)

0 commit comments

Comments
 (0)