Skip to content

Commit 0d986fc

Browse files
authored
Merge branch 'master' into docstring_rsplit
2 parents 773cfbf + 6131a59 commit 0d986fc

File tree

168 files changed

+5222
-1357
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

168 files changed

+5222
-1357
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ doc:
2323
cd doc; \
2424
python make.py clean; \
2525
python make.py html
26+
python make.py spellcheck

asv_bench/benchmarks/categoricals.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,16 @@ def time_categorical_series_is_monotonic_increasing(self):
193193

194194
def time_categorical_series_is_monotonic_decreasing(self):
195195
self.s.is_monotonic_decreasing
196+
197+
198+
class Contains(object):
199+
200+
goal_time = 0.2
201+
202+
def setup(self):
203+
N = 10**5
204+
self.ci = tm.makeCategoricalIndex(N)
205+
self.cat = self.ci.categories[0]
206+
207+
def time_contains(self):
208+
self.cat in self.ci

asv_bench/benchmarks/frame_methods.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,3 +512,21 @@ def time_nlargest(self, keep):
512512

513513
def time_nsmallest(self, keep):
514514
self.df.nsmallest(100, 'A', keep=keep)
515+
516+
517+
class Describe(object):
518+
519+
goal_time = 0.2
520+
521+
def setup(self):
522+
self.df = DataFrame({
523+
'a': np.random.randint(0, 100, int(1e6)),
524+
'b': np.random.randint(0, 100, int(1e6)),
525+
'c': np.random.randint(0, 100, int(1e6))
526+
})
527+
528+
def time_series_describe(self):
529+
self.df['a'].describe()
530+
531+
def time_dataframe_describe(self):
532+
self.df.describe()

asv_bench/benchmarks/groupby.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77
from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
8-
TimeGrouper, Categorical)
8+
TimeGrouper, Categorical, Timestamp)
99
import pandas.util.testing as tm
1010

1111
from .pandas_vb_common import setup # noqa
@@ -385,6 +385,25 @@ def time_dtype_as_field(self, dtype, method, application):
385385
self.as_field_method()
386386

387387

388+
class RankWithTies(object):
389+
# GH 21237
390+
goal_time = 0.2
391+
param_names = ['dtype', 'tie_method']
392+
params = [['float64', 'float32', 'int64', 'datetime64'],
393+
['first', 'average', 'dense', 'min', 'max']]
394+
395+
def setup(self, dtype, tie_method):
396+
N = 10**4
397+
if dtype == 'datetime64':
398+
data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype)
399+
else:
400+
data = np.array([1] * N, dtype=dtype)
401+
self.df = DataFrame({'values': data, 'key': ['foo'] * N})
402+
403+
def time_rank_ties(self, dtype, tie_method):
404+
self.df.groupby('key').rank(method=tie_method)
405+
406+
388407
class Float32(object):
389408
# GH 13335
390409
goal_time = 0.2

ci/environment-dev.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ dependencies:
1313
- pytz
1414
- setuptools>=24.2.0
1515
- sphinx
16+
- sphinxcontrib-spelling

ci/requirements_dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ python-dateutil>=2.5.0
99
pytz
1010
setuptools>=24.2.0
1111
sphinx
12+
sphinxcontrib-spelling

doc/make.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,9 @@ def _sphinx_build(self, kind):
224224
--------
225225
>>> DocBuilder(num_jobs=4)._sphinx_build('html')
226226
"""
227-
if kind not in ('html', 'latex'):
228-
raise ValueError('kind must be html or latex, not {}'.format(kind))
227+
if kind not in ('html', 'latex', 'spelling'):
228+
raise ValueError('kind must be html, latex or '
229+
'spelling, not {}'.format(kind))
229230

230231
self._run_os('sphinx-build',
231232
'-j{}'.format(self.num_jobs),
@@ -304,6 +305,18 @@ def zip_html(self):
304305
'-q',
305306
*fnames)
306307

308+
def spellcheck(self):
309+
"""Spell check the documentation."""
310+
self._sphinx_build('spelling')
311+
output_location = os.path.join('build', 'spelling', 'output.txt')
312+
with open(output_location) as output:
313+
lines = output.readlines()
314+
if lines:
315+
raise SyntaxError(
316+
'Found misspelled words.'
317+
' Check pandas/doc/build/spelling/output.txt'
318+
' for more details.')
319+
307320

308321
def main():
309322
cmds = [method for method in dir(DocBuilder) if not method.startswith('_')]

doc/source/_static/favicon.ico

3.81 KB
Binary file not shown.

doc/source/advanced.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ As usual, **both sides** of the slicers are included as this is label indexing.
342342
columns=micolumns).sort_index().sort_index(axis=1)
343343
dfmi
344344
345-
Basic multi-index slicing using slices, lists, and labels.
345+
Basic MultiIndex slicing using slices, lists, and labels.
346346

347347
.. ipython:: python
348348
@@ -1039,7 +1039,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
10391039
KeyError: 'Cannot get right slice bound for non-unique label: 3'
10401040
10411041
:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
1042-
an index is weakly monotonic. To check for strict montonicity, you can combine one of those with
1042+
an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
10431043
:meth:`Index.is_unique`
10441044
10451045
.. ipython:: python

doc/source/basics.rst

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ either match on the *index* or *columns* via the **axis** keyword:
168168
169169
df_orig = df
170170
171-
Furthermore you can align a level of a multi-indexed DataFrame with a Series.
171+
Furthermore you can align a level of a MultiIndexed DataFrame with a Series.
172172

173173
.. ipython:: python
174174
@@ -593,7 +593,7 @@ categorical columns:
593593
frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)})
594594
frame.describe()
595595
596-
This behaviour can be controlled by providing a list of types as ``include``/``exclude``
596+
This behavior can be controlled by providing a list of types as ``include``/``exclude``
597597
arguments. The special value ``all`` can also be used:
598598

599599
.. ipython:: python
@@ -1034,7 +1034,7 @@ Passing a single function to ``.transform()`` with a ``Series`` will yield a sin
10341034
Transform with multiple functions
10351035
+++++++++++++++++++++++++++++++++
10361036

1037-
Passing multiple functions will yield a column multi-indexed DataFrame.
1037+
Passing multiple functions will yield a column MultiIndexed DataFrame.
10381038
The first level will be the original frame column names; the second level
10391039
will be the names of the transforming functions.
10401040

@@ -1060,7 +1060,7 @@ Passing a dict of functions will allow selective transforming per column.
10601060
10611061
tsdf.transform({'A': np.abs, 'B': lambda x: x+1})
10621062
1063-
Passing a dict of lists will generate a multi-indexed DataFrame with these
1063+
Passing a dict of lists will generate a MultiIndexed DataFrame with these
10641064
selective transforms.
10651065

10661066
.. ipython:: python
@@ -1889,12 +1889,12 @@ faster than sorting the entire Series and calling ``head(n)`` on the result.
18891889
df.nsmallest(5, ['a', 'c'])
18901890
18911891
1892-
.. _basics.multi-index_sorting:
1892+
.. _basics.multiindex_sorting:
18931893

1894-
Sorting by a multi-index column
1895-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1894+
Sorting by a MultiIndex column
1895+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18961896

1897-
You must be explicit about sorting when the column is a multi-index, and fully specify
1897+
You must be explicit about sorting when the column is a MultiIndex, and fully specify
18981898
all levels to ``by``.
18991899

19001900
.. ipython:: python

0 commit comments

Comments
 (0)