pandas-dev
diff --git a/‎Makefile‎
Lines changed: 1 addition & 0 deletions b/‎Makefile‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎asv_bench/benchmarks/categoricals.py‎
Lines changed: 13 additions & 0 deletions b/‎asv_bench/benchmarks/categoricals.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/frame_methods.py‎
Lines changed: 18 additions & 0 deletions b/‎asv_bench/benchmarks/frame_methods.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/groupby.py‎
Lines changed: 20 additions & 1 deletion b/‎asv_bench/benchmarks/groupby.py‎
Lines changed: 20 additions & 1 deletion
diff --git a/‎ci/environment-dev.yaml‎
Lines changed: 1 addition & 0 deletions b/‎ci/environment-dev.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎ci/requirements_dev.txt‎
Lines changed: 1 addition & 0 deletions b/‎ci/requirements_dev.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/make.py‎
Lines changed: 15 additions & 2 deletions b/‎doc/make.py‎
Lines changed: 15 additions & 2 deletions
diff --git a/‎doc/source/_static/favicon.ico‎
3.81 KB b/‎doc/source/_static/favicon.ico‎
3.81 KB
diff --git a/‎doc/source/advanced.rst‎
Lines changed: 2 additions & 2 deletions b/‎doc/source/advanced.rst‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/source/basics.rst‎
Lines changed: 8 additions & 8 deletions b/‎doc/source/basics.rst‎
Lines changed: 8 additions & 8 deletions
@@ -23,3 +23,4 @@ doc:
 	cd doc; \
 	python make.py clean; \
 	python make.py html
+	cd doc && python make.py spellcheck
@@ -193,3 +193,16 @@ def time_categorical_series_is_monotonic_increasing(self):
 
     def time_categorical_series_is_monotonic_decreasing(self):
         self.s.is_monotonic_decreasing
+
+
+class Contains(object):
+    # Times ``in`` against the index built by tm.makeCategoricalIndex.
+    goal_time = 0.2
+
+    def setup(self):
+        N = 10**5  # size argument handed to tm.makeCategoricalIndex
+        self.ci = tm.makeCategoricalIndex(N)
+        self.cat = self.ci.categories[0]  # first category is the lookup key
+
+    def time_contains(self):
+        self.cat in self.ci  # bare expression: only the lookup is exercised
@@ -512,3 +512,21 @@ def time_nlargest(self, keep):
 
     def time_nsmallest(self, keep):
         self.df.nsmallest(100, 'A', keep=keep)
+
+
+class Describe(object):
+    # Times describe() on one column and on the whole three-column frame.
+    goal_time = 0.2
+
+    def setup(self):
+        self.df = DataFrame({  # three columns of 1e6 random ints in [0, 100)
+            'a': np.random.randint(0, 100, int(1e6)),
+            'b': np.random.randint(0, 100, int(1e6)),
+            'c': np.random.randint(0, 100, int(1e6))
+        })
+
+    def time_series_describe(self):
+        self.df['a'].describe()  # single column only
+
+    def time_dataframe_describe(self):
+        self.df.describe()  # every column of the frame
@@ -5,7 +5,7 @@
 
 import numpy as np
 from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
-                    TimeGrouper, Categorical)
+                    TimeGrouper, Categorical, Timestamp)
 import pandas.util.testing as tm
 
 from .pandas_vb_common import setup  # noqa
@@ -385,6 +385,25 @@ def time_dtype_as_field(self, dtype, method, application):
         self.as_field_method()
 
 
+class RankWithTies(object):
+    # GH 21237: groupby rank where every value in the single group is tied
+    goal_time = 0.2
+    param_names = ['dtype', 'tie_method']
+    params = [['float64', 'float32', 'int64', 'datetime64'],  # value dtypes
+              ['first', 'average', 'dense', 'min', 'max']]  # rank methods
+
+    def setup(self, dtype, tie_method):  # tie_method is not used here
+        N = 10**4  # number of rows; all share the key 'foo'
+        if dtype == 'datetime64':  # filled with one repeated Timestamp
+            data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype)
+        else:  # every other dtype is filled with the constant 1
+            data = np.array([1] * N, dtype=dtype)
+        self.df = DataFrame({'values': data, 'key': ['foo'] * N})
+
+    def time_rank_ties(self, dtype, tie_method):  # dtype is not used here
+        self.df.groupby('key').rank(method=tie_method)
+
+
 class Float32(object):
     # GH 13335
     goal_time = 0.2
 
@@ -13,3 +13,4 @@ dependencies:
   - pytz
   - setuptools>=24.2.0
   - sphinx
+  - sphinxcontrib-spelling
@@ -9,3 +9,4 @@ python-dateutil>=2.5.0
 pytz
 setuptools>=24.2.0
 sphinx
+sphinxcontrib-spelling
@@ -224,8 +224,9 @@ def _sphinx_build(self, kind):
         --------
         >>> DocBuilder(num_jobs=4)._sphinx_build('html')
         """
-        if kind not in ('html', 'latex'):
-            raise ValueError('kind must be html or latex, not {}'.format(kind))
+        if kind not in ('html', 'latex', 'spelling'):
+            raise ValueError('kind must be html, latex or '
+                             'spelling, not {}'.format(kind))
 
         self._run_os('sphinx-build',
                      '-j{}'.format(self.num_jobs),
@@ -304,6 +305,18 @@ def zip_html(self):
                      '-q',
                      *fnames)
 
+    def spellcheck(self):
+        """Spell check the documentation."""
+        self._sphinx_build('spelling')  # same build path as html/latex
+        output_location = os.path.join('build', 'spelling', 'output.txt')
+        with open(output_location) as output:  # path is cwd-relative
+            lines = output.readlines()
+            if lines:  # any content in the report counts as a failure
+                raise SyntaxError(  # NOTE(review): unusual exception type
+                    'Found misspelled words.'
+                    ' Check pandas/doc/build/spelling/output.txt'
+                    ' for more details.')
+
 
 def main():
     cmds = [method for method in dir(DocBuilder) if not method.startswith('_')]
 
@@ -342,7 +342,7 @@ As usual, **both sides** of the slicers are included as this is label indexing.
                        columns=micolumns).sort_index().sort_index(axis=1)
    dfmi
 
-Basic multi-index slicing using slices, lists, and labels.
+Basic MultiIndex slicing using slices, lists, and labels.
 
 .. ipython:: python
 
@@ -1039,7 +1039,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
     KeyError: 'Cannot get right slice bound for non-unique label: 3'
 
 :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
-an index is weakly monotonic. To check for strict montonicity, you can combine one of those with
+an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
 :meth:`Index.is_unique`
 
 .. ipython:: python
 
@@ -168,7 +168,7 @@ either match on the *index* or *columns* via the **axis** keyword:
 
    df_orig = df
 
-Furthermore you can align a level of a multi-indexed DataFrame with a Series.
+Furthermore you can align a level of a MultiIndexed DataFrame with a Series.
 
 .. ipython:: python
 
@@ -593,7 +593,7 @@ categorical columns:
     frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)})
     frame.describe()
 
-This behaviour can be controlled by providing a list of types as ``include``/``exclude``
+This behavior can be controlled by providing a list of types as ``include``/``exclude``
 arguments. The special value ``all`` can also be used:
 
 .. ipython:: python
@@ -1034,7 +1034,7 @@ Passing a single function to ``.transform()`` with a ``Series`` will yield a sin
 Transform with multiple functions
 +++++++++++++++++++++++++++++++++
 
-Passing multiple functions will yield a column multi-indexed DataFrame.
+Passing multiple functions will yield a column MultiIndexed DataFrame.
 The first level will be the original frame column names; the second level
 will be the names of the transforming functions.
 
@@ -1060,7 +1060,7 @@ Passing a dict of functions will allow selective transforming per column.
 
    tsdf.transform({'A': np.abs, 'B': lambda x: x+1})
 
-Passing a dict of lists will generate a multi-indexed DataFrame with these
+Passing a dict of lists will generate a MultiIndexed DataFrame with these
 selective transforms.
 
 .. ipython:: python
@@ -1889,12 +1889,12 @@ faster than sorting the entire Series and calling ``head(n)`` on the result.
    df.nsmallest(5, ['a', 'c'])
 
 
-.. _basics.multi-index_sorting:
+.. _basics.multiindex_sorting:
 
-Sorting by a multi-index column
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Sorting by a MultiIndex column
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-You must be explicit about sorting when the column is a multi-index, and fully specify
+You must be explicit about sorting when the column is a MultiIndex, and fully specify
 all levels to ``by``.
 
 .. ipython:: python