diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index a0f9383336940..d32c19d6d0bb8 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -1,6 +1,7 @@ from .pandas_vb_common import * import string + class concat_categorical(object): goal_time = 0.2 @@ -26,6 +27,7 @@ def time_value_counts(self): def time_value_counts_dropna(self): self.ts.value_counts(dropna=True) + class categorical_constructor(object): goal_time = 0.2 @@ -43,3 +45,20 @@ def time_regular_constructor(self): def time_fastpath(self): Categorical(self.codes, self.cat_idx, fastpath=True) + +class categorical_rendering(object): + """Benchmark str() of a filtered categorical column.""" + goal_time = 3e-3 + + def setup(self): + # Build 1000 distinct string labels as a categorical column, then + # keep only the rows whose value is '20'. + size = 1000 + labels = list(map(str, range(size))) + column = pd.Series(labels, dtype='category') + frame = pd.DataFrame({'C': column, 'data': np.random.randn(size)}) + self.data = frame[frame.C == '20'] + + def time_rendering(self): + # Render the column as text; the resulting string is discarded. + str(self.data.C) diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 94f66f8cfc672..bd12050efbd12 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -3,7 +3,7 @@ v0.17.1 (November ??, 2015) --------------------------- -This is a minor bug-fix release from 0.17.0 and includes a a large number of +This is a minor bug-fix release from 0.17.0 and includes a large number of bug fixes along several new features, enhancements, and performance improvements. We recommend that all users upgrade to this version. @@ -50,6 +50,8 @@ Performance Improvements .. 
_whatsnew_0171.bug_fixes: +- Performance bug in ``Categorical._repr_categories``, which was rendering all category strings before chopping them for display (:issue:`11305`) + Bug Fixes ~~~~~~~~~ @@ -65,7 +67,6 @@ Bug Fixes - - Bug in ``squeeze()`` with zero length arrays (:issue:`11230`, :issue:`8999`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 9decd5e212cbf..8068ad785b6d8 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1389,12 +1389,15 @@ def _repr_categories(self): max_categories = (10 if get_option("display.max_categories") == 0 else get_option("display.max_categories")) from pandas.core import format as fmt - category_strs = fmt.format_array(self.categories, None) - if len(category_strs) > max_categories: + if len(self.categories) > max_categories: num = max_categories // 2 - head = category_strs[:num] - tail = category_strs[-(max_categories - num):] + head = fmt.format_array(self.categories[:num], None) + # -num would select every category when num == 0 + tail = fmt.format_array( + self.categories[-(max_categories - num):], None) category_strs = head + ["..."] + tail + else: + category_strs = fmt.format_array(self.categories, None) # Strip all leading spaces, which format_array adds for columns... category_strs = [x.strip() for x in category_strs]