98 changes: 87 additions & 11 deletions doc/source/whatsnew/v0.16.0.txt
@@ -14,11 +14,13 @@ Highlights include:
- Backwards incompatible change to ``Timedelta`` to conform the ``.seconds`` attribute with ``datetime.timedelta``, see :ref:`here <whatsnew_0160.api_breaking.timedelta>`
- Changes to the ``.loc`` slicing API to conform with the behavior of ``.ix`` see :ref:`here <whatsnew_0160.api_breaking.indexing>`
- Changes to the default for ordering in the ``Categorical`` constructor, see :ref:`here <whatsnew_0160.api_breaking.categorical>`
- Enhancement to the ``.str`` accessor to make string operations easier, see :ref:`here <whatsnew_0160.enhancements.string>`

Check the :ref:`API Changes <whatsnew_0160.api>` and :ref:`deprecations <whatsnew_0160.deprecations>` before updating.

.. contents:: What's new in v0.16.0
:local:
:backlinks: none


.. _whatsnew_0160.enhancements:
@@ -120,6 +122,45 @@ from a ``scipy.sparse.coo_matrix``:
ss = SparseSeries.from_coo(A)
ss

.. _whatsnew_0160.enhancements.string:

String Methods Enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^

- The following new methods are accessible via the ``.str`` accessor to apply the function to each value. This is intended to make string operations more consistent with the standard methods on Python strings. (:issue:`9282`, :issue:`9352`, :issue:`9386`, :issue:`9387`, :issue:`9439`)

=============  =============  ===============  ===============  =============
..             ..             Methods          ..               ..
=============  =============  ===============  ===============  =============
``isalnum()``  ``isalpha()``  ``isdigit()``    ``isspace()``    ``islower()``
``isupper()``  ``istitle()``  ``isnumeric()``  ``isdecimal()``  ``find()``
``rfind()``    ``ljust()``    ``rjust()``      ``zfill()``
=============  =============  ===============  ===============  =============

.. ipython:: python

s = Series(['abcd', '3456', 'EFGH'])
s.str.isalpha()
s.str.find('ab')
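As a brief sketch of the elementwise semantics (the ``Series`` contents and the ``np.nan`` placement here are illustrative, not from the release notes):

```python
import numpy as np
import pandas as pd

s = pd.Series(['abcd', '3456', 'EFGH', np.nan])

# the is* methods mirror str.is* elementwise; missing values propagate
print(s.str.isdigit())

# find() returns the position of the substring, or -1 when it is absent
print(s.str.find('EF'))
```

Note that because the result can contain missing values, ``find()`` returns a float-typed ``Series`` here rather than an integer one.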


- :meth:`Series.str.pad` and :meth:`Series.str.center` now accept a ``fillchar`` option to specify the filling character (:issue:`9352`)

.. ipython:: python

s = Series(['12', '300', '25'])
s.str.pad(5, fillchar='_')
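A rough sketch of how ``pad`` relates to the justification methods above (``pad`` fills on the left by default, i.e. right-justifies):

```python
import pandas as pd

s = pd.Series(['12', '300', '25'])

# pad() fills on the left by default, so it matches rjust()
assert list(s.str.pad(5, fillchar='_')) == ['___12', '__300', '___25']
assert list(s.str.rjust(5, fillchar='_')) == list(s.str.pad(5, fillchar='_'))

# zfill() is the numeric-style variant, always filling with '0'
assert list(s.str.zfill(5)) == ['00012', '00300', '00025']
```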


- Added :meth:`Series.str.slice_replace`, which previously raised ``NotImplementedError`` (:issue:`8888`)

.. ipython:: python

s = Series(['ABCD', 'EFGH', 'IJK'])
s.str.slice_replace(1, 3, 'X')
# with no replacement given, the slice is simply removed
s.str.slice_replace(0, 1)
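A sketch of the equivalence: ``slice_replace(start, stop, repl)`` behaves like concatenating ``s[:start]``, ``repl``, and ``s[stop:]`` for each element:

```python
import pandas as pd

s = pd.Series(['ABCD', 'EFGH', 'IJK'])

# slice_replace(start, stop, repl) ~ value[:start] + repl + value[stop:]
assert list(s.str.slice_replace(1, 3, 'X')) == ['AXD', 'EXH', 'IX']

# with no replacement the slice is removed
assert list(s.str.slice_replace(0, 1)) == ['BCD', 'FGH', 'JK']
```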

.. _whatsnew_0160.enhancements.other:

Other enhancements
@@ -137,23 +178,15 @@ Other enhancements
- Allow Stata files to be read incrementally with an iterator; support for long strings in Stata files. See the docs :ref:`here <io.stata_reader>` (:issue:`9493`)
- Paths beginning with ~ will now be expanded to begin with the user's home directory (:issue:`9066`)
- Added time interval selection in ``get_data_yahoo`` (:issue:`9071`)
- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`)
- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`)
- A ``lag`` parameter was added to the autocorrelation method of ``Series``, defaulting to lag-1 autocorrelation (:issue:`9192`)
- ``Timedelta`` will now accept a ``nanoseconds`` keyword in the constructor (:issue:`9273`)
- SQL code now safely escapes table and column names (:issue:`8986`)

- Added auto-complete for ``Series.str.<tab>``, ``Series.dt.<tab>`` and ``Series.cat.<tab>`` (:issue:`9322`)

- ``Index.get_indexer`` now supports ``method='pad'`` and ``method='backfill'`` even for any target array, not just monotonic targets. These methods also work for monotonic decreasing as well as monotonic increasing indexes (:issue:`9258`).
- ``Index.asof`` now works on all index types (:issue:`9258`).

- The ``read_excel()`` function's :ref:`sheetname <io.specifying_sheets>` argument now accepts a list and ``None``, to get multiple or all sheets respectively. If more than one sheet is specified, a dictionary is returned. (:issue:`9450`)

.. code-block:: python
@@ -162,9 +195,6 @@ Other enhancements
pd.read_excel('path_to_file.xls', sheetname=['Sheet1', 3])

- A ``verbose`` argument has been added to ``io.read_excel()``, defaulting to ``False``. Set to ``True`` to print sheet names as they are parsed. (:issue:`9450`)
- Added ``days_in_month`` (compatibility alias ``daysinmonth``) property to ``Timestamp``, ``DatetimeIndex``, ``Period``, ``PeriodIndex``, and ``Series.dt`` (:issue:`9572`)
- Added ``decimal`` option in ``to_csv`` to provide formatting for non-'.' decimal separators (:issue:`781`)
- Added ``normalize`` option for ``Timestamp`` to normalize to midnight (:issue:`8794`)
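A few of the bullets above lend themselves to a quick sketch (the specific values are illustrative):

```python
import numpy as np
import pandas as pd
from pandas.tseries.frequencies import to_offset

# Timedelta now accepts a nanoseconds keyword
td = pd.Timedelta(days=1, nanoseconds=500)
assert td.nanoseconds == 500

# to_offset() now accepts a Timedelta as input
off = to_offset(pd.Timedelta(minutes=15))
assert off.nanos == 15 * 60 * 10**9

# Timestamp.to_datetime64() complements Timedelta.to_timedelta64()
ts = pd.Timestamp('2015-03-22')
assert isinstance(ts.to_datetime64(), np.datetime64)
```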
@@ -454,6 +484,15 @@ Other API Changes

To reproduce the old behavior, simply add more precision to the label (e.g., use ``2000-02-01`` instead of ``2000-02``).

- A spurious ``SettingWithCopy`` warning was generated when setting a new item in a frame in some cases (:issue:`8730`)

The following would previously report a ``SettingWithCopy`` warning.

.. ipython:: python

df1 = DataFrame({'x': Series(['a','b','c']), 'y': Series(['d','e','f'])})
df2 = df1[['x']]
df2['y'] = ['g', 'h', 'i']
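For context, a sketch of why this case is a false positive: ``df2`` holds its own data, so writing to it cannot affect ``df1`` (an explicit ``.copy()`` is added here so the example stays warning-free on any version):

```python
import pandas as pd

df1 = pd.DataFrame({'x': ['a', 'b', 'c'], 'y': ['d', 'e', 'f']})
df2 = df1[['x']].copy()
df2['y'] = ['g', 'h', 'i']

# the original frame is untouched
assert list(df1['y']) == ['d', 'e', 'f']
assert list(df2['y']) == ['g', 'h', 'i']
```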

.. _whatsnew_0160.deprecations:

@@ -505,6 +544,7 @@ Performance Improvements
- Performance improvements in ``MultiIndex.sortlevel`` (:issue:`9445`)
- Performance and memory usage improvements in ``DataFrame.duplicated`` (:issue:`9398`)
- Cythonized ``Period`` (:issue:`9440`)
- Decreased memory usage on ``to_hdf`` (:issue:`9648`)

.. _whatsnew_0160.bug_fixes:

@@ -567,3 +607,39 @@ Bug Fixes
- Bug in ``Series.value_counts`` excluding ``NaN`` for categorical-dtype ``Series`` with ``dropna=True`` (:issue:`9443`)
- Fixed missing ``numeric_only`` option for ``DataFrame.std/var/sem`` (:issue:`9201`)
- Support constructing ``Panel`` or ``Panel4D`` with scalar data (:issue:`8285`)
- ``Series`` text representation was disconnected from ``max_rows``/``max_columns`` (:issue:`7508`).
- ``Series`` number formatting was inconsistent when truncated (:issue:`8532`).

Previous Behavior

.. code-block:: python

In [2]: pd.options.display.max_rows = 10
In [3]: s = pd.Series([1,1,1,1,1,1,1,1,1,1,0.9999,1,1]*10)
In [4]: s
Out[4]:
0 1
1 1
2 1
...
127 0.9999
128 1.0000
129 1.0000
Length: 130, dtype: float64

New Behavior

.. code-block:: python

0 1.0000
1 1.0000
2 1.0000
3 1.0000
4 1.0000
...
125 1.0000
126 1.0000
127 0.9999
128 1.0000
129 1.0000
dtype: float64
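The new behavior can be checked programmatically; a sketch (the series contents are illustrative):

```python
import pandas as pd

s = pd.Series([1.0] * 130)
s[127] = 0.9999

with pd.option_context('display.max_rows', 10):
    text = repr(s)

# head and tail now share one formatter, so every value shows 4 decimals
assert '0.9999' in text and '1.0000' in text and '...' in text
```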
87 changes: 57 additions & 30 deletions pandas/core/format.py
@@ -129,62 +129,80 @@ def to_string(self):

class SeriesFormatter(object):

def __init__(self, series, buf=None, length=True, header=True,
na_rep='NaN', name=False, float_format=None, dtype=True,
max_rows=None):
self.series = series
self.buf = buf if buf is not None else StringIO()
self.name = name
self.na_rep = na_rep
self.header = header
self.length = length
self.max_rows = max_rows

if float_format is None:
float_format = get_option("display.float_format")
self.float_format = float_format
self.dtype = dtype

self._chk_truncate()

def _chk_truncate(self):
from pandas.tools.merge import concat
max_rows = self.max_rows
truncate_v = max_rows and (len(self.series) > max_rows)
series = self.series
if truncate_v:
if max_rows == 1:
row_num = max_rows
series = series.iloc[:max_rows]
else:
row_num = max_rows // 2
series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
self.tr_row_num = row_num
self.tr_series = series
self.truncate_v = truncate_v

def _get_footer(self):
name = self.series.name
footer = u('')

if getattr(self.series.index, 'freq', None) is not None:
footer += 'Freq: %s' % self.series.index.freqstr

if self.name is not False and name is not None:
if footer:
footer += ', '

series_name = com.pprint_thing(name,
escape_chars=('\t', '\r', '\n'))
footer += ("Name: %s" %
series_name) if name is not None else ""

if self.length:
if footer:
footer += ', '
footer += 'Length: %d' % len(self.series)

if self.dtype is not False and self.dtype is not None:
name = getattr(self.tr_series.dtype, 'name', None)
if name:
if footer:
footer += ', '
footer += 'dtype: %s' % com.pprint_thing(name)

# level infos are added to the end and in a new line, like it is done for Categoricals
# Only added when we request a name
if name and com.is_categorical_dtype(self.tr_series.dtype):
level_info = self.tr_series.values._repr_categories_info()
if footer:
footer += "\n"
footer += level_info

return compat.text_type(footer)

def _get_formatted_index(self):
index = self.tr_series.index
is_multi = isinstance(index, MultiIndex)

if is_multi:
@@ -196,35 +214,44 @@ def _get_formatted_index(self):
return fmt_index, have_header

def _get_formatted_values(self):
return format_array(self.tr_series.get_values(), None,
float_format=self.float_format,
na_rep=self.na_rep)

def to_string(self):
series = self.tr_series
footer = self._get_footer()

if len(series) == 0:
return 'Series([], ' + footer + ')'

fmt_index, have_header = self._get_formatted_index()
fmt_values = self._get_formatted_values()

maxlen = max(len(x) for x in fmt_index) # max index len
pad_space = min(maxlen, 60)

if self.truncate_v:
n_header_rows = 0
row_num = self.tr_row_num
width = len(fmt_values[row_num-1])
if width > 3:
dot_str = '...'
else:
dot_str = '..'
dot_str = dot_str.center(width)
fmt_values.insert(row_num + n_header_rows, dot_str)
fmt_index.insert(row_num + 1, '')

result = adjoin(3, *[fmt_index[1:], fmt_values])

if self.header and have_header:
result = fmt_index[0] + '\n' + result

if footer:
result += '\n' + footer

return compat.text_type(u('').join(result))


def _strlen_func():
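The core of ``_chk_truncate`` above is a head/tail split. A minimal standalone sketch of the same idea, using a plain list instead of a ``Series`` (the helper name is hypothetical):

```python
def chk_truncate(values, max_rows):
    """Return (possibly truncated values, number of rows kept per side)."""
    if max_rows and len(values) > max_rows:
        row_num = max(max_rows // 2, 1)  # keep at least one row per side
        return values[:row_num] + values[-row_num:], row_num
    return values, None

# 10 rows limited to 4: keep 2 from the head and 2 from the tail
truncated, row_num = chk_truncate(list(range(10)), 4)
assert truncated == [0, 1, 8, 9] and row_num == 2
```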
30 changes: 19 additions & 11 deletions pandas/core/generic.py
@@ -1265,6 +1265,14 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
except:
pass

# we might be a false positive
try:
if self.is_copy().shape == self.shape:
self.is_copy = None
return
except:
pass

# a custom message
if isinstance(self.is_copy, string_types):
t = self.is_copy
@@ -1344,8 +1352,9 @@ def take(self, indices, axis=0, convert=True, is_copy=True):
result = self._constructor(new_data).__finalize__(self)

# maybe set copy if we didn't actually change the index
if is_copy:
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)

return result

@@ -2005,6 +2014,14 @@ def __setattr__(self, name, value):
#----------------------------------------------------------------------
# Consolidation of internals

def _protect_consolidate(self, f):
""" consolidate _data. if the blocks have changed, then clear the cache """
blocks_before = len(self._data.blocks)
result = f()
if len(self._data.blocks) != blocks_before:
self._clear_item_cache()
return result

def _consolidate_inplace(self):
f = lambda: self._data.consolidate()
self._data = self._protect_consolidate(f)
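The pattern in ``_protect_consolidate`` — run an operation and invalidate a cache only when the block layout actually changed — can be sketched independently (the ``Frame`` class below is a hypothetical stand-in, not the pandas internals):

```python
class Frame:
    def __init__(self):
        self.blocks = ['int-block', 'float-block']
        self.item_cache = {'a': 'cached-column'}

    def _protect_consolidate(self, f):
        # clear the item cache only if f() changed the number of blocks
        blocks_before = len(self.blocks)
        result = f()
        if len(self.blocks) != blocks_before:
            self.item_cache.clear()
        return result

frame = Frame()
frame._protect_consolidate(lambda: None)  # no layout change: cache kept
assert frame.item_cache

def merge_blocks():
    frame.blocks = ['merged-block']

frame._protect_consolidate(merge_blocks)  # layout changed: cache cleared
assert not frame.item_cache
```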
@@ -2029,8 +2046,6 @@ def consolidate(self, inplace=False):
else:
f = lambda: self._data.consolidate()
cons_data = self._protect_consolidate(f)
return self._constructor(cons_data).__finalize__(self)

@property
@@ -2066,13 +2081,6 @@ def _check_inplace_setting(self, value):

return True


def _get_numeric_data(self):
return self._constructor(
self._data.get_numeric_data()).__finalize__(self)
2 changes: 1 addition & 1 deletion pandas/core/internals.py
@@ -1752,7 +1752,7 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
if self.is_categorical_astype(dtype):
values = self.values
else:
values = np.asarray(self.values).astype(dtype, copy=False)

if copy:
values = values.copy()
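The one-line change avoids an unconditional copy; the difference can be seen directly with NumPy:

```python
import numpy as np

a = np.arange(3)

# np.array() copies by default; np.asarray() reuses the input when possible
assert np.array(a) is not a
assert np.asarray(a) is a

# astype(..., copy=False) is likewise a no-op when the dtype already matches
assert a.astype(a.dtype, copy=False) is a
assert a.astype('float64', copy=False) is not a
```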