From f0c1bd2994d08501ca5c4717d2f0754f70762528 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 1 Aug 2013 23:58:00 -0400 Subject: [PATCH] BUG: fix truncation for astype(str) --- doc/source/release.rst | 2 ++ pandas/core/common.py | 5 +++-- pandas/core/series.py | 1 - pandas/lib.pyx | 24 +++++++++++++++++------- pandas/tests/test_series.py | 15 +++++++++++++++ 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 1cdc2818b5fae..ddf0ecfc52d61 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -117,6 +117,8 @@ pandas 0.13 set _ref_locs (:issue:`4403`) - Fixed an issue where hist subplots were being overwritten when they were called using the top level matplotlib API (:issue:`4408`) + - Fixed a bug where calling ``Series.astype(str)`` would truncate the string + (:issue:`4405`, :issue:`4437`) pandas 0.12 =========== diff --git a/pandas/core/common.py b/pandas/core/common.py index 7e835a5b8a7ac..a4206fe26172c 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -3,7 +3,6 @@ """ import re -from datetime import datetime import codecs import csv @@ -1628,7 +1627,7 @@ def _is_sequence(x): _ensure_object = algos.ensure_object -def _astype_nansafe(arr, dtype, copy = True): +def _astype_nansafe(arr, dtype, copy=True): """ return a view if copy is False """ if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) @@ -1659,6 +1658,8 @@ def _astype_nansafe(arr, dtype, copy = True): elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): # work around NumPy brokenness, #1987 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + elif issubclass(dtype.type, compat.string_types): + return lib.astype_str(arr.ravel()).reshape(arr.shape) if copy: return arr.astype(dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index 394a0e6cabbab..10b03ccd3a310 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,7 +5,6 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -from pandas import compat import operator from distutils.version import LooseVersion import types diff --git a/pandas/lib.pyx b/pandas/lib.pyx index a80ad5b7d0208..031f2c56deb13 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -722,6 +722,16 @@ def astype_intsafe(ndarray[object] arr, new_dtype): return result +cpdef ndarray[object] astype_str(ndarray arr): + cdef: + Py_ssize_t i, n = arr.size + ndarray[object] result = np.empty(n, dtype=object) + + for i in range(n): + util.set_value_at(result, i, str(arr[i])) + + return result + def clean_index_list(list obj): ''' Utility used in pandas.core.index._ensure_index @@ -838,7 +848,7 @@ def write_csv_rows(list data, list data_index, int nlevels, list cols, object wr def create_hdf_rows_2d(ndarray indexer0, object dtype, ndarray[np.uint8_t, ndim=1] mask, - ndarray[np.uint8_t, ndim=1] searchable, + ndarray[np.uint8_t, ndim=1] searchable, list values): """ return a list of objects ready to be converted to rec-array format """ @@ -857,7 +867,7 @@ def create_hdf_rows_2d(ndarray indexer0, for i in range(n_indexer0): if not mask[i]: - + tup = PyTuple_New(tup_size) v = indexer0[i] @@ -869,7 +879,7 @@ def create_hdf_rows_2d(ndarray indexer0, v = values[b][i] if searchable[b]: v = v[0] - + PyTuple_SET_ITEM(tup, b+1, v) Py_INCREF(v) @@ -882,8 +892,8 @@ def create_hdf_rows_2d(ndarray indexer0, @cython.wraparound(False) def create_hdf_rows_3d(ndarray indexer0, ndarray indexer1, object dtype, - ndarray[np.uint8_t, ndim=2] mask, - ndarray[np.uint8_t, ndim=1] searchable, + ndarray[np.uint8_t, ndim=2] mask, + ndarray[np.uint8_t, ndim=1] searchable, list values): """ return a list of objects ready to be converted to rec-array format """ @@ -932,8 +942,8 @@ def create_hdf_rows_3d(ndarray indexer0, ndarray indexer1, @cython.wraparound(False) def create_hdf_rows_4d(ndarray indexer0, ndarray indexer1, ndarray indexer2, object dtype, - ndarray[np.uint8_t, ndim=3] mask, - ndarray[np.uint8_t, ndim=1] searchable, + ndarray[np.uint8_t, ndim=3] mask, + ndarray[np.uint8_t, ndim=1] searchable, list values): """ return a list of objects ready to be converted to rec-array format """ diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index e117c624e7d53..43fe96dbd8c12 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4,6 +4,7 @@ import os import operator import unittest +import string import nose @@ -2029,6 +2030,7 @@ def test_timedelta64_functions(self): expected = Series([timedelta(1)],dtype='timedelta64[ns]') assert_series_equal(result,expected) + def test_sub_of_datetime_from_TimeSeries(self): from pandas.core import common as com from datetime import datetime @@ -3354,6 +3356,19 @@ def test_astype_datetimes(self): s = s.astype('O') self.assert_(s.dtype == np.object_) + def test_astype_str(self): + # GH4405 + digits = string.digits + s1 = Series([digits * 10, tm.rands(63), tm.rands(64), + tm.rands(1000)]) + s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0]) + types = (compat.text_type,) + (np.str_, np.unicode_) + for typ in types: + for s in (s1, s2): + res = s.astype(typ) + expec = s.map(compat.text_type) + assert_series_equal(res, expec) + def test_map(self): index, data = tm.getMixedTypeDict()