From 0902bb80a120a8d0e7a3666e36e1a75fa93a4cbb Mon Sep 17 00:00:00 2001 From: rockg Date: Thu, 21 Jan 2016 20:55:15 -0500 Subject: [PATCH] PEP: pandas/sparse cleanup --- pandas/sparse/api.py | 2 +- pandas/sparse/array.py | 107 +++++---- pandas/sparse/frame.py | 176 +++++++-------- pandas/sparse/panel.py | 62 +++--- pandas/sparse/scipy_sparse.py | 54 ++--- pandas/sparse/series.py | 126 ++++++----- pandas/sparse/tests/test_array.py | 17 +- pandas/sparse/tests/test_libsparse.py | 111 ++++------ pandas/sparse/tests/test_sparse.py | 300 +++++++++++++------------- 9 files changed, 456 insertions(+), 499 deletions(-) diff --git a/pandas/sparse/api.py b/pandas/sparse/api.py index 230ad15937c92..b4d874e6a1ab9 100644 --- a/pandas/sparse/api.py +++ b/pandas/sparse/api.py @@ -1,5 +1,5 @@ # pylint: disable=W0611 - +# flake8: noqa from pandas.sparse.array import SparseArray from pandas.sparse.list import SparseList from pandas.sparse.series import SparseSeries, SparseTimeSeries diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index b40a23fb4556a..a370bcf42fbaa 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -19,12 +19,13 @@ import pandas.core.ops as ops -def _arith_method(op, name, str_rep=None, default_axis=None, - fill_zeros=None, **eval_kwargs): +def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, + **eval_kwargs): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
""" + def wrapper(self, other): if isinstance(other, np.ndarray): if len(self) != len(other): @@ -37,14 +38,14 @@ def wrapper(self, other): else: return _sparse_array_op(self, other, op, name) elif np.isscalar(other): - new_fill_value = op(np.float64(self.fill_value), - np.float64(other)) + new_fill_value = op(np.float64(self.fill_value), np.float64(other)) return SparseArray(op(self.sp_values, other), sparse_index=self.sp_index, fill_value=new_fill_value) else: # pragma: no cover raise TypeError('operation with %s not supported' % type(other)) + if name.startswith("__"): name = name[2:-2] wrapper.__name__ = name @@ -74,44 +75,38 @@ def _sparse_array_op(left, right, op, name): def _sparse_nanop(this, other, name): sparse_op = getattr(splib, 'sparse_nan%s' % name) - result, result_index = sparse_op(this.sp_values, - this.sp_index, - other.sp_values, - other.sp_index) + result, result_index = sparse_op(this.sp_values, this.sp_index, + other.sp_values, other.sp_index) return result, result_index def _sparse_fillop(this, other, name): sparse_op = getattr(splib, 'sparse_%s' % name) - result, result_index = sparse_op(this.sp_values, - this.sp_index, - this.fill_value, - other.sp_values, - other.sp_index, - other.fill_value) + result, result_index = sparse_op(this.sp_values, this.sp_index, + this.fill_value, other.sp_values, + other.sp_index, other.fill_value) return result, result_index class SparseArray(PandasObject, np.ndarray): - """Data structure for labeled, sparse floating point data -Parameters ----------- -data : {array-like, Series, SparseSeries, dict} -kind : {'block', 'integer'} -fill_value : float - Defaults to NaN (code for missing) -sparse_index : {BlockIndex, IntIndex}, optional - Only if you have one. Mainly used internally - -Notes ------ -SparseArray objects are immutable via the typical Python means. 
If you -must change values, convert to dense, make your changes, then convert back -to sparse + Parameters + ---------- + data : {array-like, Series, SparseSeries, dict} + kind : {'block', 'integer'} + fill_value : float + Defaults to NaN (code for missing) + sparse_index : {BlockIndex, IntIndex}, optional + Only if you have one. Mainly used internally + + Notes + ----- + SparseArray objects are immutable via the typical Python means. If you + must change values, convert to dense, make your changes, then convert back + to sparse """ __array_priority__ = 15 _typ = 'array' @@ -120,9 +115,8 @@ class SparseArray(PandasObject, np.ndarray): sp_index = None fill_value = None - def __new__( - cls, data, sparse_index=None, index=None, kind='integer', fill_value=None, - dtype=np.float64, copy=False): + def __new__(cls, data, sparse_index=None, index=None, kind='integer', + fill_value=None, dtype=np.float64, copy=False): if index is not None: if data is None: @@ -164,7 +158,8 @@ def __new__( subarr = np.asarray(values, dtype=dtype) # if we have a bool type, make sure that we have a bool fill_value - if (dtype is not None and issubclass(dtype.type, np.bool_)) or (data is not None and lib.is_bool_array(subarr)): + if ((dtype is not None and issubclass(dtype.type, np.bool_)) or + (data is not None and lib.is_bool_array(subarr))): if np.isnan(fill_value) or not fill_value: fill_value = False else: @@ -284,9 +279,9 @@ def __getitem__(self, key): else: if isinstance(key, SparseArray): key = np.asarray(key) - if hasattr(key,'__len__') and len(self) != len(key): + if hasattr(key, '__len__') and len(self) != len(key): indices = self.sp_index - if hasattr(indices,'to_int_index'): + if hasattr(indices, 'to_int_index'): indices = indices.to_int_index() data_slice = self.values.take(indices.indices)[key] else: @@ -355,7 +350,8 @@ def __setitem__(self, key, value): # if com.is_integer(key): # self.values[key] = value # else: - # raise Exception("SparseArray does not support seting 
non-scalars via setitem") + # raise Exception("SparseArray does not support seting non-scalars + # via setitem") raise TypeError( "SparseArray does not support item assignment via setitem") @@ -364,16 +360,17 @@ def __setslice__(self, i, j, value): i = 0 if j < 0: j = 0 - slobj = slice(i, j) + slobj = slice(i, j) # noqa # if not np.isscalar(value): - # raise Exception("SparseArray does not support seting non-scalars via slices") + # raise Exception("SparseArray does not support seting non-scalars + # via slices") - #x = self.values - #x[slobj] = value - #self.values = x - raise TypeError( - "SparseArray does not support item assignment via slices") + # x = self.values + # x[slobj] = value + # self.values = x + raise TypeError("SparseArray does not support item assignment via " + "slices") def astype(self, dtype=None): """ @@ -394,8 +391,7 @@ def copy(self, deep=True): else: values = self.sp_values return SparseArray(values, sparse_index=self.sp_index, - dtype=self.dtype, - fill_value=self.fill_value) + dtype=self.dtype, fill_value=self.fill_value) def count(self): """ @@ -453,8 +449,7 @@ def cumsum(self, axis=0, dtype=None, out=None): if com.notnull(self.fill_value): return self.to_dense().cumsum() # TODO: what if sp_values contains NaN?? 
- return SparseArray(self.sp_values.cumsum(), - sparse_index=self.sp_index, + return SparseArray(self.sp_values.cumsum(), sparse_index=self.sp_index, fill_value=self.fill_value) def mean(self, axis=None, dtype=None, out=None): @@ -485,8 +480,8 @@ def _maybe_to_dense(obj): def _maybe_to_sparse(array): if isinstance(array, com.ABCSparseSeries): - array = SparseArray( - array.values, sparse_index=array.sp_index, fill_value=array.fill_value, copy=True) + array = SparseArray(array.values, sparse_index=array.sp_index, + fill_value=array.fill_value, copy=True) if not isinstance(array, SparseArray): array = com._values_from_object(array) return array @@ -538,15 +533,15 @@ def make_sparse(arr, kind='block', fill_value=nan): sparsified_values = arr[mask] return sparsified_values, index -ops.add_special_arithmetic_methods(SparseArray, - arith_method=_arith_method, - use_numexpr=False) +ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method, + use_numexpr=False) def _concat_compat(to_concat, axis=0): """ - provide concatenation of an sparse/dense array of arrays each of which is a single dtype + provide concatenation of an sparse/dense array of arrays each of which is a + single dtype Parameters ---------- @@ -570,10 +565,10 @@ def convert_sparse(x, axis): typs = com.get_dtype_kinds(to_concat) # we have more than one type here, so densify and regular concat - to_concat = [ convert_sparse(x, axis) for x in to_concat ] - result = np.concatenate(to_concat,axis=axis) + to_concat = [convert_sparse(x, axis) for x in to_concat] + result = np.concatenate(to_concat, axis=axis) - if not len(typs-set(['sparse','f','i'])): + if not len(typs - set(['sparse', 'f', 'i'])): # we can remain sparse result = SparseArray(result.ravel()) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 5e3f59a24e5a1..25f1f16831317 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -6,21 +6,18 @@ # pylint: disable=E1101,E1103,W0231,E0202 from numpy import nan 
-from pandas.compat import range, lmap, map +from pandas.compat import lmap from pandas import compat import numpy as np -from pandas.core.common import (isnull, notnull, _pickle_array, - _unpickle_array, _try_sort) +from pandas.core.common import isnull, _unpickle_array, _try_sort from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.series import Series from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray, _default_index) -from pandas.util.decorators import cache_readonly import pandas.core.common as com -import pandas.core.datetools as datetools -from pandas.core.internals import BlockManager, create_block_manager_from_arrays - +from pandas.core.internals import (BlockManager, + create_block_manager_from_arrays) from pandas.core.generic import NDFrame from pandas.sparse.series import SparseSeries, SparseArray from pandas.util.decorators import Appender @@ -28,7 +25,6 @@ class SparseDataFrame(DataFrame): - """ DataFrame containing sparse floating point data in the form of SparseSeries objects @@ -48,9 +44,8 @@ class SparseDataFrame(DataFrame): _constructor_sliced = SparseSeries _subtyp = 'sparse_frame' - def __init__(self, data=None, index=None, columns=None, - default_kind=None, default_fill_value=None, - dtype=None, copy=False): + def __init__(self, data=None, index=None, columns=None, default_kind=None, + default_fill_value=None, dtype=None, copy=False): # pick up the defaults from the Sparse structures if isinstance(data, SparseDataFrame): @@ -90,15 +85,16 @@ def __init__(self, data=None, index=None, columns=None, if dtype is not None: mgr = mgr.astype(dtype) elif isinstance(data, SparseDataFrame): - mgr = self._init_mgr( - data._data, dict(index=index, columns=columns), dtype=dtype, copy=copy) + mgr = self._init_mgr(data._data, + dict(index=index, columns=columns), + dtype=dtype, copy=copy) elif isinstance(data, DataFrame): mgr = self._init_dict(data, data.index, data.columns) if dtype is not None: mgr = 
mgr.astype(dtype) elif isinstance(data, BlockManager): - mgr = self._init_mgr( - data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy) + mgr = self._init_mgr(data, axes=dict(index=index, columns=columns), + dtype=dtype, copy=copy) elif data is None: data = DataFrame() @@ -111,8 +107,7 @@ def __init__(self, data=None, index=None, columns=None, columns = Index([]) else: for c in columns: - data[c] = SparseArray(np.nan, - index=index, + data[c] = SparseArray(np.nan, index=index, kind=self._default_kind, fill_value=self._default_fill_value) mgr = to_manager(data, columns, index) @@ -123,11 +118,12 @@ def __init__(self, data=None, index=None, columns=None, @property def _constructor(self): - def wrapper(data=None, index=None, columns=None, default_fill_value=None, kind=None, fill_value=None, copy=False): + def wrapper(data=None, index=None, columns=None, + default_fill_value=None, kind=None, fill_value=None, + copy=False): result = SparseDataFrame(data, index=index, columns=columns, default_fill_value=fill_value, - default_kind=kind, - copy=copy) + default_kind=kind, copy=copy) # fill if requested if fill_value is not None and not isnull(fill_value): @@ -144,15 +140,15 @@ def _init_dict(self, data, index, columns, dtype=None): # pre-filter out columns if we passed it if columns is not None: columns = _ensure_index(columns) - data = dict((k, v) for k, v in compat.iteritems(data) if k in columns) + data = dict((k, v) for k, v in compat.iteritems(data) + if k in columns) else: columns = Index(_try_sort(list(data.keys()))) if index is None: index = extract_index(list(data.values())) - sp_maker = lambda x: SparseArray(x, - kind=self._default_kind, + sp_maker = lambda x: SparseArray(x, kind=self._default_kind, fill_value=self._default_fill_value, copy=True) sdict = DataFrame() @@ -193,24 +189,23 @@ def _init_matrix(self, data, index, columns, dtype=None): if len(columns) != K: raise ValueError('Column length mismatch: %d vs. 
%d' % - (len(columns), K)) + (len(columns), K)) if len(index) != N: raise ValueError('Index length mismatch: %d vs. %d' % - (len(index), N)) + (len(index), N)) data = dict([(idx, data[:, i]) for i, idx in enumerate(columns)]) return self._init_dict(data, index, columns, dtype) def __array_wrap__(self, result): - return SparseDataFrame(result, index=self.index, columns=self.columns, - default_kind=self._default_kind, - default_fill_value=self._default_fill_value).__finalize__(self) + return SparseDataFrame( + result, index=self.index, columns=self.columns, + default_kind=self._default_kind, + default_fill_value=self._default_fill_value).__finalize__(self) def __getstate__(self): # pickling - return dict(_typ=self._typ, - _subtyp=self._subtyp, - _data=self._data, + return dict(_typ=self._typ, _subtyp=self._subtyp, _data=self._data, _default_fill_value=self._default_fill_value, _default_kind=self._default_kind) @@ -246,7 +241,7 @@ def to_dense(self): df : DataFrame """ data = dict((k, v.to_dense()) for k, v in compat.iteritems(self)) - return DataFrame(data, index=self.index,columns=self.columns) + return DataFrame(data, index=self.index, columns=self.columns) def astype(self, dtype): raise NotImplementedError @@ -281,32 +276,32 @@ def density(self): def fillna(self, value=None, method=None, axis=0, inplace=False, limit=None, downcast=None): - new_self = super( - SparseDataFrame, self).fillna(value=value, method=method, axis=axis, - inplace=inplace, limit=limit, downcast=downcast) + new_self = super(SparseDataFrame, + self).fillna(value=value, method=method, axis=axis, + inplace=inplace, limit=limit, + downcast=downcast) if not inplace: self = new_self # set the fill value if we are filling as a scalar with nothing special # going on - if value is not None and value == value and method is None and limit is None: + if (value is not None and value == value and method is None and + limit is None): self._default_fill_value = value if not inplace: return self - 
#---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Support different internal representation of SparseDataFrame def _sanitize_column(self, key, value): - sp_maker = lambda x, index=None: SparseArray(x, - index=index, - fill_value=self._default_fill_value, - kind=self._default_kind) + sp_maker = lambda x, index=None: SparseArray( + x, index=index, fill_value=self._default_fill_value, + kind=self._default_kind) if isinstance(value, SparseSeries): - clean = value.reindex( - self.index).as_sparse_array(fill_value=self._default_fill_value, - kind=self._default_kind) + clean = value.reindex(self.index).as_sparse_array( + fill_value=self._default_fill_value, kind=self._default_kind) elif isinstance(value, SparseArray): if len(value) != len(self.index): @@ -409,12 +404,11 @@ def xs(self, key, axis=0, copy=False): data = self.take([i]).get_values()[0] return Series(data, index=self.columns) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Arithmetic-related methods def _combine_frame(self, other, func, fill_value=None, level=None): - this, other = self.align(other, join='outer', level=level, - copy=False) + this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns if level is not None: @@ -444,13 +438,14 @@ def _combine_frame(self, other, func, fill_value=None, level=None): other_fill_value = getattr(other, 'default_fill_value', np.nan) if self.default_fill_value == other_fill_value: new_fill_value = self.default_fill_value - elif np.isnan(self.default_fill_value) and not np.isnan(other_fill_value): + elif np.isnan(self.default_fill_value) and not np.isnan( + other_fill_value): new_fill_value = other_fill_value - elif not np.isnan(self.default_fill_value) and np.isnan(other_fill_value): + elif not 
np.isnan(self.default_fill_value) and np.isnan( + other_fill_value): new_fill_value = self.default_fill_value - return self._constructor(data=new_data, - index=new_index, + return self._constructor(data=new_data, index=new_index, columns=new_columns, default_fill_value=new_fill_value, fill_value=new_fill_value).__finalize__(self) @@ -481,11 +476,10 @@ def _combine_match_index(self, other, func, level=None, fill_value=None): fill_value = func(np.float64(self.default_fill_value), np.float64(other.fill_value)) - return self._constructor(new_data, - index=new_index, - columns=self.columns, - default_fill_value=fill_value, - fill_value=self.default_fill_value).__finalize__(self) + return self._constructor( + new_data, index=new_index, columns=self.columns, + default_fill_value=fill_value, + fill_value=self.default_fill_value).__finalize__(self) def _combine_match_columns(self, other, func, level=None, fill_value=None): # patched version of DataFrame._combine_match_columns to account for @@ -509,22 +503,20 @@ def _combine_match_columns(self, other, func, level=None, fill_value=None): for col in intersection: new_data[col] = func(self[col], float(other[col])) - return self._constructor(new_data, - index=self.index, - columns=union, - default_fill_value=self.default_fill_value, - fill_value=self.default_fill_value).__finalize__(self) + return self._constructor( + new_data, index=self.index, columns=union, + default_fill_value=self.default_fill_value, + fill_value=self.default_fill_value).__finalize__(self) def _combine_const(self, other, func): new_data = {} for col, series in compat.iteritems(self): new_data[col] = func(series, other) - return self._constructor(data=new_data, - index=self.index, - columns=self.columns, - default_fill_value=self.default_fill_value, - fill_value=self.default_fill_value).__finalize__(self) + return self._constructor( + data=new_data, index=self.index, columns=self.columns, + default_fill_value=self.default_fill_value, + 
fill_value=self.default_fill_value).__finalize__(self) def _reindex_index(self, index, method, copy, level, fill_value=np.nan, limit=None, takeable=False): @@ -577,16 +569,17 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None, return SparseDataFrame(sdict, index=self.index, columns=columns, default_fill_value=self._default_fill_value) - def _reindex_with_indexers(self, reindexers, method=None, fill_value=None, limit=None, - copy=False, allow_dups=False): + def _reindex_with_indexers(self, reindexers, method=None, fill_value=None, + limit=None, copy=False, allow_dups=False): if method is not None or limit is not None: - raise NotImplementedError("cannot reindex with a method or limit with sparse") + raise NotImplementedError("cannot reindex with a method or limit " + "with sparse") if fill_value is None: fill_value = np.nan - index, row_indexer = reindexers.get(0, (None, None)) + index, row_indexer = reindexers.get(0, (None, None)) columns, col_indexer = reindexers.get(1, (None, None)) if columns is None: @@ -597,13 +590,14 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None, limit if col not in self: continue if row_indexer is not None: - new_arrays[col] = com.take_1d( - self[col].get_values(), row_indexer, - fill_value=fill_value) + new_arrays[col] = com.take_1d(self[col].get_values(), + row_indexer, + fill_value=fill_value) else: new_arrays[col] = self[col] - return SparseDataFrame(new_arrays, index=index, columns=columns).__finalize__(self) + return SparseDataFrame(new_arrays, index=index, + columns=columns).__finalize__(self) def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): @@ -617,8 +611,9 @@ def _join_index(self, other, how, lsuffix, rsuffix): if other.name is None: raise ValueError('Other Series must have a name') - other = SparseDataFrame({other.name: other}, - default_fill_value=self._default_fill_value) + other = SparseDataFrame( + {other.name: other}, + 
default_fill_value=self._default_fill_value) join_index = self.index.join(other.index, how=how) @@ -658,10 +653,11 @@ def transpose(self): """ Returns a DataFrame with the rows/columns switched. """ - return SparseDataFrame(self.values.T, index=self.columns, - columns=self.index, - default_fill_value=self._default_fill_value, - default_kind=self._default_kind).__finalize__(self) + return SparseDataFrame( + self.values.T, index=self.columns, columns=self.index, + default_fill_value=self._default_fill_value, + default_kind=self._default_kind).__finalize__(self) + T = property(transpose) @Appender(DataFrame.count.__doc__) @@ -710,10 +706,10 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): applied = func(v) applied.fill_value = func(applied.fill_value) new_series[k] = applied - return self._constructor(new_series, index=self.index, - columns=self.columns, - default_fill_value=self._default_fill_value, - kind=self._default_kind).__finalize__(self) + return self._constructor( + new_series, index=self.index, columns=self.columns, + default_fill_value=self._default_fill_value, + kind=self._default_kind).__finalize__(self) else: if not broadcast: return self._apply_standard(func, axis, reduce=reduce) @@ -737,13 +733,17 @@ def applymap(self, func): """ return self.apply(lambda x: lmap(func, x)) + def to_manager(sdf, columns, index): - """ create and return the block manager from a dataframe of series, columns, index """ + """ create and return the block manager from a dataframe of series, + columns, index + """ # from BlockManager perspective axes = [_ensure_index(columns), _ensure_index(index)] - return create_block_manager_from_arrays([sdf[c] for c in columns], columns, axes) + return create_block_manager_from_arrays( + [sdf[c] for c in columns], columns, axes) def stack_sparse_frame(frame): @@ -759,8 +759,8 @@ def stack_sparse_frame(frame): inds_to_concat = [] vals_to_concat = [] # TODO: Figure out whether this can be reached. 
- # I think this currently can't be reached because you can't build a SparseDataFrame - # with a non-np.NaN fill value (fails earlier). + # I think this currently can't be reached because you can't build a + # SparseDataFrame with a non-np.NaN fill value (fails earlier). for _, series in compat.iteritems(frame): if not np.isnan(series.fill_value): raise TypeError('This routine assumes NaN fill value') diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index f57339fea0a7f..4bacaadd915d1 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -6,7 +6,7 @@ # pylint: disable=E1101,E1103,W0231 import warnings -from pandas.compat import range, lrange, zip +from pandas.compat import lrange, zip from pandas import compat import numpy as np @@ -21,7 +21,6 @@ class SparsePanelAxis(object): - def __init__(self, cache_field, frame_attr): self.cache_field = cache_field self.frame_attr = frame_attr @@ -42,7 +41,6 @@ def __set__(self, obj, value): class SparsePanel(Panel): - """ Sparse version of Panel @@ -66,13 +64,13 @@ class SparsePanel(Panel): _typ = 'panel' _subtyp = 'sparse_panel' - def __init__(self, frames=None, items=None, major_axis=None, minor_axis=None, - default_fill_value=np.nan, default_kind='block', - copy=False): + def __init__(self, frames=None, items=None, major_axis=None, + minor_axis=None, default_fill_value=np.nan, + default_kind='block', copy=False): # deprecation #11157 - warnings.warn("SparsePanel is deprecated and will be removed in a future version", - FutureWarning, stacklevel=2) + warnings.warn("SparsePanel is deprecated and will be removed in a " + "future version", FutureWarning, stacklevel=2) if frames is None: frames = {} @@ -80,11 +78,10 @@ def __init__(self, frames=None, items=None, major_axis=None, minor_axis=None, if isinstance(frames, np.ndarray): new_frames = {} for item, vals in zip(items, frames): - new_frames[item] = \ - SparseDataFrame(vals, index=major_axis, - columns=minor_axis, - 
default_fill_value=default_fill_value, - default_kind=default_kind) + new_frames[item] = SparseDataFrame( + vals, index=major_axis, columns=minor_axis, + default_fill_value=default_fill_value, + default_kind=default_kind) frames = new_frames if not isinstance(frames, dict): @@ -99,11 +96,9 @@ def __init__(self, frames=None, items=None, major_axis=None, minor_axis=None, items = Index(sorted(frames.keys())) items = _ensure_index(items) - (clean_frames, - major_axis, - minor_axis) = _convert_frames(frames, major_axis, - minor_axis, kind=kind, - fill_value=fill_value) + (clean_frames, major_axis, + minor_axis) = _convert_frames(frames, major_axis, minor_axis, + kind=kind, fill_value=fill_value) self._frames = clean_frames @@ -142,8 +137,7 @@ def to_dense(self): ------- dense : Panel """ - return Panel(self.values, self.items, self.major_axis, - self.minor_axis) + return Panel(self.values, self.items, self.major_axis, self.minor_axis) def as_matrix(self): return self.values @@ -151,8 +145,7 @@ def as_matrix(self): @property def values(self): # return dense values - return np.array([self._frames[item].values - for item in self.items]) + return np.array([self._frames[item].values for item in self.items]) # need a special property for items to make the field assignable @@ -173,6 +166,7 @@ def _set_items(self, new_items): self._frames = dict((new_k, old_frame_dict[old_k]) for new_k, old_k in zip(new_items, old_items)) self._items = new_items + items = property(fget=_get_items, fset=_set_items) # DataFrame's index @@ -257,8 +251,8 @@ def __getstate__(self): # pickling return (self._frames, com._pickle_array(self.items), com._pickle_array(self.major_axis), - com._pickle_array(self.minor_axis), - self.default_fill_value, self.default_kind) + com._pickle_array(self.minor_axis), self.default_fill_value, + self.default_kind) def __setstate__(self, state): frames, items, major, minor, fv, kind = state @@ -281,12 +275,13 @@ def copy(self, deep=True): d = self._construct_axes_dict() 
if deep: - new_data = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(self._frames)) + new_data = dict((k, v.copy(deep=True)) + for k, v in compat.iteritems(self._frames)) d = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(d)) else: new_data = self._frames.copy() - d['default_fill_value']=self.default_fill_value - d['default_kind']=self.default_kind + d['default_fill_value'] = self.default_fill_value + d['default_kind'] = self.default_kind return SparsePanel(new_data, **d) @@ -376,16 +371,16 @@ def reindex(self, major=None, items=None, minor=None, major_axis=None, if item in self._frames: new_frames[item] = self._frames[item] else: - raise NotImplementedError('Reindexing with new items not yet ' - 'supported') + raise NotImplementedError('Reindexing with new items not ' + 'yet supported') else: new_frames = self._frames if copy: - new_frames = dict((k, v.copy()) for k, v in compat.iteritems(new_frames)) + new_frames = dict((k, v.copy()) + for k, v in compat.iteritems(new_frames)) - return SparsePanel(new_frames, items=items, - major_axis=major, + return SparsePanel(new_frames, items=items, major_axis=major, minor_axis=minor, default_fill_value=self.default_fill_value, default_kind=self.default_kind) @@ -509,7 +504,8 @@ def mod(self, val, *args, **kwargs): # Sparse objects opt out of numexpr SparsePanel._add_aggregate_operations(use_numexpr=False) -ops.add_special_arithmetic_methods(SparsePanel, use_numexpr=False, **ops.panel_special_funcs) +ops.add_special_arithmetic_methods(SparsePanel, use_numexpr=False, ** + ops.panel_special_funcs) SparseWidePanel = SparsePanel diff --git a/pandas/sparse/scipy_sparse.py b/pandas/sparse/scipy_sparse.py index a815ca7545561..ea108e3e89935 100644 --- a/pandas/sparse/scipy_sparse.py +++ b/pandas/sparse/scipy_sparse.py @@ -3,13 +3,9 @@ Currently only includes SparseSeries.to_coo helpers. 
""" -from pandas.core.frame import DataFrame from pandas.core.index import MultiIndex, Index from pandas.core.series import Series -import itertools -import numpy as np from pandas.compat import OrderedDict, lmap -from pandas.tools.util import cartesian_product def _check_is_partition(parts, whole): @@ -19,10 +15,10 @@ def _check_is_partition(parts, whole): raise ValueError( 'Is not a partition because intersection is not null.') if set.union(*parts) != whole: - raise ValueError('Is not a partition becuase union is not the whole.') + raise ValueError('Is not a partition because union is not the whole.') -def _to_ijv(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): +def _to_ijv(ss, row_levels=(0, ), column_levels=(1, ), sort_labels=False): """ For arbitrary (MultiIndexed) SparseSeries return (v, i, j, ilabels, jlabels) where (v, (i, j)) is suitable for passing to scipy.sparse.coo constructor. """ @@ -44,7 +40,6 @@ def get_indexers(levels): if len(levels) == 1: values_ilabels = [x[0] for x in values_ilabels] - ####################################################################### # # performance issues with groupby ################################### # TODO: these two lines can rejplace the code below but # groupby is too slow (in some cases at least) @@ -53,36 +48,37 @@ def get_indexers(levels): def _get_label_to_i_dict(labels, sort_labels=False): """ Return OrderedDict of unique labels to number. - Optionally sort by label. """ + Optionally sort by label. 
+ """ labels = Index(lmap(tuple, labels)).unique().tolist() # squish if sort_labels: labels = sorted(list(labels)) d = OrderedDict((k, i) for i, k in enumerate(labels)) - return(d) + return (d) def _get_index_subset_to_coord_dict(index, subset, sort_labels=False): def robust_get_level_values(i): # if index has labels (that are not None) use those, # else use the level location try: - return(index.get_level_values(index.names[i])) + return index.get_level_values(index.names[i]) except KeyError: - return(index.get_level_values(i)) - ilabels = list( - zip(*[robust_get_level_values(i) for i in subset])) - labels_to_i = _get_label_to_i_dict( - ilabels, sort_labels=sort_labels) + return index.get_level_values(i) + + ilabels = list(zip(*[robust_get_level_values(i) for i in subset])) + labels_to_i = _get_label_to_i_dict(ilabels, + sort_labels=sort_labels) labels_to_i = Series(labels_to_i) if len(subset) > 1: labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index) labels_to_i.index.names = [index.names[i] for i in subset] labels_to_i.name = 'value' - return(labels_to_i) + return (labels_to_i) - labels_to_i = _get_index_subset_to_coord_dict( - ss.index, levels, sort_labels=sort_labels) - ####################################################################### - ####################################################################### + labels_to_i = _get_index_subset_to_coord_dict(ss.index, levels, + sort_labels=sort_labels) + # ##################################################################### + # ##################################################################### i_coord = labels_to_i[values_ilabels].tolist() i_labels = labels_to_i.index.tolist() @@ -95,25 +91,28 @@ def robust_get_level_values(i): return values, i_coord, j_coord, i_labels, j_labels -def _sparse_series_to_coo(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): +def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ), + sort_labels=False): """ Convert a SparseSeries to a 
scipy.sparse.coo_matrix using index levels row_levels, column_levels as the row and column - labels respectively. Returns the sparse_matrix, row and column labels. """ + labels respectively. Returns the sparse_matrix, row and column labels. + """ import scipy.sparse if ss.index.nlevels < 2: raise ValueError('to_coo requires MultiIndex with nlevels > 2') if not ss.index.is_unique: - raise ValueError( - 'Duplicate index entries are not allowed in to_coo transformation.') + raise ValueError('Duplicate index entries are not allowed in to_coo ' + 'transformation.') # to keep things simple, only rely on integer indexing (not labels) row_levels = [ss.index._get_level_number(x) for x in row_levels] column_levels = [ss.index._get_level_number(x) for x in column_levels] - v, i, j, rows, columns = _to_ijv( - ss, row_levels=row_levels, column_levels=column_levels, sort_labels=sort_labels) + v, i, j, rows, columns = _to_ijv(ss, row_levels=row_levels, + column_levels=column_levels, + sort_labels=sort_labels) sparse_matrix = scipy.sparse.coo_matrix( (v, (i, j)), shape=(len(rows), len(columns))) return sparse_matrix, rows, columns @@ -121,7 +120,8 @@ def _sparse_series_to_coo(ss, row_levels=(0,), column_levels=(1,), sort_labels=F def _coo_to_sparse_series(A, dense_index=False): """ Convert a scipy.sparse.coo_matrix to a SparseSeries. - Use the defaults given in the SparseSeries constructor. """ + Use the defaults given in the SparseSeries constructor. + """ s = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) s = s.sort_index() s = s.to_sparse() # TODO: specify kind? 
diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 96d509ed9b7c1..1a2fc9698da2f 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -18,20 +18,16 @@ from pandas.core import generic import pandas.core.common as com import pandas.core.ops as ops -import pandas.core.datetools as datetools import pandas.index as _index -from pandas import compat - from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray) from pandas._sparse import BlockIndex, IntIndex import pandas._sparse as splib -from pandas.util.decorators import Appender - -from pandas.sparse.scipy_sparse import _sparse_series_to_coo, _coo_to_sparse_series +from pandas.sparse.scipy_sparse import (_sparse_series_to_coo, + _coo_to_sparse_series) -#------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- # Wrapper function for Series arithmetic methods @@ -41,8 +37,8 @@ def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, Wrapper function for Series arithmetic operations, to avoid code duplication. - str_rep, default_axis, fill_zeros and eval_kwargs are not used, but are present - for compatibility. + str_rep, default_axis, fill_zeros and eval_kwargs are not used, but are + present for compatibility. """ def wrapper(self, other): @@ -69,8 +65,8 @@ def wrapper(self, other): wrapper.__name__ = name if name.startswith("__"): - # strip special method names, e.g. `__add__` needs to be `add` when passed - # to _sparse_series_op + # strip special method names, e.g. 
`__add__` needs to be `add` when + # passed to _sparse_series_op name = name[2:-2] return wrapper @@ -85,7 +81,6 @@ def _sparse_series_op(left, right, op, name): class SparseSeries(Series): - """Data structure for labeled, sparse floating point data Parameters @@ -135,7 +130,7 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', if isinstance(data, SparseSeries) and index is None: index = data.index.view() elif index is not None: - assert(len(index) == len(data)) + assert (len(index) == len(data)) sparse_index = data.sp_index data = np.asarray(data) @@ -161,7 +156,7 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', data, sparse_index = make_sparse(data, kind=kind, fill_value=fill_value) else: - assert(len(data) == sparse_index.npoints) + assert (len(data) == sparse_index.npoints) elif isinstance(data, SingleBlockManager): if dtype is not None: @@ -175,8 +170,7 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', length = len(index) - if data == fill_value or (isnull(data) - and isnull(fill_value)): + if data == fill_value or (isnull(data) and isnull(fill_value)): if kind == 'block': sparse_index = BlockIndex(length, [], []) else: @@ -206,8 +200,9 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', # create a sparse array if not isinstance(data, SparseArray): - data = SparseArray( - data, sparse_index=sparse_index, fill_value=fill_value, dtype=dtype, copy=copy) + data = SparseArray(data, sparse_index=sparse_index, + fill_value=fill_value, dtype=dtype, + copy=copy) data = SingleBlockManager(data, index) @@ -254,11 +249,13 @@ def npoints(self): return self.sp_index.npoints @classmethod - def from_array(cls, arr, index=None, name=None, copy=False, fill_value=None, fastpath=False): + def from_array(cls, arr, index=None, name=None, copy=False, + fill_value=None, fastpath=False): """ Simplified alternate constructor """ - return cls(arr, index=index, name=name, 
copy=copy, fill_value=fill_value, fastpath=fastpath) + return cls(arr, index=index, name=name, copy=copy, + fill_value=fill_value, fastpath=fastpath) @property def _constructor(self): @@ -278,11 +275,8 @@ def as_sparse_array(self, kind=None, fill_value=None, copy=False): fill_value = self.fill_value if kind is None: kind = self.kind - return SparseArray(self.values, - sparse_index=self.sp_index, - fill_value=fill_value, - kind=kind, - copy=copy) + return SparseArray(self.values, sparse_index=self.sp_index, + fill_value=fill_value, kind=kind, copy=copy) def __len__(self): return len(self.block) @@ -297,8 +291,7 @@ def __array_wrap__(self, result): """ Gets called prior to a ufunc (and after) """ - return self._constructor(result, - index=self.index, + return self._constructor(result, index=self.index, sparse_index=self.sp_index, fill_value=self.fill_value, copy=False).__finalize__(self) @@ -318,11 +311,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, def __getstate__(self): # pickling - return dict(_typ=self._typ, - _subtyp=self._subtyp, - _data=self._data, - fill_value=self.fill_value, - name=self.name) + return dict(_typ=self._typ, _subtyp=self._subtyp, _data=self._data, + fill_value=self.fill_value, name=self.name) def _unpickle_series_compat(self, state): @@ -339,8 +329,8 @@ def _unpickle_series_compat(self, state): # create a sparse array if not isinstance(data, SparseArray): - data = SparseArray( - data, sparse_index=sp_index, fill_value=fill_value, copy=False) + data = SparseArray(data, sparse_index=sp_index, + fill_value=fill_value, copy=False) # recreate data = SingleBlockManager(data, index, fastpath=True) @@ -473,8 +463,8 @@ def set_value(self, label, value, takeable=False): if new_values is not None: values = new_values new_index = values.index - values = SparseArray( - values, fill_value=self.fill_value, kind=self.kind) + values = SparseArray(values, fill_value=self.fill_value, + kind=self.kind) self._data = 
SingleBlockManager(values, new_index) self._index = new_index @@ -489,8 +479,8 @@ def _set_values(self, key, value): values = self.values.to_dense() values[key] = _index.convert_scalar(values, value) - values = SparseArray( - values, fill_value=self.fill_value, kind=self.kind) + values = SparseArray(values, fill_value=self.fill_value, + kind=self.kind) self._data = SingleBlockManager(values, self.index) def to_dense(self, sparse_only=False): @@ -502,7 +492,8 @@ def to_dense(self, sparse_only=False): index = self.index.take(int_index.indices) return Series(self.sp_values, index=index, name=self.name) else: - return Series(self.values.to_dense(), index=self.index, name=self.name) + return Series(self.values.to_dense(), index=self.index, + name=self.name) @property def density(self): @@ -518,8 +509,7 @@ def copy(self, deep=True): if deep: new_data = self._data.copy() - return self._constructor(new_data, - sparse_index=self.sp_index, + return self._constructor(new_data, sparse_index=self.sp_index, fill_value=self.fill_value).__finalize__(self) def reindex(self, index=None, method=None, copy=True, limit=None): @@ -539,7 +529,8 @@ def reindex(self, index=None, method=None, copy=True, limit=None): return self.copy() else: return self - return self._constructor(self._data.reindex(new_index, method=method, limit=limit, copy=copy), + return self._constructor(self._data.reindex(new_index, method=method, + limit=limit, copy=copy), index=new_index).__finalize__(self) def sparse_reindex(self, new_index): @@ -573,7 +564,8 @@ def take(self, indices, axis=0, convert=True): """ new_values = SparseArray.take(self.values, indices) new_index = self.index.take(indices) - return self._constructor(new_values, index=new_index).__finalize__(self) + return self._constructor(new_values, + index=new_index).__finalize__(self) def cumsum(self, axis=0, dtype=None, out=None): """ @@ -585,7 +577,9 @@ def cumsum(self, axis=0, dtype=None, out=None): """ new_array = SparseArray.cumsum(self.values) if 
isinstance(new_array, SparseArray): - return self._constructor(new_array, index=self.index, sparse_index=new_array.sp_index).__finalize__(self) + return self._constructor( + new_array, index=self.index, + sparse_index=new_array.sp_index).__finalize__(self) return Series(new_array, index=self.index).__finalize__(self) def dropna(self, axis=0, inplace=False, **kwargs): @@ -611,8 +605,8 @@ def shift(self, periods, freq=None): # no special handling of fill values yet if not isnull(self.fill_value): - dense_shifted = self.to_dense().shift(periods, freq=freq, - **kwds) + # TODO: kwds is not defined...should this work? + dense_shifted = self.to_dense().shift(periods, freq=freq, **kwds) # noqa return dense_shifted.to_sparse(fill_value=self.fill_value, kind=self.kind) @@ -620,10 +614,10 @@ def shift(self, periods, freq=None): return self.copy() if freq is not None: - return self._constructor(self.sp_values, - sparse_index=self.sp_index, - index=self.index.shift(periods, freq), - fill_value=self.fill_value).__finalize__(self) + return self._constructor( + self.sp_values, sparse_index=self.sp_index, + index=self.index.shift(periods, freq), + fill_value=self.fill_value).__finalize__(self) int_index = self.sp_index.to_int_index() new_indices = int_index.indices + periods @@ -636,8 +630,7 @@ def shift(self, periods, freq=None): new_sp_index = new_sp_index.to_block_index() return self._constructor(self.sp_values[start:end].copy(), - index=self.index, - sparse_index=new_sp_index, + index=self.index, sparse_index=new_sp_index, fill_value=self.fill_value).__finalize__(self) def combine_first(self, other): @@ -659,13 +652,14 @@ def combine_first(self, other): dense_combined = self.to_dense().combine_first(other) return dense_combined.to_sparse(fill_value=self.fill_value) - def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): + def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False): """ Create a scipy.sparse.coo_matrix from a SparseSeries with 
MultiIndex. - Use row_levels and column_levels to determine the row and column coordinates respectively. - row_levels and column_levels are the names (labels) or numbers of the levels. - {row_levels, column_levels} must be a partition of the MultiIndex level names (or numbers). + Use row_levels and column_levels to determine the row and column + coordinates respectively. row_levels and column_levels are the names + (labels) or numbers of the levels. {row_levels, column_levels} must be + a partition of the MultiIndex level names (or numbers). .. versionadded:: 0.16.0 @@ -709,8 +703,9 @@ def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): >>> columns [('a', 0), ('a', 1), ('b', 0), ('b', 1)] """ - A, rows, columns = _sparse_series_to_coo( - self, row_levels, column_levels, sort_labels=sort_labels) + A, rows, columns = _sparse_series_to_coo(self, row_levels, + column_levels, + sort_labels=sort_labels) return A, rows, columns @classmethod @@ -724,8 +719,10 @@ def from_coo(cls, A, dense_index=False): ---------- A : scipy.sparse.coo_matrix dense_index : bool, default False - If False (default), the SparseSeries index consists of only the coords of the non-null entries of the original coo_matrix. - If True, the SparseSeries index consists of the full sorted (row, col) coordinates of the coo_matrix. + If False (default), the SparseSeries index consists of only the + coords of the non-null entries of the original coo_matrix. + If True, the SparseSeries index consists of the full sorted + (row, col) coordinates of the coo_matrix. Returns ------- @@ -764,14 +761,15 @@ def from_coo(cls, A, dense_index=False): # force methods to overwrite previous definitions. 
ops.add_special_arithmetic_methods(SparseSeries, _arith_method, radd_func=operator.add, comp_method=None, - bool_method=None, use_numexpr=False, force=True) + bool_method=None, use_numexpr=False, + force=True) + # backwards compatiblity class SparseTimeSeries(SparseSeries): - def __init__(self, *args, **kwargs): # deprecation TimeSeries, #10890 - warnings.warn("SparseTimeSeries is deprecated. Please use SparseSeries", - FutureWarning, stacklevel=2) + warnings.warn("SparseTimeSeries is deprecated. Please use " + "SparseSeries", FutureWarning, stacklevel=2) super(SparseTimeSeries, self).__init__(*args, **kwargs) diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index add680489548d..b1e731bd8e2e5 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -1,13 +1,11 @@ from pandas.compat import range import re -from numpy import nan, ndarray +from numpy import nan import numpy as np import operator import warnings -from pandas.core.series import Series -from pandas.core.common import notnull from pandas.sparse.api import SparseArray from pandas.util.testing import assert_almost_equal, assertRaisesRegexp import pandas.util.testing as tm @@ -15,11 +13,11 @@ def assert_sp_array_equal(left, right): assert_almost_equal(left.sp_values, right.sp_values) - assert(left.sp_index.equals(right.sp_index)) + assert (left.sp_index.equals(right.sp_index)) if np.isnan(left.fill_value): - assert(np.isnan(right.fill_value)) + assert (np.isnan(right.fill_value)) else: - assert(left.fill_value == right.fill_value) + assert (left.fill_value == right.fill_value) class TestSparseArray(tm.TestCase): @@ -46,6 +44,7 @@ def setitem(): def setslice(): self.arr[1:5] = 2 + assertRaisesRegexp(TypeError, "item assignment", setitem) assertRaisesRegexp(TypeError, "item assignment", setslice) @@ -79,7 +78,7 @@ def _get_base(values): base = base.base return base - assert(_get_base(arr2) is _get_base(self.arr)) + assert (_get_base(arr2) is 
_get_base(self.arr)) def test_values_asarray(self): assert_almost_equal(self.arr.values, self.arr_data) @@ -150,7 +149,7 @@ def _check_op(op, first, second): exp_fv = op(first.fill_value, 4) assert_almost_equal(res4.fill_value, exp_fv) assert_almost_equal(res4.values, exp) - except (ValueError) : + except ValueError: pass def _check_inplace_op(op): @@ -184,7 +183,7 @@ def test_generator_warnings(self): category=PendingDeprecationWarning) for _ in sp_arr: pass - assert len(w)==0 + assert len(w) == 0 if __name__ == '__main__': diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index 7f9e61571ebfc..57baae08725c0 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -1,51 +1,30 @@ from pandas import Series -import nose -from numpy import nan +import nose # noqa import numpy as np import operator -from numpy.testing import assert_almost_equal, assert_equal +from numpy.testing import assert_equal import pandas.util.testing as tm -from pandas.core.sparse import SparseSeries -from pandas import DataFrame, compat +from pandas import compat from pandas._sparse import IntIndex, BlockIndex import pandas._sparse as splib TEST_LENGTH = 20 -plain_case = dict(xloc=[0, 7, 15], - xlen=[3, 5, 5], - yloc=[2, 9, 14], - ylen=[2, 3, 5], - intersect_loc=[2, 9, 15], +plain_case = dict(xloc=[0, 7, 15], xlen=[3, 5, 5], yloc=[2, 9, 14], + ylen=[2, 3, 5], intersect_loc=[2, 9, 15], intersect_len=[1, 3, 4]) -delete_blocks = dict(xloc=[0, 5], - xlen=[4, 4], - yloc=[1], - ylen=[4], - intersect_loc=[1], - intersect_len=[3]) -split_blocks = dict(xloc=[0], - xlen=[10], - yloc=[0, 5], - ylen=[3, 7], - intersect_loc=[0, 5], - intersect_len=[3, 5]) -skip_block = dict(xloc=[10], - xlen=[5], - yloc=[0, 12], - ylen=[5, 3], - intersect_loc=[12], - intersect_len=[3]) - -no_intersect = dict(xloc=[0, 10], - xlen=[4, 6], - yloc=[5, 17], - ylen=[4, 2], - intersect_loc=[], - intersect_len=[]) +delete_blocks = dict(xloc=[0, 5], 
xlen=[4, 4], yloc=[1], ylen=[4], + intersect_loc=[1], intersect_len=[3]) +split_blocks = dict(xloc=[0], xlen=[10], yloc=[0, 5], ylen=[3, 7], + intersect_loc=[0, 5], intersect_len=[3, 5]) +skip_block = dict(xloc=[10], xlen=[5], yloc=[0, 12], ylen=[5, 3], + intersect_loc=[12], intersect_len=[3]) + +no_intersect = dict(xloc=[0, 10], xlen=[4, 6], yloc=[5, 17], ylen=[4, 2], + intersect_loc=[], intersect_len=[]) def check_cases(_check_case): @@ -69,14 +48,14 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xindex = BlockIndex(TEST_LENGTH, xloc, xlen) yindex = BlockIndex(TEST_LENGTH, yloc, ylen) bresult = xindex.make_union(yindex) - assert(isinstance(bresult, BlockIndex)) + assert (isinstance(bresult, BlockIndex)) assert_equal(bresult.blocs, eloc) assert_equal(bresult.blengths, elen) ixindex = xindex.to_int_index() iyindex = yindex.to_int_index() iresult = ixindex.make_union(iyindex) - assert(isinstance(iresult, IntIndex)) + assert (isinstance(iresult, IntIndex)) assert_equal(iresult.indices, bresult.to_int_index().indices) """ @@ -91,7 +70,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): eloc = [0] elen = [9] _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ x: ----- ----- y: ----- -- @@ -103,7 +81,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): eloc = [0, 10, 17] elen = [7, 5, 2] _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ x: ------ y: ------- @@ -116,7 +93,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): eloc = [1] elen = [7] _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ x: ------ ----- y: ------- @@ -129,7 +105,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): eloc = [2] elen = [12] _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ x: --- ----- y: ------- @@ -142,7 +117,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): eloc = [0] elen = [10] _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ x: ------ ----- y: ------- --- @@ -155,7 +129,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, 
elen): eloc = [2] elen = [15] _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ x: ---------------------- y: ---- ---- --- @@ -168,7 +141,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): eloc = [2] elen = [15] _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ x: ---- --- y: --- --- @@ -185,18 +157,17 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): def test_lookup(): - def _check(index): - assert(index.lookup(0) == -1) - assert(index.lookup(5) == 0) - assert(index.lookup(7) == 2) - assert(index.lookup(8) == -1) - assert(index.lookup(9) == -1) - assert(index.lookup(10) == -1) - assert(index.lookup(11) == -1) - assert(index.lookup(12) == 3) - assert(index.lookup(17) == 8) - assert(index.lookup(18) == -1) + assert (index.lookup(0) == -1) + assert (index.lookup(5) == 0) + assert (index.lookup(7) == 2) + assert (index.lookup(8) == -1) + assert (index.lookup(9) == -1) + assert (index.lookup(10) == -1) + assert (index.lookup(11) == -1) + assert (index.lookup(12) == 3) + assert (index.lookup(17) == 8) + assert (index.lookup(18) == -1) bindex = BlockIndex(20, [5, 12], [3, 6]) iindex = bindex.to_int_index() @@ -210,7 +181,7 @@ def _check(index): def test_intersect(): def _check_correct(a, b, expected): result = a.intersect(b) - assert(result.equals(expected)) + assert (result.equals(expected)) def _check_length_exc(a, longer): nose.tools.assert_raises(Exception, a.intersect, longer) @@ -222,13 +193,11 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) _check_correct(xindex, yindex, expected) - _check_correct(xindex.to_int_index(), - yindex.to_int_index(), + _check_correct(xindex.to_int_index(), yindex.to_int_index(), expected.to_int_index()) _check_length_exc(xindex, longer_index) - _check_length_exc(xindex.to_int_index(), - longer_index.to_int_index()) + _check_length_exc(xindex.to_int_index(), longer_index.to_int_index()) if compat.is_platform_windows(): raise nose.SkipTest("segfaults on 
win-64 when all tests are run") @@ -236,7 +205,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): class TestBlockIndex(tm.TestCase): - def test_equals(self): index = BlockIndex(10, [0, 4], [2, 5]) @@ -248,10 +216,11 @@ def test_check_integrity(self): lengths = [] # 0-length OK - index = BlockIndex(0, locs, lengths) + # TODO: index variables are not used...is that right? + index = BlockIndex(0, locs, lengths) # noqa # also OK even though empty - index = BlockIndex(1, locs, lengths) + index = BlockIndex(1, locs, lengths) # noqa # block extend beyond end self.assertRaises(Exception, BlockIndex, 10, [5], [10]) @@ -275,7 +244,6 @@ def test_to_block_index(self): class TestIntIndex(tm.TestCase): - def test_equals(self): index = IntIndex(10, [0, 1, 2, 3, 4]) self.assertTrue(index.equals(index)) @@ -292,6 +260,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): tm.assertIsInstance(xbindex, BlockIndex) self.assertTrue(xbindex.equals(xindex)) self.assertTrue(ybindex.equals(yindex)) + check_cases(_check_case) def test_to_int_index(self): @@ -300,7 +269,6 @@ def test_to_int_index(self): class TestSparseOperators(tm.TestCase): - def _nan_op_tests(self, sparse_op, python_op): def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xindex = BlockIndex(TEST_LENGTH, xloc, xlen) @@ -341,10 +309,10 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xfill = 0 yfill = 2 - result_block_vals, rb_index = sparse_op( - x, xindex, xfill, y, yindex, yfill) - result_int_vals, ri_index = sparse_op(x, xdindex, xfill, - y, ydindex, yfill) + result_block_vals, rb_index = sparse_op(x, xindex, xfill, y, + yindex, yfill) + result_int_vals, ri_index = sparse_op(x, xdindex, xfill, y, + ydindex, yfill) self.assertTrue(rb_index.to_int_index().equals(ri_index)) assert_equal(result_block_vals, result_int_vals) @@ -374,6 +342,7 @@ def f(self): sparse_op = getattr(splib, 'sparse_nan%s' % op) python_op = getattr(operator, op) self._nan_op_tests(sparse_op, python_op) + f.__name__ = 'test_nan%s' % 
op return f @@ -383,9 +352,11 @@ def f(self): sparse_op = getattr(splib, 'sparse_%s' % op) python_op = getattr(operator, op) self._op_tests(sparse_op, python_op) + f.__name__ = 'test_%s' % op return f + for op in check_ops: f = make_nanoptestf(op) g = make_optestf(op) @@ -395,6 +366,6 @@ def f(self): del g if __name__ == '__main__': - import nose + import nose # noqa nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 64ffd7482ee34..6add74f778404 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1,45 +1,43 @@ # pylint: disable-msg=E1101,W0612 import operator -from datetime import datetime -import functools - -import nose +import nose # noqa from numpy import nan import numpy as np import pandas as pd -dec = np.testing.dec -from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_index_equal, - assert_frame_equal, assert_panel_equal, assertRaisesRegexp, +from pandas.util.testing import (assert_almost_equal, assert_series_equal, + assert_index_equal, assert_frame_equal, + assert_panel_equal, assertRaisesRegexp, assert_numpy_array_equal, assert_attr_equal) from numpy.testing import assert_equal -from pandas import Series, DataFrame, bdate_range, Panel, MultiIndex +from pandas import Series, DataFrame, bdate_range, Panel from pandas.core.datetools import BDay from pandas.core.index import Index from pandas.tseries.index import DatetimeIndex import pandas.core.datetools as datetools from pandas.core.common import isnull import pandas.util.testing as tm -from pandas.compat import range, lrange, StringIO, lrange +from pandas.compat import range, StringIO, lrange from pandas import compat from pandas.tools.util import cartesian_product import pandas.sparse.frame as spf from pandas._sparse import BlockIndex, IntIndex -from pandas.sparse.api import (SparseSeries, - SparseDataFrame, 
SparsePanel, - SparseArray) -from pandas.tests.frame.test_misc_api import ( - SafeForSparse as SparseFrameTests) +from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel +from pandas.tests.frame.test_misc_api import (SafeForSparse as + SparseFrameTests) + +from pandas.sparse.tests.test_array import assert_sp_array_equal import pandas.tests.test_panel as test_panel import pandas.tests.test_series as test_series -from pandas.sparse.tests.test_array import assert_sp_array_equal +dec = np.testing.dec + def _test_data1(): # nan-based @@ -76,7 +74,7 @@ def _test_data2_zero(): def assert_sp_series_equal(a, b, exact_indices=True, check_names=True): - assert(a.index.equals(b.index)) + assert (a.index.equals(b.index)) assert_sp_array_equal(a, b) if check_names: assert_attr_equal('name', a, b) @@ -88,7 +86,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True): compare dense representations """ for col, series in compat.iteritems(left): - assert(col in right) + assert (col in right) # trade-off? if exact_indices: @@ -96,32 +94,29 @@ def assert_sp_frame_equal(left, right, exact_indices=True): else: assert_series_equal(series.to_dense(), right[col].to_dense()) - assert_almost_equal(left.default_fill_value, - right.default_fill_value) + assert_almost_equal(left.default_fill_value, right.default_fill_value) # do I care? # assert(left.default_kind == right.default_kind) for col in right: - assert(col in left) + assert (col in left) def assert_sp_panel_equal(left, right, exact_indices=True): for item, frame in compat.iteritems(left): - assert(item in right) + assert (item in right) # trade-off? 
assert_sp_frame_equal(frame, right[item], exact_indices=exact_indices) - assert_almost_equal(left.default_fill_value, - right.default_fill_value) - assert(left.default_kind == right.default_kind) + assert_almost_equal(left.default_fill_value, right.default_fill_value) + assert (left.default_kind == right.default_kind) for item in right: - assert(item in left) + assert (item in left) -class TestSparseSeries(tm.TestCase, - test_series.CheckNameIntegration): +class TestSparseSeries(tm.TestCase, test_series.CheckNameIntegration): _multiprocess_can_split_ = True def setUp(self): @@ -162,7 +157,7 @@ def test_TimeSeries_deprecation(self): # deprecation TimeSeries, #10890 with tm.assert_produces_warning(FutureWarning): - pd.SparseTimeSeries(1,index=pd.date_range('20130101',periods=3)) + pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3)) def test_construct_DataFrame_with_sp_series(self): # it works! @@ -227,7 +222,7 @@ def test_dense_to_sparse(self): self.assertEqual(ziseries.name, self.zbseries.name) def test_to_dense_preserve_name(self): - assert(self.bseries.name is not None) + assert (self.bseries.name is not None) result = self.bseries.to_dense() self.assertEqual(result.name, self.bseries.name) @@ -364,10 +359,10 @@ def _check_getitem(sp, dense): # j = np.float64(i) # assert_almost_equal(sp[j], dense[j]) - # API change 1/6/2012 - # negative getitem works - # for i in xrange(len(dense)): - # assert_almost_equal(sp[-i], dense[-i]) + # API change 1/6/2012 + # negative getitem works + # for i in xrange(len(dense)): + # assert_almost_equal(sp[-i], dense[-i]) _check_getitem(self.bseries, self.bseries.to_dense()) _check_getitem(self.btseries, self.btseries.to_dense()) @@ -453,8 +448,9 @@ def test_setitem(self): def test_setslice(self): self.bseries[5:10] = 7. 
- assert_series_equal(self.bseries[5:10].to_dense(), Series( - 7., index=range(5, 10), name=self.bseries.name)) + assert_series_equal(self.bseries[5:10].to_dense(), + Series(7., index=range(5, 10), + name=self.bseries.name)) def test_operators(self): def _check_op(a, b, op): @@ -515,8 +511,8 @@ def _check_inplace_op(iop, op): inplace_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow'] for op in inplace_ops: - _check_inplace_op( - getattr(operator, "i%s" % op), getattr(operator, op)) + _check_inplace_op(getattr(operator, "i%s" % op), + getattr(operator, op)) def test_abs(self): s = SparseSeries([1, 2, -3], name='x') @@ -560,7 +556,8 @@ def _compare_with_series(sps, new_index): # corner cases sp = SparseSeries([], index=[]) - sp_zero = SparseSeries([], index=[], fill_value=0) + # TODO: sp_zero is not used anywhere...remove? + sp_zero = SparseSeries([], index=[], fill_value=0) # noqa _compare_with_series(sp, np.arange(10)) # with copy=False @@ -589,7 +586,8 @@ def _check(values, index1, index2, fill_value): assert_almost_equal(expected.values, reindexed.sp_values) # make sure level argument asserts - expected = expected.reindex(int_indices2).fillna(fill_value) + # TODO: expected is not used anywhere...remove? 
+ expected = expected.reindex(int_indices2).fillna(fill_value) # noqa def _check_with_fill_value(values, first, second, fill_value=nan): i_index1 = IntIndex(length, first) @@ -614,16 +612,17 @@ def _check_all(values, first, second): _check_all(values1, index1, [0, 1, 7, 8, 9]) _check_all(values1, index1, []) - first_series = SparseSeries(values1, sparse_index=IntIndex(length, - index1), + first_series = SparseSeries(values1, + sparse_index=IntIndex(length, index1), fill_value=nan) with tm.assertRaisesRegexp(TypeError, 'new index must be a SparseIndex'): - reindexed = first_series.sparse_reindex(0) + reindexed = first_series.sparse_reindex(0) # noqa def test_repr(self): - bsrepr = repr(self.bseries) - isrepr = repr(self.iseries) + # TODO: These aren't used + bsrepr = repr(self.bseries) # noqa + isrepr = repr(self.iseries) # noqa def test_iter(self): pass @@ -670,8 +669,7 @@ def _compare_all(obj): _compare_all(nonna2) def test_dropna(self): - sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], - fill_value=0) + sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0) sp_valid = sp.valid() @@ -696,16 +694,14 @@ def _check_matches(indices, expected): homogenized = spf.homogenize(data) for k, v in compat.iteritems(homogenized): - assert(v.sp_index.equals(expected)) + assert (v.sp_index.equals(expected)) - indices1 = [BlockIndex(10, [2], [7]), - BlockIndex(10, [1, 6], [3, 4]), + indices1 = [BlockIndex(10, [2], [7]), BlockIndex(10, [1, 6], [3, 4]), BlockIndex(10, [0], [10])] expected1 = BlockIndex(10, [2, 6], [2, 3]) _check_matches(indices1, expected1) - indices2 = [BlockIndex(10, [2], [7]), - BlockIndex(10, [2], [7])] + indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])] expected2 = indices2[0] _check_matches(indices2, expected2) @@ -727,8 +723,7 @@ def test_fill_value_corner(self): self.assertTrue(np.isnan(result.fill_value)) def test_shift(self): - series = SparseSeries([nan, 1., 2., 3., nan, nan], - index=np.arange(6)) + series = SparseSeries([nan, 1., 2., 3., 
nan, nan], index=np.arange(6)) shifted = series.shift(0) self.assertIsNot(shifted, series) @@ -772,23 +767,29 @@ def test_combine_first(self): assert_sp_series_equal(result, result2) assert_sp_series_equal(result, expected) -class TestSparseHandlingMultiIndexes(tm.TestCase): +class TestSparseHandlingMultiIndexes(tm.TestCase): def setUp(self): - miindex = pd.MultiIndex.from_product([["x","y"], ["10","20"]],names=['row-foo', 'row-bar']) - micol = pd.MultiIndex.from_product([['a','b','c'], ["1","2"]],names=['col-foo', 'col-bar']) - dense_multiindex_frame = pd.DataFrame(index=miindex, columns=micol).sortlevel().sortlevel(axis=1) + miindex = pd.MultiIndex.from_product( + [["x", "y"], ["10", "20"]], names=['row-foo', 'row-bar']) + micol = pd.MultiIndex.from_product( + [['a', 'b', 'c'], ["1", "2"]], names=['col-foo', 'col-bar']) + dense_multiindex_frame = pd.DataFrame( + index=miindex, columns=micol).sortlevel().sortlevel(axis=1) self.dense_multiindex_frame = dense_multiindex_frame.fillna(value=3.14) def test_to_sparse_preserve_multiindex_names_columns(self): - sparse_multiindex_frame = self.dense_multiindex_frame.to_sparse().copy() - assert_index_equal(sparse_multiindex_frame.columns,self.dense_multiindex_frame.columns) + sparse_multiindex_frame = self.dense_multiindex_frame.to_sparse() + sparse_multiindex_frame = sparse_multiindex_frame.copy() + assert_index_equal(sparse_multiindex_frame.columns, + self.dense_multiindex_frame.columns) def test_round_trip_preserve_multiindex_names(self): sparse_multiindex_frame = self.dense_multiindex_frame.to_sparse() round_trip_multiindex_frame = sparse_multiindex_frame.to_dense() - assert_frame_equal(self.dense_multiindex_frame,round_trip_multiindex_frame, - check_column_type=True,check_names=True) + assert_frame_equal(self.dense_multiindex_frame, + round_trip_multiindex_frame, check_column_type=True, + check_names=True) class TestSparseSeriesScipyInteraction(tm.TestCase): @@ -813,8 +814,9 @@ def setUp(self): ss.index.names = [3, 0, 
1, 2] self.sparse_series.append(ss) - ss = pd.Series( - [nan] * 12, index=cartesian_product((range(3), range(4)))).to_sparse() + ss = pd.Series([ + nan + ] * 12, index=cartesian_product((range(3), range(4)))).to_sparse() for k, v in zip([(0, 0), (1, 2), (1, 3)], [3.0, 1.0, 2.0]): ss[k] = v self.sparse_series.append(ss) @@ -827,7 +829,8 @@ def setUp(self): ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4))) self.coo_matrices.append(scipy.sparse.coo_matrix( ([3.0, 1.0, 2.0], ([0, 1, 1], [0, 0, 1])), shape=(3, 2))) - self.ils = [[(1, 2), (1, 1), (2, 1)], [(1, 1), (1, 2), (2, 1)], [(1, 2, 'a'), (1, 1, 'b'), (2, 1, 'b')]] + self.ils = [[(1, 2), (1, 1), (2, 1)], [(1, 1), (1, 2), (2, 1)], + [(1, 2, 'a'), (1, 1, 'b'), (2, 1, 'b')]] self.jls = [[('a', 0), ('a', 1), ('b', 0), ('b', 1)], [0, 1]] def test_to_coo_text_names_integer_row_levels_nosort(self): @@ -839,14 +842,16 @@ def test_to_coo_text_names_integer_row_levels_nosort(self): def test_to_coo_text_names_integer_row_levels_sort(self): ss = self.sparse_series[0] kwargs = {'row_levels': [0, 1], - 'column_levels': [2, 3], 'sort_labels': True} + 'column_levels': [2, 3], + 'sort_labels': True} result = (self.coo_matrices[1], self.ils[1], self.jls[0]) self._run_test(ss, kwargs, result) def test_to_coo_text_names_text_row_levels_nosort_col_level_single(self): ss = self.sparse_series[0] kwargs = {'row_levels': ['A', 'B', 'C'], - 'column_levels': ['D'], 'sort_labels': False} + 'column_levels': ['D'], + 'sort_labels': False} result = (self.coo_matrices[2], self.ils[2], self.jls[1]) self._run_test(ss, kwargs, result) @@ -880,8 +885,8 @@ def test_to_coo_bad_ilevel(self): self.assertRaises(KeyError, ss.to_coo, ['A', 'B'], ['C', 'D', 'E']) def test_to_coo_duplicate_index_entries(self): - ss = pd.concat( - [self.sparse_series[0], self.sparse_series[0]]).to_sparse() + ss = pd.concat([self.sparse_series[0], + self.sparse_series[0]]).to_sparse() self.assertRaises(ValueError, ss.to_coo, ['A', 'B'], ['C', 'D']) def 
test_from_coo_dense_index(self): @@ -945,8 +950,7 @@ def setUp(self): values[np.isnan(values)] = 0 self.zframe = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], - default_fill_value=0, - index=self.dates) + default_fill_value=0, index=self.dates) values = self.frame.values.copy() values[np.isnan(values)] = 2 @@ -1005,11 +1009,10 @@ def test_constructor(self): # init dict with different index idx = self.frame.index[:5] - cons = SparseDataFrame(self.frame, index=idx, - columns=self.frame.columns, - default_fill_value=self.frame.default_fill_value, - default_kind=self.frame.default_kind, - copy=True) + cons = SparseDataFrame( + self.frame, index=idx, columns=self.frame.columns, + default_fill_value=self.frame.default_fill_value, + default_kind=self.frame.default_kind, copy=True) reindexed = self.frame.reindex(idx) assert_sp_frame_equal(cons, reindexed, exact_indices=False) @@ -1023,8 +1026,7 @@ def test_constructor_ndarray(self): sp = SparseDataFrame(self.frame.values) # 1d - sp = SparseDataFrame(self.data['A'], index=self.dates, - columns=['A']) + sp = SparseDataFrame(self.data['A'], index=self.dates, columns=['A']) assert_sp_frame_equal(sp, self.frame.reindex(columns=['A'])) # raise on level argument @@ -1032,12 +1034,10 @@ def test_constructor_ndarray(self): level=1) # wrong length index / columns - assertRaisesRegexp( - ValueError, "^Index length", SparseDataFrame, self.frame.values, - index=self.frame.index[:-1]) - assertRaisesRegexp( - ValueError, "^Column length", SparseDataFrame, self.frame.values, - columns=self.frame.columns[:-1]) + assertRaisesRegexp(ValueError, "^Index length", SparseDataFrame, + self.frame.values, index=self.frame.index[:-1]) + assertRaisesRegexp(ValueError, "^Column length", SparseDataFrame, + self.frame.values, columns=self.frame.columns[:-1]) # GH 9272 def test_constructor_empty(self): @@ -1068,13 +1068,15 @@ def test_constructor_from_series(self): y = Series(np.random.randn(10000), name='b') x2 = x.astype(float) x2.ix[:9998] = 
np.NaN - x_sparse = x2.to_sparse(fill_value=np.NaN) + # TODO: x_sparse is unused...fix + x_sparse = x2.to_sparse(fill_value=np.NaN) # noqa # Currently fails too with weird ufunc error # df1 = SparseDataFrame([x_sparse, y]) y.ix[:9998] = 0 - y_sparse = y.to_sparse(fill_value=0) + # TODO: y_sparse is unused...fix + y_sparse = y.to_sparse(fill_value=0) # noqa # without sparse value raises error # df2 = SparseDataFrame([x2_sparse, y]) @@ -1129,6 +1131,13 @@ def test_density(self): df = SparseSeries([nan, nan, nan, 0, 1, 2, 3, 4, 5, 6]) self.assertEqual(df.density, 0.7) + df = SparseDataFrame({'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C': np.arange(10), + 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}) + + self.assertEqual(df.density, 0.75) + def test_sparse_to_dense(self): pass @@ -1196,11 +1205,10 @@ def _compare_to_dense(a, b, da, db, op): # time series operations - series = [frame['A'], frame['B'], - frame['C'], frame['D'], - frame['A'].reindex(fidx[:7]), - frame['A'].reindex(fidx[::2]), - SparseSeries([], index=[])] + series = [frame['A'], frame['B'], frame['C'], frame['D'], + frame['A'].reindex(fidx[:7]), frame['A'].reindex(fidx[::2]), + SparseSeries( + [], index=[])] for op in opnames: _compare_to_dense(frame, frame[::2], frame.to_dense(), @@ -1208,30 +1216,24 @@ def _compare_to_dense(a, b, da, db, op): # 2304, no auto-broadcasting for i, s in enumerate(series): - f = lambda a, b: getattr(a,op)(b,axis='index') - _compare_to_dense(frame, s, frame.to_dense(), - s.to_dense(), f) + f = lambda a, b: getattr(a, op)(b, axis='index') + _compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), f) # rops are not implemented - #_compare_to_dense(s, frame, s.to_dense(), - # frame.to_dense(), f) + # _compare_to_dense(s, frame, s.to_dense(), + # frame.to_dense(), f) - # cross-sectional operations - series = [frame.xs(fidx[0]), - frame.xs(fidx[3]), - frame.xs(fidx[5]), - frame.xs(fidx[7]), - frame.xs(fidx[5])[:2]] + # 
cross-sectional operations + series = [frame.xs(fidx[0]), frame.xs(fidx[3]), frame.xs(fidx[5]), + frame.xs(fidx[7]), frame.xs(fidx[5])[:2]] for op in ops: for s in series: - _compare_to_dense(frame, s, frame.to_dense(), - s, op) - _compare_to_dense(s, frame, s, - frame.to_dense(), op) + _compare_to_dense(frame, s, frame.to_dense(), s, op) + _compare_to_dense(s, frame, s, frame.to_dense(), op) # it works! - result = self.frame + self.frame.ix[:, ['A', 'B']] + result = self.frame + self.frame.ix[:, ['A', 'B']] # noqa def test_op_corners(self): empty = self.empty + self.empty @@ -1330,8 +1332,8 @@ def _check_frame(frame): # insert SparseSeries differently-indexed to_insert = frame['A'][::2] frame['E'] = to_insert - expected = to_insert.to_dense().reindex( - frame.index).fillna(to_insert.fill_value) + expected = to_insert.to_dense().reindex(frame.index).fillna( + to_insert.fill_value) result = frame['E'].to_dense() assert_series_equal(result, expected, check_names=False) self.assertEqual(result.name, 'E') @@ -1344,8 +1346,8 @@ def _check_frame(frame): # insert Series differently-indexed to_insert = frame['A'].to_dense()[::2] frame['G'] = to_insert - expected = to_insert.reindex( - frame.index).fillna(frame.default_fill_value) + expected = to_insert.reindex(frame.index).fillna( + frame.default_fill_value) expected.name = 'G' assert_series_equal(frame['G'].to_dense(), expected) @@ -1374,18 +1376,21 @@ def _check_frame(frame): def test_setitem_corner(self): self.frame['a'] = self.frame['B'] - assert_sp_series_equal(self.frame['a'], self.frame['B'], check_names=False) + assert_sp_series_equal(self.frame['a'], self.frame['B'], + check_names=False) def test_setitem_array(self): arr = self.frame['B'] self.frame['E'] = arr - assert_sp_series_equal(self.frame['E'], self.frame['B'], check_names=False) + assert_sp_series_equal(self.frame['E'], self.frame['B'], + check_names=False) self.frame['F'] = arr[:-1] index = self.frame.index[:-1] 
assert_sp_series_equal(self.frame['E'].reindex(index), - self.frame['F'].reindex(index), check_names=False) + self.frame['F'].reindex(index), + check_names=False) def test_delitem(self): A = self.frame['A'] @@ -1422,8 +1427,8 @@ def test_append(self): a = self.frame.ix[:5, :3] b = self.frame.ix[5:] appended = a.append(b) - assert_sp_frame_equal( - appended.ix[:, :3], self.frame.ix[:, :3], exact_indices=False) + assert_sp_frame_equal(appended.ix[:, :3], self.frame.ix[:, :3], + exact_indices=False) def test_apply(self): applied = self.frame.apply(np.sqrt) @@ -1456,8 +1461,8 @@ def test_apply_nonuq(self): # df.T breaks df = df_orig.T.to_sparse() - rs = df.apply(lambda s: s[0], axis=0) - # no non-unique columns supported in sparse yet + rs = df.apply(lambda s: s[0], axis=0) # noqa + # TODO: no non-unique columns supported in sparse yet # assert_series_equal(rs, xp) def test_applymap(self): @@ -1486,8 +1491,8 @@ def test_fillna(self): def test_rename(self): # just check this works - renamed = self.frame.rename(index=str) - renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) + renamed = self.frame.rename(index=str) # noqa + renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) # noqa def test_corr(self): res = self.frame.corr() @@ -1497,7 +1502,7 @@ def test_describe(self): self.frame['foo'] = np.nan self.frame.get_dtype_counts() str(self.frame) - desc = self.frame.describe() + desc = self.frame.describe() # noqa def test_join(self): left = self.frame.ix[:, ['A', 'B']] @@ -1508,16 +1513,16 @@ def test_join(self): right = self.frame.ix[:, ['B', 'D']] self.assertRaises(Exception, left.join, right) - with tm.assertRaisesRegexp(ValueError, 'Other Series must have a name'): - self.frame.join(Series(np.random.randn(len(self.frame)), - index=self.frame.index)) + with tm.assertRaisesRegexp(ValueError, + 'Other Series must have a name'): + self.frame.join(Series( + np.random.randn(len(self.frame)), index=self.frame.index)) def test_reindex(self): - 
def _check_frame(frame): index = frame.index sidx = index[::2] - sidx2 = index[:5] + sidx2 = index[:5] # noqa sparse_result = frame.reindex(sidx) dense_result = frame.to_dense().reindex(sidx) @@ -1527,8 +1532,8 @@ def _check_frame(frame): dense_result) sparse_result2 = sparse_result.reindex(index) - dense_result2 = dense_result.reindex( - index).fillna(frame.default_fill_value) + dense_result2 = dense_result.reindex(index).fillna( + frame.default_fill_value) assert_frame_equal(sparse_result2.to_dense(), dense_result2) # propagate CORRECT fill value @@ -1581,14 +1586,6 @@ def test_take(self): expected = self.frame.reindex(columns=['B', 'A', 'C']) assert_sp_frame_equal(result, expected) - def test_density(self): - df = SparseDataFrame({'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C': np.arange(10), - 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}) - - self.assertEqual(df.density, 0.75) - def test_to_dense(self): def _check(frame): dense_dm = frame.to_dense() @@ -1598,7 +1595,7 @@ def _check(frame): def test_stack_sparse_frame(self): def _check(frame): - dense_frame = frame.to_dense() + dense_frame = frame.to_dense() # noqa wp = Panel.from_dict({'foo': frame}) from_dense_lp = wp.to_frame() @@ -1620,6 +1617,7 @@ def _check(frame): transposed = frame.T untransposed = transposed.T assert_sp_frame_equal(frame, untransposed) + self._check_all(_check) def test_shift(self): @@ -1700,7 +1698,7 @@ def test_sparse_pow_issue(self): df = SparseDataFrame({'A': [nan, 0, 1]}) # note that 2 ** df works fine, also df ** 1 - result = 1 ** df + result = 1**df r1 = result.take([0], 1)['A'] r2 = result['A'] @@ -1717,21 +1715,21 @@ def test_as_blocks(self): def test_nan_columnname(self): # GH 8822 - nan_colname = DataFrame(Series(1.0,index=[0]),columns=[nan]) + nan_colname = DataFrame(Series(1.0, index=[0]), columns=[nan]) nan_colname_sparse = nan_colname.to_sparse() self.assertTrue(np.isnan(nan_colname_sparse.columns[0])) def 
_dense_series_compare(s, f): result = f(s) - assert(isinstance(result, SparseSeries)) + assert (isinstance(result, SparseSeries)) dense_result = f(s.to_dense()) assert_series_equal(result.to_dense(), dense_result) def _dense_frame_compare(frame, f): result = f(frame) - assert(isinstance(frame, SparseDataFrame)) + assert (isinstance(frame, SparseDataFrame)) dense_result = f(frame.to_dense()).fillna(frame.default_fill_value) assert_frame_equal(result.to_dense(), dense_result) @@ -1769,8 +1767,7 @@ def panel_data3(): }, index=index) -class TestSparsePanel(tm.TestCase, - test_panel.SafeForLongAndSparse, +class TestSparsePanel(tm.TestCase, test_panel.SafeForLongAndSparse, test_panel.SafeForSparse): _multiprocess_can_split_ = True @@ -1800,7 +1797,8 @@ def test_constructor(self): self.assertRaises(ValueError, SparsePanel, self.data_dict, items=['Item0', 'ItemA', 'ItemB']) with tm.assertRaisesRegexp(TypeError, - "input must be a dict, a 'list' was passed"): + "input must be a dict, a 'list' was " + "passed"): SparsePanel(['a', 'b', 'c']) # deprecation GH11157 @@ -1909,8 +1907,7 @@ def test_reindex(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): def _compare_with_dense(swp, items, major, minor): - swp_re = swp.reindex(items=items, major=major, - minor=minor) + swp_re = swp.reindex(items=items, major=major, minor=minor) dwp_re = swp.to_dense().reindex(items=items, major=major, minor=minor) assert_panel_equal(swp_re.to_dense(), dwp_re) @@ -1918,8 +1915,7 @@ def _compare_with_dense(swp, items, major, minor): _compare_with_dense(self.panel, self.panel.items[:2], self.panel.major_axis[::2], self.panel.minor_axis[::2]) - _compare_with_dense(self.panel, None, - self.panel.major_axis[::2], + _compare_with_dense(self.panel, None, self.panel.major_axis[::2], self.panel.minor_axis[::2]) self.assertRaises(ValueError, self.panel.reindex) @@ -1935,16 +1931,17 @@ def _compare_with_dense(swp, items, major, minor): def test_operators(self): def 
_check_ops(panel): - def _dense_comp(op): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): dense = panel.to_dense() sparse_result = op(panel) dense_result = op(dense) assert_panel_equal(sparse_result.to_dense(), dense_result) def _mixed_comp(op): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): result = op(panel, panel.to_dense()) expected = op(panel.to_dense(), panel.to_dense()) assert_panel_equal(result, expected) @@ -1992,8 +1989,9 @@ def _dense_comp(sparse): _dense_comp(self.panel) + if __name__ == '__main__': - import nose + import nose # noqa nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False)