diff --git a/examples/4D Example 2.pdf b/examples/4D Example 2.pdf new file mode 100755 index 0000000000000..7190b17772562 Binary files /dev/null and b/examples/4D Example 2.pdf differ diff --git a/examples/HDFStore.ipynb b/examples/HDFStore.ipynb new file mode 100755 index 0000000000000..52b8e45d70d5d --- /dev/null +++ b/examples/HDFStore.ipynb @@ -0,0 +1,450 @@ +{ + "metadata": { + "name": "HDFStore" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extension of the HDFStore Documentation to show Table usage \n", + "Excellent original docs @ http://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables \n", + " \n", + "HDFStore supports a Table object to enable HDF5 storage of appendable DataFrames and Panels" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import pandas\n", + "import numpy as np\n", + "import os" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "if os.path.exists('store.h5'):\n", + " os.remove('store.h5')\n", + "store = pandas.io.pytables.HDFStore('store.h5')\n", + "store" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 2, + "text": [ + "\n", + "File path: store.h5\n", + "Empty" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "p = pandas.Panel(np.random.randn(2, 30, 4), items=['Item1', 'Item2'],\n", + " major_axis=pandas.date_range('1/1/2000', periods=30),\n", + " minor_axis=['A', 'B', 'C', 'D'])\n", + "p" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 3, + "text": [ + "\n", + "Dimensions: 2 (items) x 30 (major) x 4 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-01 00:00:00 to 2000-01-30 00:00:00\n", + "Minor axis: A to D" + 
] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# regular store and retreive of a panel (.put is equivalent to store['mypanel'] = p, .get to store['mypanel'])\n", + "store.put('mypanel',p)\n", + "mypanel = store.get('mypanel')\n", + "mypanel" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 18, + "text": [ + "\n", + "Dimensions: 2 (items) x 30 (major) x 4 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-01 00:00:00 to 2000-01-30 00:00:00\n", + "Minor axis: A to D" + ] + } + ], + "prompt_number": 18 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# slice out 2 panels from the major_axis\n", + "p1 = p.ix[:,0:10,:]\n", + "p2 = p.ix[:,10:,:]\n", + "print p1\n", + "print p2" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "Dimensions: 2 (items) x 10 (major) x 4 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-01 00:00:00 to 2000-01-10 00:00:00\n", + "Minor axis: A to D\n", + "\n", + "Dimensions: 2 (items) x 20 (major) x 4 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-11 00:00:00 to 2000-01-30 00:00:00\n", + "Minor axis: A to D\n" + ] + } + ], + "prompt_number": 19 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# store panels via append\n", + "store.append('appendpanel',p1)\n", + "store.append('appendpanel',p2)\n", + "store" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 6, + "text": [ + "\n", + "File path: store.h5\n", + "appendpanel Panel (Table)\n", + "mypanel Panel " + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# retrieve\n", + "appendpanel = store.select('appendpanel')\n", + "appendpanel" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { 
+ "output_type": "pyout", + "prompt_number": 7, + "text": [ + "\n", + "Dimensions: 2 (items) x 30 (major) x 4 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-01 00:00:00 to 2000-01-30 00:00:00\n", + "Minor axis: A to D" + ] + } + ], + "prompt_number": 7 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# let's select on the major_axis\n", + "import datetime\n", + "slicepanel = store.select('appendpanel',\n", + " where = [ dict(field = 'index', op = '>=', value = datetime.datetime(2000,1,9)), dict(field = 'index', op = '<=', value = datetime.datetime(2000,1,25)) ])\n", + "slicepanel" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 8, + "text": [ + "\n", + "Dimensions: 2 (items) x 17 (major) x 4 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-09 00:00:00 to 2000-01-25 00:00:00\n", + "Minor axis: A to D" + ] + } + ], + "prompt_number": 8 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# here we select on the minor axis\n", + "slicepanel2 = store.select('appendpanel', where = [ dict(field = 'column', value = ['A','B']) ])\n", + "slicepanel2" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 9, + "text": [ + "\n", + "Dimensions: 2 (items) x 30 (major) x 2 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-01 00:00:00 to 2000-01-30 00:00:00\n", + "Minor axis: A to B" + ] + } + ], + "prompt_number": 9 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# delete operations\n", + "store.remove('appendpanel',where = [ dict(field = 'column', value = ['A','B']) ])\n", + "store.select('appendpanel')\n", + " " + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 10, + "text": [ + "\n", + "Dimensions: 2 (items) x 30 (major) x 2 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-01 
00:00:00 to 2000-01-30 00:00:00\n", + "Minor axis: C to D" + ] + } + ], + "prompt_number": 10 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "notes & caveats\n", + " \n", + "selecting on both major & minor axis is possible (extend the where clause) \n", + "selection by items (top level panel dimension) is not possible; you always get all of the items in the returned panel \n", + "in general it is best to store your panel with the most frequently selected dimension in the minor axis and a time/date like dimension in the major axis \n", + "mixed type items are currently not supported (e.g. all of your data must be floats) \n", + "currently the major_axis is NOT indexed by pytables (as there is a bug in the pytables spec for this)\n", + " \n", + "performance is quite good on the sub-selections and table sizes can be quite large \n", + "in fact you can often append panel objects to create a giant table on disk, then subselect out as needed (e.g. write once - read many)\n", + " \n", + "in general I compress tables after writing them (using blosc compression) - much slower if you compress as you go \n", + " \n", + "If I am deleting a lot of data, I will either rebuild the table (erase and rewrite), \n", + " or use the pytables utilities ptrepack to rewrite the file (and also can change compression methods) \n", + " \n", + "once a table is written, the items are fixed for that table; you can append only items that match exactly those on disk \n", + "(if you want to change this, then rebuild - e.g. 
erase and write a new table) \n", + " \n", + "duplicate items can be written, but are filtered out in selection (with the last items being selected; thus a table is unique on major, minor pairs)\n", + "\n" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "large_p = pandas.Panel(np.random.randn(2, 1000, 1000), items=['Item1', 'Item2'],\n", + " major_axis=pandas.date_range('1/1/2000', periods=1000), minor_axis = [ 'E%s' % i for i in xrange(1000) ])\n", + "large_p" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 11, + "text": [ + "\n", + "Dimensions: 2 (items) x 1000 (major) x 1000 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-01 00:00:00 to 2002-09-26 00:00:00\n", + "Minor axis: E0 to E999" + ] + } + ], + "prompt_number": 11 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "if os.path.exists('large_store.h5'):\n", + " os.remove('large_store.h5')\n", + "large_store = pandas.io.pytables.HDFStore('large_store.h5')" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 12 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "large_store.append('large',large_p)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 13 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# we basically wrote a structure of major_axis x minor_axis rows (with items as a numpy array of x items) (less if nans in the data)\n", + "print large_store.handle.root.large.table" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "/large/table (Table(1000000,)) ''\n" + ] + } + ], + "prompt_number": 17 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "### on a slow machine!\n", + "\n", + "def f():\n", + " return large_store.select('large',where = [ dict(field = 'index', op = '>=', value = 
datetime.datetime(2002,9,12)) ])\n", + "print f(), \"\\n\" \n", + "\n", + "print \"selection by major_axis\"\n", + "%timeit f()\n", + "\n", + "print \"\\n\"\n", + "def f():\n", + " return large_store.select('large',where = [ dict(field = 'column', value = [ \"E%s\" % i for i in xrange(100) ]) ])\n", + "print f(), \"\\n\"\n", + "\n", + "print \"selection by minor axis\"\n", + "%timeit f()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "Dimensions: 2 (items) x 15 (major) x 1000 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2002-09-12 00:00:00 to 2002-09-26 00:00:00\n", + "Minor axis: E0 to E999 \n", + "\n", + "selection by major_axis\n", + "1 loops, best of 3: 607 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "\n", + "\n", + "\n", + "Dimensions: 2 (items) x 1000 (major) x 100 (minor)\n", + "Items: Item1 to Item2\n", + "Major axis: 2000-01-01 00:00:00 to 2002-09-26 00:00:00\n", + "Minor axis: E0 to E99" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " \n", + "\n", + "selection by minor axis\n", + "1 loops, best of 3: 1.1 s per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 16 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 15 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/examples/HDFStore.pdf b/examples/HDFStore.pdf new file mode 100755 index 0000000000000..b4ad497c76e1d Binary files /dev/null and b/examples/HDFStore.pdf differ diff --git a/pandas/core/api.py b/pandas/core/api.py old mode 100644 new mode 100755 index 8cf3b7f4cbda4..f8a0dcdb07ba4 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -14,6 +14,7 @@ from pandas.core.series import Series, TimeSeries from pandas.core.frame import 
""" FDPanel: a 4-d dict like collection of panels """

import operator
import sys
import numpy as np

from pandas.core.common import (PandasError, _mut_exclusive,
                                _try_sort, _default_index, _infer_dtype)
from pandas.core.index import (Index, MultiIndex, _ensure_index,
                               _get_combined_index)
from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels, _is_null_slice
from pandas.core.internals import BlockManager, make_block, form_blocks
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.panel import Panel
from pandas.util import py3compat
from pandas.util.decorators import deprecate, Appender, Substitution
import pandas.core.common as com
import pandas.core.nanops as nanops
import pandas.lib as lib


class FDPanel(Panel):
    """
    Four-dimensional container whose axes are, in order, labels, items,
    major_axis and minor_axis. Selecting a single label yields a Panel
    (see ``_box_item_values``).
    """

    _AXIS_NUMBERS = {
        'labels': 0,
        'items': 1,
        'major_axis': 2,
        'minor_axis': 3
    }

    _AXIS_ALIASES = {
        'major': 'major_axis',
        'minor': 'minor_axis'
    }

    _AXIS_NAMES = {
        0: 'labels',
        1: 'items',
        2: 'major_axis',
        3: 'minor_axis'
    }

    # major
    _default_stat_axis = 2

    labels = lib.AxisProperty(0)
    items = lib.AxisProperty(1)
    major_axis = lib.AxisProperty(2)
    minor_axis = lib.AxisProperty(3)

    def __init__(self, data=None, labels=None, items=None, major_axis=None,
                 minor_axis=None, copy=False, dtype=None):
        """
        Represents a 4 dimensional structure

        Parameters
        ----------
        data : ndarray (labels x items x major x minor), list, BlockManager,
            or dict of Panels (nested dicts are converted to Panels)

        labels : Index or array-like : axis=0
        items : Index or array-like : axis=1
        major_axis : Index or array-like: axis=2
        minor_axis : Index or array-like: axis=3

        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Copy data from inputs. Forwarded to the ndarray/list and
            BlockManager construction paths; not used for dict input.
        """
        if data is None:
            data = {}

        passed_axes = [labels, items, major_axis, minor_axis]
        axes = None
        if isinstance(data, BlockManager):
            # only override the manager's axes where the caller gave one
            if any(x is not None for x in passed_axes):
                axes = [x if x is not None else y
                        for x, y in zip(passed_axes, data.axes)]
            mgr = data
        elif isinstance(data, dict):
            mgr = self._init_dict(data, passed_axes, dtype=dtype)
            # dtype/copy were already applied while building the manager
            copy = False
            dtype = None
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
            copy = False
            dtype = None
        else:  # pragma: no cover
            raise PandasError('FDPanel constructor not properly called!')

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)

    @classmethod
    def from_dict(cls, data, intersect=False, orient='items', dtype=None):
        """ not supporting intersect/orient arguments """
        # intersect and orient are accepted for signature compatibility but
        # are ignored here
        return cls(data, dtype=dtype)

    def _init_dict(self, data, axes, dtype=None):
        """
        Build a BlockManager from a dict mapping label -> Panel (or dict, or
        an object exposing ``.shape``).

        Parameters
        ----------
        data : dict
        axes : sequence of 4 (labels, items, major, minor); None entries are
            derived from ``data``
        dtype : dtype, default None

        Returns
        -------
        BlockManager
        """
        labels, items, major, minor = axes

        # prefilter if labels passed
        if labels is not None:
            labels = _ensure_index(labels)
            data = dict((k, v) for k, v in data.iteritems() if k in labels)
        else:
            labels = Index(_try_sort(data.keys()))
            # work on a shallow copy so the conversion below does not
            # overwrite entries of the caller's dict
            data = dict(data)

        for k, v in data.items():
            if isinstance(v, dict):
                data[k] = Panel(v)

        if items is None:
            items = _extract_axis(data, axis=0)

        if major is None:
            major = _extract_axis(data, axis=1)

        if minor is None:
            minor = _extract_axis(data, axis=2)

        axes = [labels, items, major, minor]
        reshaped_data = data.copy()  # shallow

        # shape of the 3-d block stored under each label
        panel_shape = len(items), len(major), len(minor)
        for label in labels:
            v = values = data.get(label)
            if v is None:
                # requested label has no data: fill with NaN
                values = np.empty(panel_shape, dtype=dtype)
                values.fill(np.nan)
            elif isinstance(v, Panel):
                v = v.reindex(items=items, major_axis=major,
                              minor_axis=minor, copy=False)
                if dtype is not None:
                    v = v.astype(dtype)
                values = v.values
            reshaped_data[label] = values

        # segregates dtypes and forms blocks matching to columns
        blocks = form_blocks(reshaped_data, axes)
        mgr = BlockManager(blocks, axes).consolidate()
        return mgr

    def _init_matrix(self, data, axes, dtype=None, copy=False):
        """
        Build a single-block BlockManager from 4-d array-like ``data``;
        axes passed as None get a default index of the matching length.
        """
        values = _prep_ndarray(data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        items = fixed_axes[0]
        block = make_block(values, items, items)
        return BlockManager([block], fixed_axes)

    @property
    def shape(self):
        """Tuple of (labels, items, major_axis, minor_axis) lengths."""
        return (len(self.labels), len(self.items),
                len(self.major_axis), len(self.minor_axis))

    def __array_wrap__(self, result):
        return self._constructor(result,
                                 labels=self.labels,
                                 items=self.items,
                                 major_axis=self.major_axis,
                                 minor_axis=self.minor_axis, copy=False)

    #----------------------------------------------------------------------
    # Magic methods

    def __repr__(self):
        class_name = str(self.__class__)

        L, I, N, K = (len(self.labels), len(self.items),
                      len(self.major_axis), len(self.minor_axis))

        dims = 'Dimensions: %d (labels) x %d (items) x %d (major) x %d (minor)' % (L, I, N, K)

        if len(self.major_axis) > 0:
            major = 'Major axis: %s to %s' % (self.major_axis[0],
                                              self.major_axis[-1])
        else:
            major = 'Major axis: None'

        if len(self.minor_axis) > 0:
            minor = 'Minor axis: %s to %s' % (self.minor_axis[0],
                                              self.minor_axis[-1])
        else:
            minor = 'Minor axis: None'

        if len(self.items) > 0:
            items = 'Items: %s to %s' % (self.items[0], self.items[-1])
        else:
            items = 'Items: None'

        if len(self.labels) > 0:
            labels = 'Labels: %s to %s' % (self.labels[0], self.labels[-1])
        else:
            labels = 'Labels: None'

        output = '%s\n%s\n%s\n%s\n%s\n%s' % (class_name, dims, labels, items, major, minor)

        return output

    def __iter__(self):
        return iter(self.labels)

    def iteritems(self):
        """Yield (label, Panel) pairs in ``labels`` order."""
        for label in self.labels:
            yield label, self[label]

    iterkv = iteritems

    #----------------------------------------------------------------------
    # Getting and setting elements

    def get_value(self, label, item, major, minor):
        """
        Quickly retrieve single value at (label, item, major, minor) location

        Parameters
        ----------
        label : label (fdpanel item)
        item : item label (fdpanel item)
        major : major axis label (fdpanel item row)
        minor : minor axis label (fdpanel item column)

        Returns
        -------
        value : scalar value
        """
        # hm, two layers to the onion
        p = self._get_item_cache(label)
        return p.get_value(item, major, minor)

    def set_value(self, label, item, major, minor, value):
        """
        Quickly set single value at (label, item, major, minor) location

        Parameters
        ----------
        label : label (fdpanel item)
        item : item label (fdpanel item)
        major : major axis label (fdpanel item row)
        minor : minor axis label (fdpanel item column)
        value : scalar to store

        Returns
        -------
        label : FDPanel
            If label combo is contained, will be reference to calling Panel,
            otherwise a new object
        """
        try:
            p = self._get_item_cache(label)
            p.set_value(item, major, minor, value)
            return self
        except KeyError:
            # location not present: enlarge the axes, then retry on the
            # reindexed object
            ax1, ax2, ax3, ax4 = self._expand_axes((label, item, major, minor))
            result = self.reindex(labels=ax1, items=ax2, major=ax3,
                                  minor=ax4, copy=False)

            likely_dtype = com._infer_dtype(value)
            made_bigger = not np.array_equal(ax1, self.labels)
            # how to make this logic simpler?
            if made_bigger:
                com._possibly_cast_item(result, label, likely_dtype)

            return result.set_value(label, item, major, minor, value)

    def _box_item_values(self, key, values):
        return Panel(values, items=self.items, major_axis=self.major_axis,
                     minor_axis=self.minor_axis)

    def __getattr__(self, name):
        """After regular attribute access, try looking up the name of an item.
        This allows simpler access to items for interactive use."""
        if name in self.labels:
            return self[name]
        raise AttributeError("'%s' object has no attribute '%s'" %
                             (type(self).__name__, name))

    def __setitem__(self, key, value):
        """
        Store ``value`` under label ``key``.

        ``value`` may be a Panel (reindexed to this object's items/major/
        minor axes), an ndarray of shape (items, major, minor), or a scalar
        (broadcast to that shape).

        Raises
        ------
        TypeError
            If ``value`` is none of the supported kinds.
        """
        _, I, N, K = self.shape
        if isinstance(value, Panel):
            value = value.reindex(items=self.items,
                                  major_axis=self.major_axis,
                                  minor_axis=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            assert(value.shape == (I, N, K))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((I, N, K), dtype=dtype)
            mat.fill(value)
        else:
            # previously fell through and failed on the unbound name `mat`
            raise TypeError('Cannot set item of type %s'
                            % type(value).__name__)

        mat = mat.reshape((1, I, N, K))
        NDFrame._set_item(self, key, mat)

    def _get_plane_axes(self, axis):
        """
        Return the (items, major, minor) axes to use for a Panel result
        associated with ``axis``, per the mapping spelled out below.
        """
        axis = self._get_axis_name(axis)

        if axis == 'major_axis':
            items = self.labels
            major = self.items
            minor = self.minor_axis
        elif axis == 'minor_axis':
            items = self.labels
            major = self.items
            minor = self.major_axis
        elif axis == 'items':
            items = self.labels
            major = self.major_axis
            minor = self.minor_axis
        elif axis == 'labels':
            items = self.items
            major = self.major_axis
            minor = self.minor_axis

        return items, major, minor

    def _reduce(self, op, axis=0, skipna=True):
        """Apply ``op(values, axis=..., skipna=...)`` and wrap the result in a Panel."""
        axis_name = self._get_axis_name(axis)
        axis_number = self._get_axis_number(axis_name)
        f = lambda x: op(x, axis=axis_number, skipna=skipna)

        result = f(self.values)

        items, major, minor = self._get_plane_axes(axis_name)
        return Panel(result, items=items, major_axis=major, minor_axis=minor)

    def conform(self, panel, axis='labels'):
        """
        Conform input Panel to align with chosen axis pair.

        Parameters
        ----------
        panel : Panel
        axis : {'labels', 'items', 'major', 'minor'}

        Returns
        -------
        Panel
        """
        items, major, minor = self._get_plane_axes(axis)
        return panel.reindex(items=items, major_axis=major, minor_axis=minor)

    def reindex(self, labels=None, major=None, items=None, minor=None,
                method=None, major_axis=None, minor_axis=None, copy=True):
        """
        Conform fdpanel to new axis or axes

        Parameters
        ----------
        labels: Index or sequence, default None
        items : Index or sequence, default None
        major : Index or sequence, default None
            Can also use 'major_axis' keyword
        minor : Index or sequence, default None
            Can also use 'minor_axis' keyword
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        copy : boolean, default True
            Return a new object, even if the passed indexes are the same

        Returns
        -------
        FDPanel (new object)

        Raises
        ------
        ValueError
            If ``copy`` is true and the result is still ``self`` after all
            requested axes were processed.
        """
        result = self

        major = _mut_exclusive(major, major_axis)
        minor = _mut_exclusive(minor, minor_axis)

        if major is not None:
            result = result._reindex_axis(major, method, 2, copy)

        if minor is not None:
            result = result._reindex_axis(minor, method, 3, copy)

        if items is not None:
            result = result._reindex_axis(items, method, 1, copy)

        if labels is not None:
            result = result._reindex_axis(labels, method, 0, copy)

        if result is self and copy:
            raise ValueError('Must specify at least one axis')

        return result

    def reindex_like(self, other, method=None):
        """
        Reindex FDPanel to match indices of another FDPanel

        Parameters
        ----------
        other : FDPanel
        method : string or None

        Returns
        -------
        reindexed : FDPanel
        """
        # todo: object columns
        return self.reindex(labels=other.labels, major=other.major_axis,
                            items=other.items, minor=other.minor_axis,
                            method=method)

    def swapaxes(self, axis1='major', axis2='minor'):
        """
        Interchange axes and swap values axes appropriately

        Returns
        -------
        y : FDPanel (new object)

        Raises
        ------
        ValueError
            If both arguments resolve to the same axis.
        """
        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            raise ValueError('Cannot specify the same axis')

        mapping = {i: j, j: i}

        new_axes = (self._get_axis(mapping.get(k, k))
                    for k in range(4))
        new_values = self.values.swapaxes(i, j).copy()

        return self._constructor(new_values, *new_axes)

    def xs(self, key, axis=2, copy=True):
        """
        Return slice of fdpanel along selected axis

        Parameters
        ----------
        key : object
            Label
        axis : {'labels', 'items', 'major', 'minor'}, default 2/'major'
        copy : boolean, default True

        Returns
        -------
        y : Panel
        """
        # only the integer 0 takes the item shortcut; any other value
        # (including axis names) is resolved via _get_axis_number below
        if axis == 0:
            data = self[key]
            if copy:
                data = data.copy()
            return data

        self._consolidate_inplace()
        axis_number = self._get_axis_number(axis)
        new_data = self._data.xs(key, axis=axis_number, copy=copy)
        return Panel(new_data)

    def apply(self, func, axis='major'):
        """
        Apply ``func`` along ``axis`` using ``np.apply_along_axis``

        Parameters
        ----------
        func : numpy function
            Signature should match numpy.{sum, mean, var, std} etc.
        axis : {'labels', 'major', 'minor', 'items'}

        Returns
        -------
        result : Panel
        """
        i = self._get_axis_number(axis)
        result = np.apply_along_axis(func, i, self.values)
        return self._wrap_result(result, axis=axis)

    def _combine(self, other, func, axis=0):
        """
        Combine with ``other`` via ``func``. Only FDPanel and scalar operands
        are implemented; Panel and DataFrame raise NotImplementedError.
        NOTE(review): any other operand type falls through and returns None.
        """
        if isinstance(other, FDPanel):
            return self._combine_fdpanel(other, func)
        elif isinstance(other, Panel):
            raise NotImplementedError
        elif isinstance(other, DataFrame):
            raise NotImplementedError
        elif np.isscalar(other):
            new_values = func(self.values, other)
            return self._constructor(new_values, self.labels, self.items,
                                     self.major_axis, self.minor_axis)

    def _combine_fdpanel(self, other, func):
        # NOTE(review): relies on Index `+` combining the two indexes
        # (presumably a union in this pandas version) - confirm
        labels = self.labels + other.labels
        items = self.items + other.items
        major = self.major_axis + other.major_axis
        minor = self.minor_axis + other.minor_axis

        # could check that everything's the same size, but forget it
        this = self.reindex(labels=labels, items=items, major=major, minor=minor)
        other = other.reindex(labels=labels, items=items, major=major, minor=minor)

        result_values = func(this.values, other.values)

        return self._constructor(result_values, labels, items, major, minor)

    def _wrap_result(self, result, axis):
        axis = self._get_axis_name(axis)
        items, major, minor = self._get_plane_axes(axis)

        return Panel(result, items=items, major_axis=major, minor_axis=minor)

    ### remove operations ####
    # these Panel methods are overridden to be explicitly unsupported here
    def major_xs(self, *args, **kwargs):
        raise NotImplementedError

    def minor_xs(self, *args, **kwargs):
        raise NotImplementedError

    def to_frame(self, *args, **kwargs):
        raise NotImplementedError

    def to_excel(self, *args, **kwargs):
        raise NotImplementedError


def _prep_ndarray(values, copy=True):
    """
    Coerce ``values`` to a 4-d ndarray.

    Non-ndarray input goes through ``np.asarray`` (string dtypes are
    converted to object); ndarray input is copied only when ``copy`` is true.
    Asserts that the result has exactly four dimensions.
    """
    if not isinstance(values, np.ndarray):
        values = np.asarray(values)
        # NumPy strings are a pain, convert to object
        if issubclass(values.dtype.type, basestring):
            values = np.array(values, dtype=object, copy=True)
    else:
        if copy:
            values = values.copy()
    assert(values.ndim == 4)
    return values


def _extract_axis(data, axis=0, intersect=False):
    """
    Derive the index for ``axis`` from the values of dict ``data``.

    Panel values contribute their axis (combined through
    ``_get_combined_index``); any other value contributes ``shape[axis]``,
    and all such lengths must agree.

    Raises
    ------
    ValueError
        If the non-Panel values disagree on their length along ``axis``.
    """
    if len(data) == 0:
        index = Index([])
    else:
        raw_lengths = []
        indexes = []

        have_raw_arrays = False
        have_panels = False

        for v in data.values():
            if isinstance(v, Panel):
                have_panels = True
                indexes.append(v._get_axis(axis))
            else:
                have_raw_arrays = True
                raw_lengths.append(v.shape[axis])

        if have_panels:
            index = _get_combined_index(indexes, intersect=intersect)

        if have_raw_arrays:
            lengths = list(set(raw_lengths))
            if len(lengths) > 1:
                raise ValueError('ndarrays must match shape on axis %d' % axis)

            if have_panels:
                assert(lengths[0] == len(index))
            else:
                index = Index(np.arange(lengths[0]))

    return _ensure_index(index)
def select(self, key=None, where=None):
    """
    Retrieve pandas object(s) stored in file, optionally based on where
    criteria

    Parameters
    ----------
    key : object or list of objects, optional
        Name(s) of the stored node(s) to read. When omitted, the keys are
        taken from the 'key' criteria found in `where`; if there are none,
        every key in the store is read.
    where : list, optional

        Must be a list of dict objects of the following forms. Selection can
        be performed on the 'index' or 'column' fields.

        Object Selection (equivalent to passing a key)
            {'field' : 'key',
             'value' : ['v1', 'v2']}

        Comparison op
            {'field' : 'index',
             'op'    : '>=',
             'value' : value}

        Match a set of values
            {'field' : 'index',
             'value' : [v1, v2, v3]}

    Returns
    -------
    None if none of the requested keys exists; the stored object itself if
    exactly one was read; a Panel if several DataFrames were read; a FDPanel
    if several Panels were read; otherwise a dict keyed by node name.
    """

    # see if we have a key in the where, otherwise, try for all keys
    if key is None:
        key = Selection(where=where).keys
        if not key:
            key = self.keys()

    if not isinstance(key, list):
        key = [key]

    # construct a dict of the results (of only valid keys)
    d = dict()
    for k in key:
        group = getattr(self.handle.root, k, None)
        if group is not None:
            # raises if criteria were given for a node not stored as a table
            self._has_selection_criteria(group, where)
            d[k] = self._read_group(group, where)

    # materialise the values: a dict view cannot be indexed on Python 3
    values = list(d.values())

    # nothing retrieved, return None
    if not values:
        return None

    # if we have only a single key, return that object directly
    if len(values) == 1:
        return values[0]

    # try to return a consolidated object from d (all frames -> Panel,
    # all panels -> FDPanel, anything else -> the plain dict)
    if all(isinstance(o, DataFrame) for o in values):
        return Panel(d)

    if all(isinstance(o, Panel) for o in values):
        return FDPanel(d)

    return d
def remove(self, key, where=None):
    """
    Remove a stored object, or only the rows of it that match `where`

    Parameters
    ----------
    key : object
        Name of the node to remove (or to remove rows from).
    where : list, optional
        Selection criteria; when given, only the matching rows are deleted
        and the node itself is kept. The node must be stored as a table.

    Returns
    -------
    number of rows removed, or None when `key` does not exist or when the
    whole node was removed and it holds no table to count rows from

    """
    n = None
    group = getattr(self.handle.root, key, None)
    if group is None:
        return n

    if where is None:
        # whole-node removal: report the table's row count when there is one
        # (the count has to be read before the node disappears)
        table = getattr(group, 'table', None)
        if table is not None:
            n = table.nrows
        self.handle.removeNode(self.handle.root, key, recursive=True)
    else:
        # raises if the node was not written as a table
        self._has_selection_criteria(group, where)
        n = self._delete_from_table(group, where)
    return n
+ if value is None: + if isinstance(key, (dict, FDPanel)): + for k, v in key.iteritems(): + _append(k, v) + else: + raise Exception("value must be passed to store a non-dict like object in append") + + # group append + else: + _append(key, value) def _write_to_group(self, key, value, table=False, append=False, comp=None): @@ -758,6 +835,12 @@ def _write_table(self, group, items=None, index=None, columns=None, # the table must already exist table = getattr(group, 'table', None) + # check for backwards incompatibility + if append: + existing_kind = table._v_attrs.index_kind + if existing_kind != index_kind: + raise Exception("incompatible kind in index [%s - %s]" % (existing_kind,index_kind)) + # add kinds table._v_attrs.index_kind = index_kind table._v_attrs.columns_kind = cols_kind @@ -796,6 +879,13 @@ def _write_table(self, group, items=None, index=None, columns=None, pass raise + def _has_selection_criteria(self, group, where): + """ only raise an exception if where are not capable of processing it (e.g. a table) """ + if where is None: return False + if 'table' not in group._v_attrs.pandas_type: + raise Exception('can only select on objects written as tables') + return False + def _read_group(self, group, where=None): kind = group._v_attrs.pandas_type kind = _LEGACY_MAP.get(kind, kind) @@ -905,12 +995,25 @@ def _delete_from_table(self, group, where = None): s.select_coords() # delete the rows in reverse order - l = list(s.values) - l.reverse() - for c in l: - table.removeRows(c) - self.handle.flush() - return len(s.values) + l = list(s.values) + ln = len(l) + + if ln: + + # if we can do a consecutive removal - do it! 
+ if l[0]+ln-1 == l[-1]: + table.removeRows(start = l[0], stop = l[-1]+1) + + # one by one + else: + l.reverse() + for c in l: + table.removeRows(c) + + self.handle.flush() + + return ln + def _convert_index(index): if isinstance(index, DatetimeIndex): @@ -1051,6 +1154,10 @@ class Selection(object): table : tables.Table where : list of dicts of the following form + Object Selection + {'field' : 'key', + 'value' : ['v1',v2']} + Comparison op {'field' : 'index', 'op' : '>=', @@ -1064,30 +1171,45 @@ class Selection(object): {'field' : 'index', 'value' : [v1, v2, v3]} """ - def __init__(self, table, where=None, index_kind=None): - self.table = table - self.where = where - self.index_kind = index_kind + def __init__(self, table=None, where=None, index_kind=None): + self.table = table + self.where = where + self.index_kind = index_kind self.column_filter = None self.the_condition = None - self.conditions = [] - self.values = None + self.conditions = [] + self.values = None + self.keys = [] if where: self.generate(where) def generate(self, where): + + if where is None: return + if not isinstance(where, list): + where = [ where ] + # and condictions for c in where: op = c.get('op',None) value = c['value'] field = c['field'] + # index selection if field == 'index' and self.index_kind == 'datetime64': val = lib.Timestamp(value).value self.conditions.append('(%s %s %s)' % (field,op,val)) elif field == 'index' and isinstance(value, datetime): value = time.mktime(value.timetuple()) self.conditions.append('(%s %s %s)' % (field,op,value)) + + # create keys + elif field == 'key': + if not isinstance(value, list): + value = [ value ] + self.keys.extend(value) + + # column selection else: self.generate_multiple_conditions(op,value,field) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py old mode 100644 new mode 100755 index 29d3b45f26a9e..6c9e10dc559c6 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -8,7 +8,7 @@ from 
def test_fdpanel_put(self):
    """put() with no value accepts a FDPanel or a dict and stores one node per entry."""

    # open the store before the try block: if the constructor fails there is
    # nothing to clean up, and the finally clause must not hide the real
    # error behind a NameError on `store`
    store = HDFStore(self.scratchpath)
    try:

        # fdpanel
        fdp = FDPanel(dict(p1 = tm.makePanel(), p2 = tm.makePanel()))
        store.put(fdp)
        result = store.select()
        tm.assert_fdpanel_equal(result, fdp)

        # store dict (retrieve as a FDPanel though)
        d = dict(p1 = tm.makePanel(), p2 = tm.makePanel())
        store.put(d)
        result = store.select()
        tm.assert_fdpanel_equal(result, FDPanel(d))

        # test value=None (but not a corresponding dict like key);
        # exercised on the scratch store used by the rest of this test
        p = tm.makePanel()
        self.assertRaises(Exception, store.put, p)

    finally:
        store.close()
        os.remove(self.scratchpath)
def add_nans(fdp):
    """Call ``tm.add_nans`` on the panel stored under every label of `fdp`."""
    for label in fdp.labels:
        tm.add_nans(fdp[label])
def test_iter(self):
    """Iterating the fdpanel yields the same contents as its labels axis."""
    # equalContents only returns the comparison result; it has to be
    # asserted or the test can never fail
    self.assert_(tm.equalContents(list(self.fdpanel), self.fdpanel.labels))
wrapper = alternative + + for i in range(obj.ndim): + result = f(axis=i) + assert_panel_equal(result, obj.apply(skipna_wrapper, axis=i)) + + self.assertRaises(Exception, f, axis=obj.ndim) + +class SafeForSparse(object): + + @classmethod + def assert_panel_equal(cls, x, y): + assert_panel_equal(x, y) + + @classmethod + def assert_fdpanel_equal(cls, x, y): + assert_fdpanel_equal(x, y) + + def test_get_axis(self): + assert(self.fdpanel._get_axis(0) is self.fdpanel.labels) + assert(self.fdpanel._get_axis(1) is self.fdpanel.items) + assert(self.fdpanel._get_axis(2) is self.fdpanel.major_axis) + assert(self.fdpanel._get_axis(3) is self.fdpanel.minor_axis) + + def test_set_axis(self): + new_labels = Index(np.arange(len(self.fdpanel.labels))) + new_items = Index(np.arange(len(self.fdpanel.items))) + new_major = Index(np.arange(len(self.fdpanel.major_axis))) + new_minor = Index(np.arange(len(self.fdpanel.minor_axis))) + + # ensure propagate to potentially prior-cached items too + label = self.fdpanel['l1'] + self.fdpanel.labels = new_labels + + if hasattr(self.fdpanel, '_item_cache'): + self.assert_('l1' not in self.fdpanel._item_cache) + self.assert_(self.fdpanel.labels is new_labels) + + self.fdpanel.major_axis = new_major + self.assert_(self.fdpanel[0].major_axis is new_major) + self.assert_(self.fdpanel.major_axis is new_major) + + self.fdpanel.minor_axis = new_minor + self.assert_(self.fdpanel[0].minor_axis is new_minor) + self.assert_(self.fdpanel.minor_axis is new_minor) + + def test_get_axis_number(self): + self.assertEqual(self.fdpanel._get_axis_number('labels'), 0) + self.assertEqual(self.fdpanel._get_axis_number('items'), 1) + self.assertEqual(self.fdpanel._get_axis_number('major'), 2) + self.assertEqual(self.fdpanel._get_axis_number('minor'), 3) + + def test_get_axis_name(self): + self.assertEqual(self.fdpanel._get_axis_name(0), 'labels') + self.assertEqual(self.fdpanel._get_axis_name(1), 'items') + self.assertEqual(self.fdpanel._get_axis_name(2), 'major_axis') 
def test_keys(self):
    """keys() holds the same contents as the labels axis."""
    # equalContents only returns the comparison result; it has to be
    # asserted or the test can never fail
    self.assert_(tm.equalContents(self.fdpanel.keys(), self.fdpanel.labels))
def test_get_value(self):
    """get_value(label, item, major, minor) agrees with chained indexing."""
    # the fixture only provides self.fdpanel, so walk its four axes
    # (every other major-axis entry keeps the loop short)
    for label in self.fdpanel.labels:
        for item in self.fdpanel.items:
            for mjr in self.fdpanel.major_axis[::2]:
                for mnr in self.fdpanel.minor_axis:
                    result = self.fdpanel.get_value(label, item, mjr, mnr)
                    expected = self.fdpanel[label][item][mnr][mjr]
                    assert_almost_equal(result, expected)
self.fdpanel.__getitem__, 'ItemQ') + + def test_delitem_and_pop(self): + expected = self.fdpanel['l2'] + result = self.fdpanel.pop('l2') + assert_panel_equal(expected, result) + self.assert_('l2' not in self.fdpanel.labels) + + del self.fdpanel['l3'] + self.assert_('l3' not in self.fdpanel.labels) + self.assertRaises(Exception, self.fdpanel.__delitem__, 'l3') + + values = np.empty((4, 4, 4, 4)) + values[0] = 0 + values[1] = 1 + values[2] = 2 + values[3] = 3 + + fdpanel = FDPanel(values, range(4), range(4), range(4), range(4)) + + # did we delete the right row? + + fdpanelc = fdpanel.copy() + del fdpanelc[0] + assert_panel_equal(fdpanelc[1], fdpanel[1]) + assert_panel_equal(fdpanelc[2], fdpanel[2]) + assert_panel_equal(fdpanelc[3], fdpanel[3]) + + fdpanelc = fdpanel.copy() + del fdpanelc[1] + assert_panel_equal(fdpanelc[0], fdpanel[0]) + assert_panel_equal(fdpanelc[2], fdpanel[2]) + assert_panel_equal(fdpanelc[3], fdpanel[3]) + + fdpanelc = fdpanel.copy() + del fdpanelc[2] + assert_panel_equal(fdpanelc[1], fdpanel[1]) + assert_panel_equal(fdpanelc[0], fdpanel[0]) + assert_panel_equal(fdpanelc[3], fdpanel[3]) + + fdpanelc = fdpanel.copy() + del fdpanelc[3] + assert_panel_equal(fdpanelc[1], fdpanel[1]) + assert_panel_equal(fdpanelc[2], fdpanel[2]) + assert_panel_equal(fdpanelc[0], fdpanel[0]) + + def test_setitem(self): + ## LongPanel with one item + #lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() + #self.assertRaises(Exception, self.panel.__setitem__, + # 'ItemE', lp) + + # Panel + p = Panel(dict(ItemA = self.fdpanel['l1']['ItemA'][2:].filter(items=['A', 'B']))) + self.fdpanel['l4'] = p + self.fdpanel['l5'] = p + + p2 = self.fdpanel['l4'] + + assert_panel_equal(p, p2.reindex(items = p.items, + major_axis = p.major_axis, + minor_axis = p.minor_axis)) + + # scalar + self.fdpanel['lG'] = 1 + self.fdpanel['lE'] = True + self.assert_(self.fdpanel['lG'].values.dtype == np.int64) + self.assert_(self.fdpanel['lE'].values.dtype == np.bool_) + + # object dtype + 
self.fdpanel['lQ'] = 'foo' + self.assert_(self.fdpanel['lQ'].values.dtype == np.object_) + + # boolean dtype + self.fdpanel['lP'] = self.fdpanel['l1'] > 0 + self.assert_(self.fdpanel['lP'].values.dtype == np.bool_) + + def test_setitem_ndarray(self): + raise nose.SkipTest + # from pandas import DateRange, datetools + + # timeidx = DateRange(start=datetime(2009,1,1), + # end=datetime(2009,12,31), + # offset=datetools.MonthEnd()) + # lons_coarse = np.linspace(-177.5, 177.5, 72) + # lats_coarse = np.linspace(-87.5, 87.5, 36) + # P = Panel(items=timeidx, major_axis=lons_coarse, minor_axis=lats_coarse) + # data = np.random.randn(72*36).reshape((72,36)) + # key = datetime(2009,2,28) + # P[key] = data# + + # assert_almost_equal(P[key].values, data) + + def test_major_xs(self): + raise nose.SkipTest + # ref = self.panel['ItemA'] + + # idx = self.panel.major_axis[5] + # xs = self.panel.major_xs(idx) + + # assert_series_equal(xs['ItemA'], ref.xs(idx)) + + # # not contained + # idx = self.panel.major_axis[0] - bday + # self.assertRaises(Exception, self.panel.major_xs, idx) + + def test_major_xs_mixed(self): + raise nose.SkipTest + # self.panel['ItemD'] = 'foo' + # xs = self.panel.major_xs(self.panel.major_axis[0]) + # self.assert_(xs['ItemA'].dtype == np.float64) + # self.assert_(xs['ItemD'].dtype == np.object_) + + def test_minor_xs(self): + raise nose.SkipTest + # ref = self.panel['ItemA'] + + # idx = self.panel.minor_axis[1] + # xs = self.panel.minor_xs(idx) + + # assert_series_equal(xs['ItemA'], ref[idx]) + + # # not contained + # self.assertRaises(Exception, self.panel.minor_xs, 'E') + + def test_minor_xs_mixed(self): + raise nose.SkipTest + # self.panel['ItemD'] = 'foo' + + # xs = self.panel.minor_xs('D') + # self.assert_(xs['ItemA'].dtype == np.float64) + # self.assert_(xs['ItemD'].dtype == np.object_) + + def test_xs(self): + l1 = self.fdpanel.xs('l1', axis=0) + expected = self.fdpanel['l1'] + assert_panel_equal(l1, expected) + + # not view by default + l1.values[:] = 
np.nan + self.assert_(not np.isnan(self.fdpanel['l1'].values).all()) + + # but can get view + l1_view = self.fdpanel.xs('l1', axis=0, copy=False) + l1_view.values[:] = np.nan + self.assert_(np.isnan(self.fdpanel['l1'].values).all()) + + # mixed-type + self.fdpanel['strings'] = 'foo' + self.assertRaises(Exception, self.fdpanel.xs, 'D', axis=2, + copy=False) + + def test_getitem_fancy_labels(self): + fdp = self.fdpanel + + labels = fdp.labels[[1, 0]] + items = fdp.items[[1, 0]] + dates = fdp.major_axis[::2] + cols = ['D', 'C', 'F'] + + # all 4 specified + assert_fdpanel_equal(fdp.ix[labels, items, dates, cols], + fdp.reindex(labels=labels, items=items, major=dates, minor=cols)) + + # 3 specified + assert_fdpanel_equal(fdp.ix[:, items, dates, cols], + fdp.reindex(items=items, major=dates, minor=cols)) + + # 2 specified + assert_fdpanel_equal(fdp.ix[:, :, dates, cols], + fdp.reindex(major=dates, minor=cols)) + + assert_fdpanel_equal(fdp.ix[:, items, :, cols], + fdp.reindex(items=items, minor=cols)) + + assert_fdpanel_equal(fdp.ix[:, items, dates, :], + fdp.reindex(items=items, major=dates)) + + # only 1 + assert_fdpanel_equal(fdp.ix[:, items, :, :], + fdp.reindex(items=items)) + + assert_fdpanel_equal(fdp.ix[:, :, dates, :], + fdp.reindex(major=dates)) + + assert_fdpanel_equal(fdp.ix[:, :, :, cols], + fdp.reindex(minor=cols)) + + def test_getitem_fancy_slice(self): + pass + + def test_getitem_fancy_ints(self): + pass + + def test_getitem_fancy_xs(self): + self.assertRaises(NotImplementedError, self.fdpanel.major_xs) + self.assertRaises(NotImplementedError, self.fdpanel.minor_xs) + + def test_getitem_fancy_xs_check_view(self): + raise nose.SkipTest + # item = 'ItemB' + # date = self.panel.major_axis[5] + # col = 'C' + + # # make sure it's always a view + # NS = slice(None, None) + + # # DataFrames + # comp = assert_frame_equal + # self._check_view(item, comp) + # self._check_view((item, NS), comp) + # self._check_view((item, NS, NS), comp) + # self._check_view((NS, 
date), comp) + # self._check_view((NS, date, NS), comp) + # self._check_view((NS, NS, 'C'), comp) + + # # Series + # comp = assert_series_equal + # self._check_view((item, date), comp) + # self._check_view((item, date, NS), comp) + # self._check_view((item, NS, 'C'), comp) + # self._check_view((NS, date, 'C'), comp)# + + #def _check_view(self, indexer, comp): + # cp = self.panel.copy() + # obj = cp.ix[indexer] + # obj.values[:] = 0 + # self.assert_((obj.values == 0).all()) + # comp(cp.ix[indexer].reindex_like(obj), obj) + + def test_get_value(self): + for label in self.fdpanel.labels: + for item in self.fdpanel.items: + for mjr in self.fdpanel.major_axis[::2]: + for mnr in self.fdpanel.minor_axis: + result = self.fdpanel.get_value(label, item, mjr, mnr) + expected = self.fdpanel[label][item][mnr][mjr] + assert_almost_equal(result, expected) + + def test_set_value(self): + for label in self.fdpanel.labels: + for item in self.fdpanel.items: + for mjr in self.fdpanel.major_axis[::2]: + for mnr in self.fdpanel.minor_axis: + self.fdpanel.set_value(label, item, mjr, mnr, 1.) + assert_almost_equal(self.fdpanel[label][item][mnr][mjr], 1.) 
+
+        # resize
+        res = self.fdpanel.set_value('l4', 'ItemE', 'foo', 'bar', 1.5)
+        self.assert_(isinstance(res, FDPanel))
+        self.assert_(res is not self.fdpanel)
+        self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5)
+
+        res3 = self.fdpanel.set_value('l4', 'ItemE', 'foobar', 'baz', 5)
+        self.assert_(com.is_float_dtype(res3['l4'].values))
+
+class TestFDPanel(unittest.TestCase, CheckIndexing, SafeForSparse,
+                  SafeForLongAndSparse):
+    """Constructor, reindexing and axis-manipulation tests for ``FDPanel``.
+
+    Every test runs against the ``self.fdpanel`` fixture created in
+    ``setUp``.  Tests whose bodies are commented out still refer to
+    ``self.panel`` / ``Panel`` and have not been ported to four dimensions;
+    they raise ``nose.SkipTest`` so they are reported as skipped instead of
+    as passing.
+    """
+
+    @classmethod
+    def assert_fdpanel_equal(cls, x, y):
+        # Thin wrapper: the name in the body resolves to the module-level
+        # assert_fdpanel_equal helper, not to this classmethod.
+        assert_fdpanel_equal(x, y)
+
+    def setUp(self):
+        # Fresh fixture for every test, passed through add_nans.
+        self.fdpanel = tm.makeFDPanel()
+        add_nans(self.fdpanel)
+
+    def test_constructor(self):
+        # The constructor shares its input unless copy=True is given.
+
+        # with BlockManager
+        fdp = FDPanel(self.fdpanel._data)
+        self.assert_(fdp._data is self.fdpanel._data)
+
+        fdp = FDPanel(self.fdpanel._data, copy=True)
+        self.assert_(fdp._data is not self.fdpanel._data)
+        assert_fdpanel_equal(fdp, self.fdpanel)
+
+        # strings handled prop
+        #fdp = FDPanel([[['foo', 'foo', 'foo',],
+        #                ['foo', 'foo', 'foo']]])
+        #self.assert_(wp.values.dtype == np.object_)
+
+        vals = self.fdpanel.values
+
+        # no copy
+        fdp = FDPanel(vals)
+        self.assert_(fdp.values is vals)
+
+        # copy
+        fdp = FDPanel(vals, copy=True)
+        self.assert_(fdp.values is not vals)
+
+    def test_constructor_cast(self):
+        # dtype=int must cast both BlockManager and ndarray input.
+        zero_filled = self.fdpanel.fillna(0)
+
+        casted = FDPanel(zero_filled._data, dtype=int)
+        casted2 = FDPanel(zero_filled.values, dtype=int)
+
+        exp_values = zero_filled.values.astype(int)
+        assert_almost_equal(casted.values, exp_values)
+        assert_almost_equal(casted2.values, exp_values)
+
+        # can't cast
+        # NOTE(review): this checks Panel with 3-D data, not FDPanel --
+        # confirm whether a 4-D equivalent is intended.
+        data = [[['foo', 'bar', 'baz']]]
+        self.assertRaises(ValueError, Panel, data, dtype=float)
+
+    def test_constructor_empty_panel(self):
+        # NOTE(review): builds an empty Panel, not an FDPanel -- confirm
+        # whether FDPanel() (and its labels axis) should be checked here.
+        empty = Panel()
+        self.assert_(len(empty.items) == 0)
+        self.assert_(len(empty.major_axis) == 0)
+        self.assert_(len(empty.minor_axis) == 0)
+
+    def test_constructor_observe_dtype(self):
+        # GH #411
+        # NOTE(review): exercises Panel rather than FDPanel -- confirm.
+        panel = Panel(items=range(3), major_axis=range(3),
+                      minor_axis=range(3), dtype='O')
+        self.assert_(panel.values.dtype == np.object_)
+
+    def test_consolidate(self):
+        # Adding a new label de-consolidates the block manager;
+        # consolidate() returns a consolidated object.
+        self.assert_(self.fdpanel._data.is_consolidated())
+
+        self.fdpanel['foo'] = 1.
+        self.assert_(not self.fdpanel._data.is_consolidated())
+
+        fdpanel = self.fdpanel.consolidate()
+        self.assert_(fdpanel._data.is_consolidated())
+
+    def test_ctor_dict(self):
+        # Build an FDPanel from a dict of Panels, one of which ('B') only
+        # carries a single item.
+        l1 = self.fdpanel['l1']
+        l2 = self.fdpanel['l2']
+
+        d = {'A' : l1, 'B' : l2.ix[['ItemB'],:,:] }
+        #d2 = {'A' : itema._series, 'B' : itemb[5:]._series}
+        #d3 = {'A' : DataFrame(itema._series),
+        #      'B' : DataFrame(itemb[5:]._series)}
+
+        fdp = FDPanel(d)
+        #wp2 = Panel.from_dict(d2) # nested Dict
+        #wp3 = Panel.from_dict(d3)
+        #self.assert_(wp.major_axis.equals(self.panel.major_axis))
+        assert_panel_equal(fdp['A'], self.fdpanel['l1'])
+        assert_frame_equal(fdp.ix['B','ItemB',:,:],
+                           self.fdpanel.ix['l2',['ItemB'],:,:]['ItemB'])
+
+        # intersect
+        #wp = Panel.from_dict(d, intersect=True)
+        #self.assert_(wp.major_axis.equals(itemb.index[5:]))
+
+        # use constructor
+        #assert_panel_equal(Panel(d), Panel.from_dict(d))
+        #assert_panel_equal(Panel(d2), Panel.from_dict(d2))
+        #assert_panel_equal(Panel(d3), Panel.from_dict(d3))
+
+        # cast
+        #dcasted = dict((k, v.reindex(wp.major_axis).fillna(0))
+        #               for k, v in d.iteritems())
+        #result = Panel(dcasted, dtype=int)
+        #expected = Panel(dict((k, v.astype(int))
+        #                      for k, v in dcasted.iteritems()))
+        #assert_panel_equal(result, expected)
+
+    def test_constructor_dict_mixed(self):
+        # Dict values given as raw ndarrays: without explicit axes the
+        # major axis is expected to be a default integer index.
+        data = dict((k, v.values) for k, v in self.fdpanel.iterkv())
+        result = FDPanel(data)
+        exp_major = Index(np.arange(len(self.fdpanel.major_axis)))
+        self.assert_(result.major_axis.equals(exp_major))
+
+        # With every axis supplied the original object is reproduced.
+        result = FDPanel(data,
+                         labels=self.fdpanel.labels,
+                         items=self.fdpanel.items,
+                         major_axis=self.fdpanel.major_axis,
+                         minor_axis=self.fdpanel.minor_axis)
+        assert_fdpanel_equal(result, self.fdpanel)
+
+        # Mix of ndarray values and one Panel value.
+        data['l2'] = self.fdpanel['l2']
+        result = FDPanel(data)
+        assert_fdpanel_equal(result, self.fdpanel)
+
+        # corner, blow up
+        data['l2'] = data['l2']['ItemB']
+        self.assertRaises(Exception, FDPanel, data)
+
+        data['l2'] = self.fdpanel['l2'].values[:, :, :-1]
+        self.assertRaises(Exception, FDPanel, data)
+
+    def test_constructor_resize(self):
+        # Passing shorter axes to the constructor must give the same
+        # result as reindexing the full object to those axes.
+        data = self.fdpanel._data
+        labels = self.fdpanel.labels[:-1]
+        items = self.fdpanel.items[:-1]
+        major = self.fdpanel.major_axis[:-1]
+        minor = self.fdpanel.minor_axis[:-1]
+
+        result = FDPanel(data, labels=labels, items=items,
+                         major_axis=major, minor_axis=minor)
+        expected = self.fdpanel.reindex(labels=labels, items=items,
+                                        major=major, minor=minor)
+        assert_fdpanel_equal(result, expected)
+
+        result = FDPanel(data, items=items, major_axis=major)
+        expected = self.fdpanel.reindex(items=items, major=major)
+        assert_fdpanel_equal(result, expected)
+
+        result = FDPanel(data, items=items)
+        expected = self.fdpanel.reindex(items=items)
+        assert_fdpanel_equal(result, expected)
+
+        result = FDPanel(data, minor_axis=minor)
+        expected = self.fdpanel.reindex(minor=minor)
+        assert_fdpanel_equal(result, expected)
+
+    def test_from_dict_mixed_orient(self):
+        raise nose.SkipTest
+        # df = tm.makeDataFrame()
+        # df['foo'] = 'bar'
+
+        # data = {'k1' : df,
+        #         'k2' : df}
+
+        # panel = Panel.from_dict(data, orient='minor')
+
+        # self.assert_(panel['foo'].values.dtype == np.object_)
+        # self.assert_(panel['A'].values.dtype == np.float64)
+
+    def test_values(self):
+        # Axis lengths that do not match the data shape must raise.
+        # NOTE(review): exercises Panel rather than FDPanel -- confirm.
+        self.assertRaises(Exception, Panel, np.random.randn(5, 5, 5),
+                          range(5), range(5), range(4))
+
+    def test_conform(self):
+        # conform() of a Panel holding fewer items must come back with
+        # the FDPanel's own items/major/minor axes.
+        p = self.fdpanel['l1'].filter(items=['ItemA', 'ItemB'])
+        conformed = self.fdpanel.conform(p)
+
+        self.assert_(conformed.items.equals(self.fdpanel.items))
+        self.assert_(conformed.major_axis.equals(self.fdpanel.major_axis))
+        self.assert_(conformed.minor_axis.equals(self.fdpanel.minor_axis))
+
+    def test_reindex(self):
+        ref = self.fdpanel['l2']
+
+        # labels
+        result = self.fdpanel.reindex(labels=['l1','l2'])
+        assert_panel_equal(result['l2'], ref)
+
+        # items
+        result = self.fdpanel.reindex(items=['ItemA', 'ItemB'])
+        assert_frame_equal(result['l2']['ItemB'], ref['ItemB'])
+
+        # major
+        new_major = list(self.fdpanel.major_axis[:10])
+        result = self.fdpanel.reindex(major=new_major)
+        assert_frame_equal(result['l2']['ItemB'],
+                           ref['ItemB'].reindex(index=new_major))
+
+        # raise exception put both major and major_axis
+        self.assertRaises(Exception, self.fdpanel.reindex,
+                          major_axis=new_major, major=new_major)
+
+        # minor
+        new_minor = list(self.fdpanel.minor_axis[:2])
+        result = self.fdpanel.reindex(minor=new_minor)
+        assert_frame_equal(result['l2']['ItemB'],
+                           ref['ItemB'].reindex(columns=new_minor))
+
+        # Reindexing to the existing axes must hand back the very same
+        # axis objects.
+        result = self.fdpanel.reindex(labels=self.fdpanel.labels,
+                                      items=self.fdpanel.items,
+                                      major=self.fdpanel.major_axis,
+                                      minor=self.fdpanel.minor_axis)
+
+        self.assert_(result.labels is self.fdpanel.labels)
+        self.assert_(result.items is self.fdpanel.items)
+        self.assert_(result.major_axis is self.fdpanel.major_axis)
+        self.assert_(result.minor_axis is self.fdpanel.minor_axis)
+
+        # Calling reindex with no axis at all is an error.
+        self.assertRaises(Exception, self.fdpanel.reindex)
+
+        # with filling
+        smaller_major = self.fdpanel.major_axis[::5]
+        smaller = self.fdpanel.reindex(major=smaller_major)
+
+        larger = smaller.reindex(major=self.fdpanel.major_axis,
+                                 method='pad')
+
+        assert_panel_equal(larger.ix[:,:,self.fdpanel.major_axis[1],:],
+                           smaller.ix[:,:,smaller_major[0],:])
+
+        # don't necessarily copy
+        result = self.fdpanel.reindex(major=self.fdpanel.major_axis,
+                                      copy=False)
+        self.assert_(result is self.fdpanel)
+
+    def test_reindex_like(self):
+        # reindex_like
+        smaller = self.fdpanel.reindex(labels=self.fdpanel.labels[:-1],
+                                       items=self.fdpanel.items[:-1],
+                                       major=self.fdpanel.major_axis[:-1],
+                                       minor=self.fdpanel.minor_axis[:-1])
+        smaller_like = self.fdpanel.reindex_like(smaller)
+        assert_fdpanel_equal(smaller, smaller_like)
+
+    def test_take(self):
+        raise nose.SkipTest
+        # # axis == 0
+        # result = self.panel.take([2, 0, 1], axis=0)
+        # expected = self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB'])
+        # assert_panel_equal(result, expected)#
+
+        # # axis >= 1
+        # result = self.panel.take([3, 0, 1, 2], axis=2)
+        # expected = self.panel.reindex(minor=['D', 'A', 'B', 'C'])
+        # assert_panel_equal(result, expected)
+
+        # self.assertRaises(Exception, self.panel.take, [3, -1, 1, 2], axis=2)
+        # self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)
+
+    def test_sort_index(self):
+        import random
+
+        # Only rlabels is used by the live assertion; the other shuffled
+        # lists are referenced by the commented-out checks below.
+        rlabels = list(self.fdpanel.labels)
+        ritems = list(self.fdpanel.items)
+        rmajor = list(self.fdpanel.major_axis)
+        rminor = list(self.fdpanel.minor_axis)
+        random.shuffle(rlabels)
+        random.shuffle(ritems)
+        random.shuffle(rmajor)
+        random.shuffle(rminor)
+
+        random_order = self.fdpanel.reindex(labels=rlabels)
+        sorted_fdpanel = random_order.sort_index(axis=0)
+        assert_fdpanel_equal(sorted_fdpanel, self.fdpanel)
+
+        # descending
+        #random_order = self.panel.reindex(items=ritems)
+        #sorted_panel = random_order.sort_index(axis=0, ascending=False)
+        #assert_panel_equal(sorted_panel,
+        #                   self.panel.reindex(items=self.panel.items[::-1]))
+
+        #random_order = self.panel.reindex(major=rmajor)
+        #sorted_panel = random_order.sort_index(axis=1)
+        #assert_panel_equal(sorted_panel, self.panel)
+
+        #random_order = self.panel.reindex(minor=rminor)
+        #sorted_panel = random_order.sort_index(axis=2)
+        #assert_panel_equal(sorted_panel, self.panel)
+
+    def test_fillna(self):
+        # Scalar fill leaves no non-finite values behind.
+        filled = self.fdpanel.fillna(0)
+        self.assert_(np.isfinite(filled.values).all())
+
+        # Method fill must agree with filling each contained Panel.
+        filled = self.fdpanel.fillna(method='backfill')
+        assert_panel_equal(filled['l1'],
+                           self.fdpanel['l1'].fillna(method='backfill'))
+
+        # Same check after a string-valued label has been added.
+        fdpanel = self.fdpanel.copy()
+        fdpanel['str'] = 'foo'
+
+        filled = fdpanel.fillna(method='backfill')
+        assert_panel_equal(filled['l1'],
+                           fdpanel['l1'].fillna(method='backfill'))
+
+        # Filling an object with no labels is a no-op.
+        empty = self.fdpanel.reindex(labels=[])
+        filled = empty.fillna(0)
+        assert_fdpanel_equal(filled, empty)
+
+    def test_swapaxes(self):
+        # After a swap each axis attribute must be the *same object* that
+        # previously sat on the other axis.
+        result = self.fdpanel.swapaxes('labels','items')
+        self.assert_(result.items is self.fdpanel.labels)
+
+        result = self.fdpanel.swapaxes('labels','minor')
+        self.assert_(result.labels is self.fdpanel.minor_axis)
+
+        result = self.fdpanel.swapaxes('items', 'minor')
+        self.assert_(result.items is self.fdpanel.minor_axis)
+
+        result = self.fdpanel.swapaxes('items', 'major')
+        self.assert_(result.items is self.fdpanel.major_axis)
+
+        result = self.fdpanel.swapaxes('major', 'minor')
+        self.assert_(result.major_axis is self.fdpanel.minor_axis)
+
+        # this should also work
+        result = self.fdpanel.swapaxes(0, 1)
+        self.assert_(result.labels is self.fdpanel.items)
+
+        # swapping an axis with itself must raise
+        self.assertRaises(Exception, self.fdpanel.swapaxes, 'items', 'items')
+
+    def test_to_frame(self):
+        raise nose.SkipTest
+        # # filtered
+        # filtered = self.panel.to_frame()
+        # expected = self.panel.to_frame().dropna(how='any')
+        # assert_frame_equal(filtered, expected)
+
+        # # unfiltered
+        # unfiltered = self.panel.to_frame(filter_observations=False)
+        # assert_panel_equal(unfiltered.to_panel(), self.panel)
+
+        # # names
+        # self.assertEqual(unfiltered.index.names, ['major', 'minor'])
+
+    def test_to_frame_mixed(self):
+        raise nose.SkipTest
+        # panel = self.panel.fillna(0)
+        # panel['str'] = 'foo'
+        # panel['bool'] = panel['ItemA'] > 0
+
+        # lp = panel.to_frame()
+        # wp = lp.to_panel()
+        # self.assertEqual(wp['bool'].values.dtype, np.bool_)
+        # assert_frame_equal(wp['bool'], panel['bool'])
+
+    def test_filter(self):
+        # Not implemented: report as skipped rather than as a silent pass.
+        raise nose.SkipTest
+
+    def test_apply(self):
+        # Not implemented: report as skipped rather than as a silent pass.
+        raise nose.SkipTest
+
+    def test_compound(self):
+        raise nose.SkipTest
+        # compounded = self.panel.compound()
+
+        # assert_series_equal(compounded['ItemA'],
+        #                     (1 + self.panel['ItemA']).product(0) - 1)
+
+    def test_shift(self):
+        raise nose.SkipTest
+        # # major
+        # idx = self.panel.major_axis[0]
+        # idx_lag = self.panel.major_axis[1]
+
+        # shifted = self.panel.shift(1)
+
+        # assert_frame_equal(self.panel.major_xs(idx),
+        #                    shifted.major_xs(idx_lag))
+
+        # # minor
+        # idx = self.panel.minor_axis[0]
+        # idx_lag = self.panel.minor_axis[1]
+
+        # shifted = self.panel.shift(1, axis='minor')
+
+        # assert_frame_equal(self.panel.minor_xs(idx),
+        #                    shifted.minor_xs(idx_lag))
+
+        # self.assertRaises(Exception, self.panel.shift, 1, axis='items')
+
+    def test_multiindex_get(self):
+        raise nose.SkipTest
+        # ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b',2)],
+        #                              names=['first', 'second'])
+        # wp = Panel(np.random.random((4,5,5)),
+        #                             items=ind,
+        #                             major_axis=np.arange(5),
+        #                             minor_axis=np.arange(5))
+        # f1 = wp['a']
+        # f2 = wp.ix['a']
+        # assert_panel_equal(f1, f2)
+
+        # self.assert_((f1.items == [1, 2]).all())
+        # self.assert_((f2.items == [1, 2]).all())
+
+        # ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)],
+        #                              names=['first', 'second'])
+
+    def test_multiindex_blocks(self):
+        raise nose.SkipTest
+        # ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)],
+        #                              names=['first', 'second'])
+        # wp = Panel(self.panel._data)
+        # wp.items = ind
+        # f1 = wp['a']
+        # self.assert_((f1.items == [1, 2]).all())
+
+        # f1 = wp[('b',1)]
+        # self.assert_((f1.columns == ['A', 'B', 'C', 'D']).all())
+
+    def test_repr_empty(self):
+        # repr() of an empty FDPanel must not raise.
+        empty = FDPanel()
+        repr(empty)
+
+    def test_rename(self):
+        mapper = {
+            'l1' : 'foo',
+            'l2' : 'bar',
+            'l3' : 'baz'
+        }
+
+        # Dict mapper applied to axis 0 (the labels axis).
+        renamed = self.fdpanel.rename_axis(mapper, axis=0)
+        exp = Index(['foo', 'bar', 'baz'])
+        self.assert_(renamed.labels.equals(exp))
+
+        # Callable mapper applied to axis 3 (the minor axis).
+        renamed = self.fdpanel.rename_axis(str.lower, axis=3)
+        exp = Index(['a', 'b', 'c', 'd'])
+        self.assert_(renamed.minor_axis.equals(exp))
+
+        # don't copy
+        # Writing through the renamed object must show up in the original.
+        renamed_nocopy = self.fdpanel.rename_axis(mapper, axis=0, copy=False)
+        renamed_nocopy['foo'] = 3.
+        self.assert_((self.fdpanel['l1'].values == 3).all())
+
+    def test_get_attr(self):
+        # Attribute access on a label is equivalent to item access.
+        assert_panel_equal(self.fdpanel['l1'], self.fdpanel.l1)
+
+    def test_group_agg(self):
+        # Row i of `values` holds i in both columns; `bounds` starts a
+        # new group every two rows, so group k has mean 2k + 0.5.
+        values = np.ones((10, 2)) * np.arange(10).reshape((10, 1))
+        bounds = np.arange(5) * 2
+        f = lambda x: x.mean(axis=0)
+
+        agged = group_agg(values, bounds, f)
+
+        self.assertEqual(agged[1][0], 2.5)
+        self.assertEqual(agged[2][0], 4.5)
+
+        # test a function that doesn't aggregate
+        f2 = lambda x: np.zeros((2,2))
+        self.assertRaises(Exception, group_agg, values, bounds, f2)
+
+    def test_from_frame_level1_unsorted(self):
+        raise nose.SkipTest
+        # tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2),
+        #           ('AAPL', 1), ('MSFT', 1)]
+        # midx = MultiIndex.from_tuples(tuples)
+        # df = DataFrame(np.random.rand(5,4), index=midx)
+        # p = df.to_panel()
+        # assert_frame_equal(p.minor_xs(2), df.ix[:,2].sort_index())
+
+    def test_to_excel(self):
+        raise nose.SkipTest
+        # try:
+        #     import xlwt
+        #     import xlrd
+        #     import openpyxl
+        # except ImportError:
+        #     raise nose.SkipTest
+
+        # for ext in ['xls', 'xlsx']:
+        #     path = '__tmp__.' + ext
+        #     self.panel.to_excel(path)
+        #     reader = ExcelFile(path)
+        #     for item, df in self.panel.iteritems():
+        #         recdf = reader.parse(str(item),index_col=0)
+        #         assert_frame_equal(df, recdf)
+        #     os.remove(path)
+
+
+if __name__ == '__main__':
+    import nose
+    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+                   exit=False)
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
old mode 100644
new mode 100755
index 01117f3e9b4c0..8fc0aadd030f5
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -17,6 +17,7 @@
 import pandas.core.series as series
 import pandas.core.frame as frame
 import pandas.core.panel as panel
+import pandas.core.fdpanel as fdpanel
 
 from pandas import bdate_range
 from pandas.tseries.index import DatetimeIndex
@@ -28,6 +29,7 @@
 Series = series.Series
 DataFrame = frame.DataFrame
 Panel = panel.Panel
+FDPanel = fdpanel.FDPanel
 
 N = 30
 K = 4
@@ -173,6 +175,19 @@ def assert_panel_equal(left, right, check_panel_type=False):
     for col in right:
         assert(col in left)
 
+def assert_fdpanel_equal(left, right):
+    assert(left.labels.equals(right.labels))
+    assert(left.items.equals(right.items))
+    assert(left.major_axis.equals(right.major_axis))
+    assert(left.minor_axis.equals(right.minor_axis))
+
+    for col, series in left.iterkv():
+        assert(col in right)
+        assert_panel_equal(series, right[col])
+
+    for col in right:
+        assert(col in left)
+
 def assert_contains_all(iterable, dic):
     for k in iterable:
         assert(k in dic)
@@ -266,6 +281,9 @@ def makePanel():
     data = dict((c, makeTimeDataFrame()) for c in cols)
     return Panel.fromDict(data)
 
+def makeFDPanel():
+    return FDPanel(dict(l1 = makePanel(), l2 = makePanel(), l3 = makePanel()))
+
 def add_nans(panel):
     I, J, N = panel.shape
     for i, item in enumerate(panel.items):