diff --git a/README.rst b/README.rst index 04ae98030..f15967fa7 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Library Highlights * User interface with an IPython console for rapid exploration of data -* Compatible with the pandas library: LArray objects can be converted into pandas DataFrame and vice versa. +* Compatible with the pandas library: Array objects can be converted into pandas DataFrame and vice versa. .. _start-install: diff --git a/doc/source/api.rst b/doc/source/api.rst index a23f3794b..41a9f9001 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -217,10 +217,10 @@ Testing AxisCollection.isaxis AxisCollection.check_compatible -.. _api-larray: +.. _api-array: -LArray -====== +Array +===== * :ref:`la_overview` * :ref:`la_creation_func` @@ -247,7 +247,7 @@ Overview .. autosummary:: :toctree: _generated/ - LArray + Array .. _la_creation_func: @@ -276,32 +276,32 @@ Copying .. autosummary:: :toctree: _generated/ - LArray.copy - LArray.astype + Array.copy + Array.astype .. _la_inspecting: Inspecting ---------- -=================== ============================================================== -LArray.data Data of the array (Numpy ndarray) -------------------- -------------------------------------------------------------- -LArray.axes Axes of the array (AxisCollection) -------------------- -------------------------------------------------------------- -LArray.title Title of the array (str) -=================== ============================================================== +================== ============================================================== +Array.data Data of the array (Numpy ndarray) +------------------ -------------------------------------------------------------- +Array.axes Axes of the array (AxisCollection) +------------------ -------------------------------------------------------------- +Array.title Title of the array (str) +================== ============================================================== .. 
autosummary:: :toctree: _generated/ - LArray.info - LArray.shape - LArray.ndim - LArray.dtype - LArray.size - LArray.nbytes - LArray.memory_used + Array.info + Array.shape + Array.ndim + Array.dtype + Array.size + Array.nbytes + Array.memory_used .. _la_selecting: @@ -311,16 +311,16 @@ Modifying/Selecting .. autosummary:: :toctree: _generated/ - LArray.i - LArray.points - LArray.ipoints - LArray.iflat - LArray.set - LArray.drop - LArray.ignore_labels - LArray.filter - LArray.apply - LArray.apply_map + Array.i + Array.points + Array.ipoints + Array.iflat + Array.set + Array.drop + Array.ignore_labels + Array.filter + Array.apply + Array.apply_map .. _la_axes_labels: @@ -330,12 +330,12 @@ Changing Axes or Labels .. autosummary:: :toctree: _generated/ - LArray.set_axes - LArray.rename - LArray.set_labels - LArray.combine_axes - LArray.split_axes - LArray.reverse + Array.set_axes + Array.rename + Array.set_labels + Array.combine_axes + Array.split_axes + Array.reverse .. _la_agg: @@ -345,30 +345,30 @@ Aggregation Functions .. autosummary:: :toctree: _generated/ - LArray.sum - LArray.sum_by - LArray.prod - LArray.prod_by - LArray.cumsum - LArray.cumprod - LArray.mean - LArray.mean_by - LArray.median - LArray.median_by - LArray.var - LArray.var_by - LArray.std - LArray.std_by - LArray.percentile - LArray.percentile_by - LArray.ptp - LArray.with_total - LArray.percent - LArray.ratio - LArray.rationot0 - LArray.growth_rate - LArray.describe - LArray.describe_by + Array.sum + Array.sum_by + Array.prod + Array.prod_by + Array.cumsum + Array.cumprod + Array.mean + Array.mean_by + Array.median + Array.median_by + Array.var + Array.var_by + Array.std + Array.std_by + Array.percentile + Array.percentile_by + Array.ptp + Array.with_total + Array.percent + Array.ratio + Array.rationot0 + Array.growth_rate + Array.describe + Array.describe_by .. _la_sorting: @@ -378,10 +378,10 @@ Sorting .. 
autosummary:: :toctree: _generated/ - LArray.sort_axes - LArray.sort_values - LArray.labelsofsorted - LArray.indicesofsorted + Array.sort_axes + Array.sort_values + Array.labelsofsorted + Array.indicesofsorted .. _la_reshaping: @@ -391,18 +391,18 @@ Reshaping/Extending/Reordering .. autosummary:: :toctree: _generated/ - LArray.reshape - LArray.reshape_like - LArray.compact - LArray.reindex - LArray.transpose - LArray.expand - LArray.prepend - LArray.append - LArray.extend - LArray.insert - LArray.broadcast_with - LArray.align + Array.reshape + Array.reshape_like + Array.compact + Array.reindex + Array.transpose + Array.expand + Array.prepend + Array.append + Array.extend + Array.insert + Array.broadcast_with + Array.align .. _la_testing: @@ -412,22 +412,22 @@ Testing/Searching .. autosummary:: :toctree: _generated/ - LArray.equals - LArray.eq - LArray.isin - LArray.nonzero - LArray.all - LArray.all_by - LArray.any - LArray.any_by - LArray.min - LArray.min_by - LArray.max - LArray.max_by - LArray.labelofmin - LArray.indexofmin - LArray.labelofmax - LArray.indexofmax + Array.equals + Array.eq + Array.isin + Array.nonzero + Array.all + Array.all_by + Array.any + Array.any_by + Array.min + Array.min_by + Array.max + Array.max_by + Array.labelofmin + Array.indexofmin + Array.labelofmax + Array.indexofmax .. _la_iter: @@ -437,18 +437,18 @@ Iterating .. autosummary:: :toctree: _generated/ - LArray.keys - LArray.values - LArray.items + Array.keys + Array.values + Array.items .. _la_op: Operators --------- -=================================================== ============================== -:py:meth:`@ ` Matrix multiplication -=================================================== ============================== +================================================== ============================== +:py:meth:`@ ` Matrix multiplication +================================================== ============================== .. _la_misc: @@ -458,13 +458,13 @@ Miscellaneous .. 
autosummary:: :toctree: _generated/ - LArray.divnot0 - LArray.clip - LArray.shift - LArray.roll - LArray.diff - LArray.unique - LArray.to_clipboard + Array.divnot0 + Array.clip + Array.shift + Array.roll + Array.diff + Array.unique + Array.to_clipboard .. _la_to_pandas: @@ -474,8 +474,8 @@ Converting to Pandas objects .. autosummary:: :toctree: _generated/ - LArray.to_series - LArray.to_frame + Array.to_series + Array.to_frame .. _la_plotting: @@ -485,7 +485,7 @@ Plotting .. autosummary:: :toctree: _generated/ - LArray.plot + Array.plot .. _api-ufuncs: @@ -652,11 +652,11 @@ Write .. autosummary:: :toctree: _generated/ - LArray.to_csv - LArray.to_excel - LArray.to_hdf - LArray.to_stata - LArray.dump + Array.to_csv + Array.to_excel + Array.to_hdf + Array.to_stata + Array.dump Excel ===== @@ -714,7 +714,7 @@ Miscellaneous .. autosummary:: :toctree: _generated/ - aslarray + asarray from_frame from_series get_example_filepath diff --git a/doc/source/changes/template.rst.inc b/doc/source/changes/template.rst.inc index bbb2f6750..a23f5c467 100644 --- a/doc/source/changes/template.rst.inc +++ b/doc/source/changes/template.rst.inc @@ -4,9 +4,9 @@ Syntax changes ^^^^^^^^^^^^^^ -* renamed ``LArray.old_method_name()`` to :py:obj:`LArray.new_method_name()` (closes :issue:`1`). +* renamed ``Array.old_method_name()`` to :py:obj:`Array.new_method_name()` (closes :issue:`1`). -* renamed ``old_argument_name`` argument of :py:obj:`LArray.method_name()` to ``new_argument_name``. +* renamed ``old_argument_name`` argument of :py:obj:`Array.method_name()` to ``new_argument_name``. Backward incompatible changes diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index 0d67d91e2..2dc9f5589 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -4,9 +4,7 @@ Syntax changes ^^^^^^^^^^^^^^ -* renamed ``LArray.old_method_name()`` to :py:obj:`LArray.new_method_name()` (closes :issue:`1`). 
- -* renamed ``old_argument_name`` argument of :py:obj:`LArray.method_name()` to ``new_argument_name``. +* renamed the ``LArray`` class to :py:obj:`Array` (closes :issue:`611`). Backward incompatible changes diff --git a/doc/source/tutorial/getting_started.rst b/doc/source/tutorial/getting_started.rst index a680c13fc..b111b6da7 100644 --- a/doc/source/tutorial/getting_started.rst +++ b/doc/source/tutorial/getting_started.rst @@ -19,7 +19,7 @@ To use the LArray library, the first thing to do is to import it: Create an array --------------- -Working with the LArray library mainly consists of manipulating :ref:`LArray ` data structures. +Working with the LArray library mainly consists of manipulating :ref:`Array ` data structures. They represent N-dimensional labelled arrays and are composed of raw data (NumPy ndarray), :ref:`axes ` and optionally some metadata. @@ -49,8 +49,8 @@ To create an array from scratch, you need to supply data and axes: [[1023, 1038, 1053], [756, 775, 793]]] - # create an LArray object - pop = LArray(data, axes=[age, sex, year]) + # create an Array object + pop = Array(data, axes=[age, sex, year]) pop You can optionally attach some metadata to an array: @@ -113,7 +113,7 @@ Save/Load an array The LArray library offers many I/O functions to read and write arrays in various formats (CSV, Excel, HDF5). For example, to save an array in a CSV file, call the method -:py:meth:`~LArray.to_csv`: +:py:meth:`~Array.to_csv`: .. ipython:: python @@ -317,7 +317,7 @@ Grouping arrays in a Session ---------------------------- Arrays may be grouped in :ref:`Session ` objects. -A session is an ordered dict-like container of LArray objects with special I/O methods. +A session is an ordered dict-like container of Array objects with special I/O methods. To create a session, you need to pass a list of pairs (array_name, array): .. 
ipython:: python diff --git a/doc/source/tutorial/pandas.rst b/doc/source/tutorial/pandas.rst index 8869dc171..9d23f9635 100644 --- a/doc/source/tutorial/pandas.rst +++ b/doc/source/tutorial/pandas.rst @@ -1,16 +1,16 @@ Compatibility with pandas ========================= -To convert a LArray object into a pandas DataFrame, the method :py:meth:`~LArray.to_frame` can be used: +To convert an Array object into a pandas DataFrame, the method :py:meth:`~Array.to_frame` can be used: .. ipython:: python df = pop.to_frame() df -Inversely, to convert a DataFrame into a LArray object, use the function :py:func:`aslarray`: +Inversely, to convert a DataFrame into an Array object, use the function :py:func:`asarray`: .. ipython:: python - pop = aslarray(df) + pop = asarray(df) pop diff --git a/doc/source/tutorial/tutorial_IO.ipyml b/doc/source/tutorial/tutorial_IO.ipyml index 23296c9ac..27012c187 100644 --- a/doc/source/tutorial/tutorial_IO.ipyml +++ b/doc/source/tutorial/tutorial_IO.ipyml @@ -5,7 +5,7 @@ cells: - markdown: | - LArray provides methods and functions to load and dump LArray, Session, Axis Group objects to several formats such as Excel, CSV and HDF5. The HDF5 file format is designed to store and organize large amounts of data. It allows to read and write data much faster than when working with CSV and Excel files. + The LArray library provides methods and functions to load and dump Array, Session, Axis and Group objects to several formats such as Excel, CSV and HDF5. The HDF5 file format is designed to store and organize large amounts of data. It allows reading and writing data much faster than when working with CSV and Excel files. 
- code: | diff --git a/doc/source/tutorial/tutorial_IO.ipynb b/doc/source/tutorial/tutorial_IO.ipynb index 6d9225e23..6bf0e7b41 100644 --- a/doc/source/tutorial/tutorial_IO.ipynb +++ b/doc/source/tutorial/tutorial_IO.ipynb @@ -1,1066 +1,1066 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Load And Dump Arrays, Sessions, Axes And Groups\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "LArray provides methods and functions to load and dump LArray, Session, Axis Group objects to several formats such as Excel, CSV and HDF5. The HDF5 file format is designed to store and organize large amounts of data. It allows to read and write data much faster than when working with CSV and Excel files. \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "nbsphinx": "hidden" - }, - "outputs": [], - "source": [ - "# run this cell to avoid annoying warnings\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\", message=r'.*numpy.dtype size changed*')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# first of all, import the LArray library\n", - "from larray import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Check the version of LArray:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from larray import __version__\n", - "__version__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Loading and Dumping Arrays\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Loading Arrays - Basic Usage (CSV, Excel, HDF5)\n", - "\n", - "To read an array from a CSV file, you must use the ``read_csv`` function:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "csv_dir = 
get_example_filepath('examples')\n", - "\n", - "# read the array pop from the file 'pop.csv'.\n", - "# The data of the array below is derived from a subset of the demo_pjan table from Eurostat\n", - "pop = read_csv(csv_dir + '/pop.csv')\n", - "pop" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To read an array from a sheet of an Excel file, you can use the ``read_excel`` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "filepath_excel = get_example_filepath('examples.xlsx')\n", - "\n", - "# read the array from the sheet 'births' of the Excel file 'examples.xlsx'\n", - "# The data of the array below is derived from a subset of the demo_fasec table from Eurostat\n", - "births = read_excel(filepath_excel, 'births')\n", - "births" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The ``open_excel`` function in combination with the ``load`` method allows you to load several arrays from the same Workbook without opening and closing it several times:\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "# open the Excel file 'population.xlsx' and let it opened as long as you keep the indent.\n", - "# The Python keyword ``with`` ensures that the Excel file is properly closed even if an error occurs\n", - "with open_excel(filepath_excel) as wb:\n", - " # load the array 'pop' from the sheet 'pop' \n", - " pop = wb['pop'].load()\n", - " # load the array 'births' from the sheet 'births'\n", - " births = wb['births'].load()\n", - " # load the array 'deaths' from the sheet 'deaths'\n", - " deaths = wb['deaths'].load()\n", - "\n", - "# the Workbook is automatically closed when getting out the block defined by the with statement\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
\n", - " **Warning:** `open_excel` requires to work on Windows and to have the library ``xlwings`` installed.\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `HDF5` file format is specifically designed to store and organize large amounts of data. \n", - "Reading and writing data in this file format is much faster than with CSV or Excel. \n", - "An HDF5 file can contain multiple arrays, each array being associated with a key.\n", - "To read an array from an HDF5 file, you must use the ``read_hdf`` function and provide the key associated with the array:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "filepath_hdf = get_example_filepath('examples.h5')\n", - "\n", - "# read the array from the file 'examples.h5' associated with the key 'deaths'\n", - "# The data of the array below is derived from a subset of the demo_magec table from Eurostat\n", - "deaths = read_hdf(filepath_hdf, 'deaths')\n", - "deaths" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dumping Arrays - Basic Usage (CSV, Excel, HDF5)\n", - "\n", - "To write an array in a CSV file, you must use the ``to_csv`` method:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save the array pop in the file 'pop.csv'\n", - "pop.to_csv('pop.csv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To write an array to a sheet of an Excel file, you can use the ``to_excel`` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save the array pop in the sheet 'pop' of the Excel file 'population.xlsx' \n", - "pop.to_excel('population.xlsx', 'pop')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that ``to_excel`` create a new Excel file if it does not exist yet. 
\n", - "If the file already exists, a new sheet is added after the existing ones if that sheet does not already exists:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# add a new sheet 'births' to the file 'population.xlsx' and save the array births in it\n", - "births.to_excel('population.xlsx', 'births')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To reset an Excel file, you simply need to set the `overwrite_file` argument as True:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 1. reset the file 'population.xlsx' (all sheets are removed)\n", - "# 2. create a sheet 'pop' and save the array pop in it\n", - "pop.to_excel('population.xlsx', 'pop', overwrite_file=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The ``open_excel`` function in combination with the ``dump()`` method allows you to open a Workbook and to export several arrays at once. If the Excel file doesn't exist, the ``overwrite_file`` argument must be set to True.\n", - "\n", - "
\n", - " **Warning:** The ``save`` method must be called at the end of the block defined by the *with* statement to actually write data in the Excel file, otherwise you will end up with an empty file.\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "# to create a new Excel file, argument overwrite_file must be set to True\n", - "with open_excel('population.xlsx', overwrite_file=True) as wb:\n", - " # add a new sheet 'pop' and dump the array pop in it \n", - " wb['pop'] = pop.dump()\n", - " # add a new sheet 'births' and dump the array births in it \n", - " wb['births'] = births.dump()\n", - " # add a new sheet 'deaths' and dump the array deaths in it \n", - " wb['deaths'] = deaths.dump()\n", - " # actually write data in the Workbook\n", - " wb.save()\n", - " \n", - "# the Workbook is automatically closed when getting out the block defined by the with statement\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To write an array in an HDF5 file, you must use the ``to_hdf`` function and provide the key that will be associated with the array:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save the array pop in the file 'population.h5' and associate it with the key 'pop'\n", - "pop.to_hdf('population.h5', 'pop')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Specifying Wide VS Narrow format (CSV, Excel)\n", - "\n", - "By default, all reading functions assume that arrays are stored in the ``wide`` format, meaning that their last axis is represented horizontally:\n", - "\n", - "| country \\\\ time | 2013 | 2014 | 2015 |\n", - "| --------------- | -------- | -------- | -------- |\n", - "| Belgium | 11137974 | 11180840 | 11237274 |\n", - "| France | 65600350 | 65942267 | 66456279 |\n", - "\n", - "By setting the ``wide`` argument to False, reading functions will assume instead that arrays are stored in the ``narrow`` format, i.e. 
one column per axis plus one value column:\n", - "\n", - "| country | time | value |\n", - "| ------- | ---- | -------- |\n", - "| Belgium | 2013 | 11137974 |\n", - "| Belgium | 2014 | 11180840 |\n", - "| Belgium | 2015 | 11237274 |\n", - "| France | 2013 | 65600350 |\n", - "| France | 2014 | 65942267 |\n", - "| France | 2015 | 66456279 |\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# set 'wide' argument to False to indicate that the array is stored in the 'narrow' format\n", - "pop_BE_FR = read_csv(csv_dir + '/pop_narrow_format.csv', wide=False)\n", - "pop_BE_FR" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# same for the read_excel function\n", - "pop_BE_FR = read_excel(filepath_excel, sheet='pop_narrow_format', wide=False)\n", - "pop_BE_FR" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By default, writing functions will set the name of the column containing the data to 'value'. You can choose the name of this column by using the ``value_name`` argument. 
For example, using ``value_name='population'`` you can export the previous array as:\n", - "\n", - "| country | time | population |\n", - "| ------- | ---- | ---------- |\n", - "| Belgium | 2013 | 11137974 |\n", - "| Belgium | 2014 | 11180840 |\n", - "| Belgium | 2015 | 11237274 |\n", - "| France | 2013 | 65600350 |\n", - "| France | 2014 | 65942267 |\n", - "| France | 2015 | 66456279 |\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# dump the array pop_BE_FR in a narrow format (one column per axis plus one value column).\n", - "# By default, the name of the column containing data is set to 'value'\n", - "pop_BE_FR.to_csv('pop_narrow_format.csv', wide=False)\n", - "\n", - "# same but replace 'value' by 'population'\n", - "pop_BE_FR.to_csv('pop_narrow_format.csv', wide=False, value_name='population')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# same for the to_excel method\n", - "pop_BE_FR.to_excel('population.xlsx', 'pop_narrow_format', wide=False, value_name='population')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Like with the ``to_excel`` method, it is possible to export arrays in a ``narrow`` format using ``open_excel``. 
\n", - "To do so, you must set the ``wide`` argument of the ``dump`` method to False:\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "with open_excel('population.xlsx') as wb:\n", - " # dump the array pop_BE_FR in a narrow format: \n", - " # one column per axis plus one value column.\n", - " # Argument value_name can be used to change the name of the \n", - " # column containing the data (default name is 'value')\n", - " wb['pop_narrow_format'] = pop_BE_FR.dump(wide=False, value_name='population')\n", - " # don't forget to call save()\n", - " wb.save()\n", - "\n", - "# in the sheet 'pop_narrow_format', data is written as:\n", - "# | country | time | value |\n", - "# | ------- | ---- | -------- |\n", - "# | Belgium | 2013 | 11137974 |\n", - "# | Belgium | 2014 | 11180840 |\n", - "# | Belgium | 2015 | 11237274 |\n", - "# | France | 2013 | 65600350 |\n", - "# | France | 2014 | 65942267 |\n", - "# | France | 2015 | 66456279 |\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Specifying Position in Sheet (Excel)\n", - "\n", - "If you want to read an array from an Excel sheet which does not start at cell `A1` (when there is more than one array stored in the same sheet for example), you will need to use the ``range`` argument. \n", - "\n", - "
\n", - " **Warning:** Note that the ``range`` argument is only available if you have the library ``xlwings`` installed (Windows).\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "# the 'range' argument must be used to load data not starting at cell A1.\n", - "# This is useful when there is several arrays stored in the same sheet\n", - "births = read_excel(filepath_excel, sheet='pop_births_deaths', range='A9:E15')\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using ``open_excel``, ranges are passed in brackets:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "with open_excel(filepath_excel) as wb:\n", - " # store sheet 'pop_births_deaths' in a temporary variable sh\n", - " sh = wb['pop_births_deaths']\n", - " # load the array pop from range A1:E7\n", - " pop = sh['A1:E7'].load()\n", - " # load the array births from range A9:E15\n", - " births = sh['A9:E15'].load()\n", - " # load the array deaths from range A17:E23\n", - " deaths = sh['A17:E23'].load()\n", - "\n", - "# the Workbook is automatically closed when getting out the block defined by the with statement\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "When exporting arrays to Excel files, data is written starting at cell `A1` by default. Using the ``position`` argument of the ``to_excel`` method, it is possible to specify the top left cell of the dumped data. This can be useful when you want to export several arrays in the same sheet for example\n", - "\n", - "
\n", - " **Warning:** Note that the ``position`` argument is only available if you have the library ``xlwings`` installed (Windows).\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "filename = 'population.xlsx'\n", - "sheetname = 'pop_births_deaths'\n", - "\n", - "# save the arrays pop, births and deaths in the same sheet 'pop_births_and_deaths'.\n", - "# The 'position' argument is used to shift the location of the second and third arrays to be dumped\n", - "pop.to_excel(filename, sheetname)\n", - "births.to_excel(filename, sheetname, position='A9')\n", - "deaths.to_excel(filename, sheetname, position='A17')\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using ``open_excel``, the position is passed in brackets (this allows you to also add extra informations): \n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "with open_excel('population.xlsx') as wb:\n", - " # add a new sheet 'pop_births_deaths' and write 'population' in the first cell\n", - " # note: you can use wb['new_sheet_name'] = '' to create an empty sheet\n", - " wb['pop_births_deaths'] = 'population'\n", - " # store sheet 'pop_births_deaths' in a temporary variable sh\n", - " sh = wb['pop_births_deaths']\n", - " # dump the array pop in sheet 'pop_births_deaths' starting at cell A2\n", - " sh['A2'] = pop.dump()\n", - " # add 'births' in cell A10\n", - " sh['A10'] = 'births'\n", - " # dump the array births in sheet 'pop_births_deaths' starting at cell A11 \n", - " sh['A11'] = births.dump()\n", - " # add 'deaths' in cell A19\n", - " sh['A19'] = 'deaths'\n", - " # dump the array deaths in sheet 'pop_births_deaths' starting at cell A20\n", - " sh['A20'] = deaths.dump()\n", - " # don't forget to call save()\n", - " wb.save()\n", - " \n", - "# the Workbook is automatically closed when getting out the block defined by the with statement\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exporting data without headers (Excel)\n", - "\n", - "For some reasons, you may want to 
export only the data of an array without axes. For example, you may want to insert a new column containing extra information. As an exercise, let us consider we want to add the capital city for each country present in the array containing the total population by country:\n", - "\n", - "| country | capital city | 2013 | 2014 | 2015 |\n", - "| ------- | ------------ | -------- | -------- | -------- |\n", - "| Belgium | Brussels | 11137974 | 11180840 | 11237274 |\n", - "| France | Paris | 65600350 | 65942267 | 66456279 |\n", - "| Germany | Berlin | 80523746 | 80767463 | 81197537 |\n", - "\n", - "Assuming you have prepared an excel sheet as below: \n", - "\n", - "| country | capital city | 2013 | 2014 | 2015 |\n", - "| ------- | ------------ | -------- | -------- | -------- |\n", - "| Belgium | Brussels | | | |\n", - "| France | Paris | | | |\n", - "| Germany | Berlin | | | ||\n", - "\n", - "you can then dump the data at right place by setting the ``header`` argument of ``to_excel`` to False and specifying the position of the data in sheet:\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "pop_by_country = pop.sum('gender')\n", - "\n", - "# export only the data of the array pop_by_country starting at cell C2\n", - "pop_by_country.to_excel('population.xlsx', 'pop_by_country', header=False, position='C2')\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using ``open_excel``, you can easily prepare the sheet and then export only data at the right place by either setting the ``header`` argument of the ``dump`` method to False or avoiding to call ``dump``:\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "with open_excel('population.xlsx') as wb:\n", - " # create new empty sheet 'pop_by_country'\n", - " wb['pop_by_country'] = ''\n", - " # store sheet 'pop_by_country' in a temporary variable sh\n", - " sh = wb['pop_by_country']\n", - " # write extra 
information (description)\n", - " sh['A1'] = 'Population at 1st January by country'\n", - " # export column names\n", - " sh['A2'] = ['country', 'capital city']\n", - " sh['C2'] = pop_by_country.time.labels\n", - " # export countries as first column\n", - " sh['A3'].options(transpose=True).value = pop_by_country.country.labels\n", - " # export capital cities as second column\n", - " sh['B3'].options(transpose=True).value = ['Brussels', 'Paris', 'Berlin']\n", - " # export only data of pop_by_country\n", - " sh['C3'] = pop_by_country.dump(header=False)\n", - " # or equivalently\n", - " sh['C3'] = pop_by_country\n", - " # don't forget to call save()\n", - " wb.save()\n", - " \n", - "# the Workbook is automatically closed when getting out the block defined by the with statement\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Specifying the Number of Axes at Reading (CSV, Excel)\n", - "\n", - "By default, ``read_csv`` and ``read_excel`` will search the position of the first cell containing the special character ``\\`` in the header line in order to determine the number of axes of the array to read. The special character ``\\`` is used to separate the name of the two last axes. If there is no special character ``\\``, ``read_csv`` and ``read_excel`` will consider that the array to read has only one dimension. 
For an array stored as:\n", - "\n", - "| country | gender \\\\ time | 2013 | 2014 | 2015 |\n", - "| ------- | -------------- | -------- | -------- | -------- |\n", - "| Belgium | Male | 5472856 | 5493792 | 5524068 |\n", - "| Belgium | Female | 5665118 | 5687048 | 5713206 |\n", - "| France | Male | 31772665 | 31936596 | 32175328 |\n", - "| France | Female | 33827685 | 34005671 | 34280951 |\n", - "| Germany | Male | 39380976 | 39556923 | 39835457 |\n", - "| Germany | Female | 41142770 | 41210540 | 41362080 |\n", - "\n", - "``read_csv`` and ``read_excel`` will find the special character ``\\`` in the second cell meaning it expects three axes (country, gender and time). \n", - "\n", - "Sometimes, you need to read an array for which the name of the last axis is implicit: \n", - "\n", - "| country | gender | 2013 | 2014 | 2015 |\n", - "| ------- | ------ | -------- | -------- | -------- |\n", - "| Belgium | Male | 5472856 | 5493792 | 5524068 |\n", - "| Belgium | Female | 5665118 | 5687048 | 5713206 |\n", - "| France | Male | 31772665 | 31936596 | 32175328 |\n", - "| France | Female | 33827685 | 34005671 | 34280951 |\n", - "| Germany | Male | 39380976 | 39556923 | 39835457 |\n", - "| Germany | Female | 41142770 | 41210540 | 41362080 |\n", - "\n", - "For such case, you will have to inform ``read_csv`` and ``read_excel`` of the number of axes of the output array by setting the ``nb_axes`` argument:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read the 3 x 2 x 3 array stored in the file 'pop_missing_axis_name.csv' wihout using 'nb_axes' argument.\n", - "pop = read_csv(csv_dir + '/pop_missing_axis_name.csv')\n", - "# shape and data type of the output array are not what we expected\n", - "pop.info" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# by setting the 'nb_axes' argument, you can indicate to read_csv the number of axes of the 
output array\n", - "pop = read_csv(csv_dir + '/pop_missing_axis_name.csv', nb_axes=3)\n", - "\n", - "# give a name to the last axis\n", - "pop = pop.rename(-1, 'time')\n", - "\n", - "# shape and data type of the output array are what we expected\n", - "pop.info" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# same for the read_excel function\n", - "pop = read_excel(filepath_excel, sheet='pop_missing_axis_name', nb_axes=3)\n", - "pop = pop.rename(-1, 'time')\n", - "pop.info" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### NaNs and Missing Data Handling at Reading (CSV, Excel)\n", - "\n", - "Sometimes, there is no data available for some label combinations. In the example below, the rows corresponding to `France - Male` and `Germany - Female` are missing:\n", - "\n", - "| country | gender \\\\ time | 2013 | 2014 | 2015 |\n", - "| ------- | -------------- | -------- | -------- | -------- |\n", - "| Belgium | Male | 5472856 | 5493792 | 5524068 |\n", - "| Belgium | Female | 5665118 | 5687048 | 5713206 |\n", - "| France | Female | 33827685 | 34005671 | 34280951 |\n", - "| Germany | Male | 39380976 | 39556923 | 39835457 |\n", - "\n", - "By default, ``read_csv`` and ``read_excel`` will fill cells associated with missing label combinations with nans. \n", - "Be aware that, in that case, an int array will be converted to a float array." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# by default, cells associated will missing label combinations are filled with nans.\n", - "# In that case, the output array is converted to a float array\n", - "read_csv(csv_dir + '/pop_missing_values.csv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "However, it is possible to choose which value to use to fill missing cells using the ``fill_value`` argument:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "read_csv(csv_dir + '/pop_missing_values.csv', fill_value=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# same for the read_excel function\n", - "read_excel(filepath_excel, sheet='pop_missing_values', fill_value=0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Sorting Axes at Reading (CSV, Excel, HDF5)\n", - "\n", - "The ``sort_rows`` and ``sort_columns`` arguments of the reading functions allows you to sort rows and columns alphabetically:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# sort labels at reading --> Male and Female labels are inverted\n", - "read_csv(csv_dir + '/pop.csv', sort_rows=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "read_excel(filepath_excel, sheet='births', sort_rows=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "read_hdf(filepath_hdf, key='deaths').sort_axes()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Metadata (HDF5)\n", - "\n", - "Since the version 0.29 of LArray, it is possible to add metadata to arrays:" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "metadata": {}, - "outputs": [], - "source": [ - "pop.meta.title = 'Population at 1st January'\n", - "pop.meta.origin = 'Table demo_jpan from Eurostat'\n", - "\n", - "pop.info" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "These metadata are automatically saved and loaded when working with the HDF5 file format: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pop.to_hdf('population.h5', 'pop')\n", - "\n", - "new_pop = read_hdf('population.h5', 'pop')\n", - "new_pop.info" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
\n", - " **Warning:** Currently, metadata associated with arrays cannot be saved and loaded when working with CSV and Excel files.\n", - " This restriction does not apply however to metadata associated with sessions.\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Loading and Dumping Sessions\n", - "\n", - "One of the main advantages of grouping arrays, axes and groups in session objects is that you can load and save all of them in one shot. Like arrays, it is possible to associate metadata to a session. These can be saved and loaded in all file formats. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Loading Sessions (CSV, Excel, HDF5)\n", - "\n", - "To load the items of a session, you have two options:\n", - "\n", - "1) Instantiate a new session and pass the path to the Excel/HDF5 file or to the directory containing CSV files to the Session constructor:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a new Session object and load all arrays, axes, groups and metadata \n", - "# from all CSV files located in the passed directory\n", - "csv_dir = get_example_filepath('demography_eurostat')\n", - "session = Session(csv_dir)\n", - "\n", - "# create a new Session object and load all arrays, axes, groups and metadata\n", - "# stored in the passed Excel file\n", - "filepath_excel = get_example_filepath('demography_eurostat.xlsx')\n", - "session = Session(filepath_excel)\n", - "\n", - "# create a new Session object and load all arrays, axes, groups and metadata\n", - "# stored in the passed HDF5 file\n", - "filepath_hdf = get_example_filepath('demography_eurostat.h5')\n", - "session = Session(filepath_hdf)\n", - "\n", - "print(session.summary())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "2) Call the ``load`` method on an existing session and pass the path to the Excel/HDF5 file or to the directory containing CSV files as first argument:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a session containing 3 axes, 2 groups and one array 
'pop'\n", - "filepath = get_example_filepath('pop_only.xlsx')\n", - "session = Session(filepath)\n", - "\n", - "print(session.summary())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# call the load method on the previous session and add the 'births' and 'deaths' arrays to it\n", - "filepath = get_example_filepath('births_and_deaths.xlsx')\n", - "session.load(filepath)\n", - "\n", - "print(session.summary())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The ``load`` method offers some options:\n", - "\n", - "1) Using the ``names`` argument, you can specify which items to load:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "session = Session()\n", - "\n", - "# use the names argument to only load births and deaths arrays\n", - "session.load(filepath_hdf, names=['births', 'deaths'])\n", - "\n", - "print(session.summary())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "2) Setting the ``display`` argument to True, the ``load`` method will print a message each time a new item is loaded: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "session = Session()\n", - "\n", - "# with display=True, the load method will print a message\n", - "# each time a new item is loaded\n", - "session.load(filepath_hdf, display=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dumping Sessions (CSV, Excel, HDF5)\n", - "\n", - "To save a session, you need to call the ``save`` method. 
The first argument is the path to a Excel/HDF5 file or to a directory if items are saved to CSV files:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save items of a session in CSV files.\n", - "# Here, the save method will create a 'population' directory in which CSV files will be written \n", - "session.save('population')\n", - "\n", - "# save session to an HDF5 file\n", - "session.save('population.h5')\n", - "\n", - "# save session to an Excel file\n", - "session.save('population.xlsx')\n", - "\n", - "# load session saved in 'population.h5' to see its content\n", - "Session('population.h5')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
\n", - " Note: Concerning the CSV and Excel formats: \n", - " \n", - " - all Axis objects are saved together in the same Excel sheet (CSV file) named `__axes__(.csv)` \n", - " - all Group objects are saved together in the same Excel sheet (CSV file) named `__groups__(.csv)` \n", - " - metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)` \n", - " \n", - " These sheet (CSV file) names cannot be changed. \n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The ``save`` method has several arguments:\n", - "\n", - "1) Using the ``names`` argument, you can specify which items to save:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# use the names argument to only save births and deaths arrays\n", - "session.save('population.h5', names=['births', 'deaths'])\n", - "\n", - "# load session saved in 'population.h5' to see its content\n", - "Session('population.h5')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "2) By default, dumping a session to an Excel or HDF5 file will overwrite it. By setting the ``overwrite`` argument to False, you can choose to update the existing Excel or HDF5 file: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pop = read_csv('./population/pop.csv')\n", - "ses_pop = Session([('pop', pop)])\n", - "\n", - "# by setting overwrite to False, the destination file is updated instead of overwritten.\n", - "# The items already stored in the file but not present in the session are left intact. 
\n", - "# On the contrary, the items that exist in both the file and the session are completely overwritten.\n", - "ses_pop.save('population.h5', overwrite=False)\n", - "\n", - "# load session saved in 'population.h5' to see its content\n", - "Session('population.h5')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "3) Setting the ``display`` argument to True, the ``save`` method will print a message each time an item is dumped: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# with display=True, the save method will print a message\n", - "# each time an item is dumped\n", - "session.save('population.h5', display=True)" - ] - } - ], - "metadata": { - "celltoolbar": "Edit Metadata", - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "livereveal": { - "autolaunch": false, - "scroll": true - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load And Dump Arrays, Sessions, Axes And Groups\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The LArray library provides methods and functions to load and dump Array, Session, Axis Group objects to several formats such as Excel, CSV and HDF5. The HDF5 file format is designed to store and organize large amounts of data. It allows to read and write data much faster than when working with CSV and Excel files. 
\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# run this cell to avoid annoying warnings\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\", message=r'.*numpy.dtype size changed*')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# first of all, import the LArray library\n", + "from larray import *" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check the version of LArray:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from larray import __version__\n", + "__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading and Dumping Arrays\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading Arrays - Basic Usage (CSV, Excel, HDF5)\n", + "\n", + "To read an array from a CSV file, you must use the ``read_csv`` function:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "csv_dir = get_example_filepath('examples')\n", + "\n", + "# read the array pop from the file 'pop.csv'.\n", + "# The data of the array below is derived from a subset of the demo_pjan table from Eurostat\n", + "pop = read_csv(csv_dir + '/pop.csv')\n", + "pop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To read an array from a sheet of an Excel file, you can use the ``read_excel`` function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filepath_excel = get_example_filepath('examples.xlsx')\n", + "\n", + "# read the array from the sheet 'births' of the Excel file 'examples.xlsx'\n", + "# The data of the array below is derived from a subset of the demo_fasec table from Eurostat\n", + "births = 
read_excel(filepath_excel, 'births')\n", + "births" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``open_excel`` function in combination with the ``load`` method allows you to load several arrays from the same Workbook without opening and closing it several times:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "# open the Excel file 'population.xlsx' and let it opened as long as you keep the indent.\n", + "# The Python keyword ``with`` ensures that the Excel file is properly closed even if an error occurs\n", + "with open_excel(filepath_excel) as wb:\n", + " # load the array 'pop' from the sheet 'pop' \n", + " pop = wb['pop'].load()\n", + " # load the array 'births' from the sheet 'births'\n", + " births = wb['births'].load()\n", + " # load the array 'deaths' from the sheet 'deaths'\n", + " deaths = wb['deaths'].load()\n", + "\n", + "# the Workbook is automatically closed when getting out the block defined by the with statement\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + " **Warning:** `open_excel` only works on Windows and requires the library ``xlwings`` to be installed.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `HDF5` file format is specifically designed to store and organize large amounts of data. \n", + "Reading and writing data in this file format is much faster than with CSV or Excel. \n", + "An HDF5 file can contain multiple arrays, each array being associated with a key.\n", + "To read an array from an HDF5 file, you must use the ``read_hdf`` function and provide the key associated with the array:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filepath_hdf = get_example_filepath('examples.h5')\n", + "\n", + "# read the array from the file 'examples.h5' associated with the key 'deaths'\n", + "# The data of the array below is derived from a subset of the demo_magec table from Eurostat\n", + "deaths = read_hdf(filepath_hdf, 'deaths')\n", + "deaths" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dumping Arrays - Basic Usage (CSV, Excel, HDF5)\n", + "\n", + "To write an array in a CSV file, you must use the ``to_csv`` method:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# save the array pop in the file 'pop.csv'\n", + "pop.to_csv('pop.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To write an array to a sheet of an Excel file, you can use the ``to_excel`` method:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# save the array pop in the sheet 'pop' of the Excel file 'population.xlsx' \n", + "pop.to_excel('population.xlsx', 'pop')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that ``to_excel`` create a new Excel file if it does not exist yet. 
\n", + "If the file already exists, a new sheet is added after the existing ones if that sheet does not already exist:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# add a new sheet 'births' to the file 'population.xlsx' and save the array births in it\n", + "births.to_excel('population.xlsx', 'births')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To reset an Excel file, you simply need to set the `overwrite_file` argument to True:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. reset the file 'population.xlsx' (all sheets are removed)\n", + "# 2. create a sheet 'pop' and save the array pop in it\n", + "pop.to_excel('population.xlsx', 'pop', overwrite_file=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``open_excel`` function in combination with the ``dump()`` method allows you to open a Workbook and to export several arrays at once. If the Excel file doesn't exist, the ``overwrite_file`` argument must be set to True.\n", + "\n", + "
\n", + " **Warning:** The ``save`` method must be called at the end of the block defined by the *with* statement to actually write data in the Excel file, otherwise you will end up with an empty file.\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "# to create a new Excel file, argument overwrite_file must be set to True\n", + "with open_excel('population.xlsx', overwrite_file=True) as wb:\n", + " # add a new sheet 'pop' and dump the array pop in it \n", + " wb['pop'] = pop.dump()\n", + " # add a new sheet 'births' and dump the array births in it \n", + " wb['births'] = births.dump()\n", + " # add a new sheet 'deaths' and dump the array deaths in it \n", + " wb['deaths'] = deaths.dump()\n", + " # actually write data in the Workbook\n", + " wb.save()\n", + " \n", + "# the Workbook is automatically closed when getting out the block defined by the with statement\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To write an array in an HDF5 file, you must use the ``to_hdf`` function and provide the key that will be associated with the array:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# save the array pop in the file 'population.h5' and associate it with the key 'pop'\n", + "pop.to_hdf('population.h5', 'pop')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specifying Wide VS Narrow format (CSV, Excel)\n", + "\n", + "By default, all reading functions assume that arrays are stored in the ``wide`` format, meaning that their last axis is represented horizontally:\n", + "\n", + "| country \\\\ time | 2013 | 2014 | 2015 |\n", + "| --------------- | -------- | -------- | -------- |\n", + "| Belgium | 11137974 | 11180840 | 11237274 |\n", + "| France | 65600350 | 65942267 | 66456279 |\n", + "\n", + "By setting the ``wide`` argument to False, reading functions will assume instead that arrays are stored in the ``narrow`` format, i.e. 
one column per axis plus one value column:\n", + "\n", + "| country | time | value |\n", + "| ------- | ---- | -------- |\n", + "| Belgium | 2013 | 11137974 |\n", + "| Belgium | 2014 | 11180840 |\n", + "| Belgium | 2015 | 11237274 |\n", + "| France | 2013 | 65600350 |\n", + "| France | 2014 | 65942267 |\n", + "| France | 2015 | 66456279 |\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set 'wide' argument to False to indicate that the array is stored in the 'narrow' format\n", + "pop_BE_FR = read_csv(csv_dir + '/pop_narrow_format.csv', wide=False)\n", + "pop_BE_FR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# same for the read_excel function\n", + "pop_BE_FR = read_excel(filepath_excel, sheet='pop_narrow_format', wide=False)\n", + "pop_BE_FR" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, writing functions will set the name of the column containing the data to 'value'. You can choose the name of this column by using the ``value_name`` argument. 
For example, using ``value_name='population'`` you can export the previous array as:\n", + "\n", + "| country | time | population |\n", + "| ------- | ---- | ---------- |\n", + "| Belgium | 2013 | 11137974 |\n", + "| Belgium | 2014 | 11180840 |\n", + "| Belgium | 2015 | 11237274 |\n", + "| France | 2013 | 65600350 |\n", + "| France | 2014 | 65942267 |\n", + "| France | 2015 | 66456279 |\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# dump the array pop_BE_FR in a narrow format (one column per axis plus one value column).\n", + "# By default, the name of the column containing data is set to 'value'\n", + "pop_BE_FR.to_csv('pop_narrow_format.csv', wide=False)\n", + "\n", + "# same but replace 'value' by 'population'\n", + "pop_BE_FR.to_csv('pop_narrow_format.csv', wide=False, value_name='population')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# same for the to_excel method\n", + "pop_BE_FR.to_excel('population.xlsx', 'pop_narrow_format', wide=False, value_name='population')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like with the ``to_excel`` method, it is possible to export arrays in a ``narrow`` format using ``open_excel``. 
\n", + "To do so, you must set the ``wide`` argument of the ``dump`` method to False:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "with open_excel('population.xlsx') as wb:\n", + " # dump the array pop_BE_FR in a narrow format: \n", + " # one column per axis plus one value column.\n", + " # Argument value_name can be used to change the name of the \n", + " # column containing the data (default name is 'value')\n", + " wb['pop_narrow_format'] = pop_BE_FR.dump(wide=False, value_name='population')\n", + " # don't forget to call save()\n", + " wb.save()\n", + "\n", + "# in the sheet 'pop_narrow_format', data is written as:\n", + "# | country | time | population |\n", + "# | ------- | ---- | ---------- |\n", + "# | Belgium | 2013 | 11137974 |\n", + "# | Belgium | 2014 | 11180840 |\n", + "# | Belgium | 2015 | 11237274 |\n", + "# | France | 2013 | 65600350 |\n", + "# | France | 2014 | 65942267 |\n", + "# | France | 2015 | 66456279 |\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specifying Position in Sheet (Excel)\n", + "\n", + "If you want to read an array from an Excel sheet which does not start at cell `A1` (when there is more than one array stored in the same sheet for example), you will need to use the ``range`` argument. \n", + "\n", + "
\n", + " **Warning:** Note that the ``range`` argument is only available if you have the library ``xlwings`` installed (Windows).\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "# the 'range' argument must be used to load data not starting at cell A1.\n", + "# This is useful when there is several arrays stored in the same sheet\n", + "births = read_excel(filepath_excel, sheet='pop_births_deaths', range='A9:E15')\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using ``open_excel``, ranges are passed in brackets:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "with open_excel(filepath_excel) as wb:\n", + " # store sheet 'pop_births_deaths' in a temporary variable sh\n", + " sh = wb['pop_births_deaths']\n", + " # load the array pop from range A1:E7\n", + " pop = sh['A1:E7'].load()\n", + " # load the array births from range A9:E15\n", + " births = sh['A9:E15'].load()\n", + " # load the array deaths from range A17:E23\n", + " deaths = sh['A17:E23'].load()\n", + "\n", + "# the Workbook is automatically closed when getting out the block defined by the with statement\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When exporting arrays to Excel files, data is written starting at cell `A1` by default. Using the ``position`` argument of the ``to_excel`` method, it is possible to specify the top left cell of the dumped data. This can be useful when you want to export several arrays in the same sheet for example\n", + "\n", + "
\n", + " **Warning:** Note that the ``position`` argument is only available if you have the library ``xlwings`` installed (Windows).\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "filename = 'population.xlsx'\n", + "sheetname = 'pop_births_deaths'\n", + "\n", + "# save the arrays pop, births and deaths in the same sheet 'pop_births_and_deaths'.\n", + "# The 'position' argument is used to shift the location of the second and third arrays to be dumped\n", + "pop.to_excel(filename, sheetname)\n", + "births.to_excel(filename, sheetname, position='A9')\n", + "deaths.to_excel(filename, sheetname, position='A17')\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using ``open_excel``, the position is passed in brackets (this allows you to also add extra informations): \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "with open_excel('population.xlsx') as wb:\n", + " # add a new sheet 'pop_births_deaths' and write 'population' in the first cell\n", + " # note: you can use wb['new_sheet_name'] = '' to create an empty sheet\n", + " wb['pop_births_deaths'] = 'population'\n", + " # store sheet 'pop_births_deaths' in a temporary variable sh\n", + " sh = wb['pop_births_deaths']\n", + " # dump the array pop in sheet 'pop_births_deaths' starting at cell A2\n", + " sh['A2'] = pop.dump()\n", + " # add 'births' in cell A10\n", + " sh['A10'] = 'births'\n", + " # dump the array births in sheet 'pop_births_deaths' starting at cell A11 \n", + " sh['A11'] = births.dump()\n", + " # add 'deaths' in cell A19\n", + " sh['A19'] = 'deaths'\n", + " # dump the array deaths in sheet 'pop_births_deaths' starting at cell A20\n", + " sh['A20'] = deaths.dump()\n", + " # don't forget to call save()\n", + " wb.save()\n", + " \n", + "# the Workbook is automatically closed when getting out the block defined by the with statement\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exporting data without headers (Excel)\n", + "\n", + "For some reasons, you may want to 
export only the data of an array without axes. For example, you may want to insert a new column containing extra information. As an exercise, let us consider we want to add the capital city for each country present in the array containing the total population by country:\n", + "\n", + "| country | capital city | 2013 | 2014 | 2015 |\n", + "| ------- | ------------ | -------- | -------- | -------- |\n", + "| Belgium | Brussels | 11137974 | 11180840 | 11237274 |\n", + "| France | Paris | 65600350 | 65942267 | 66456279 |\n", + "| Germany | Berlin | 80523746 | 80767463 | 81197537 |\n", + "\n", + "Assuming you have prepared an excel sheet as below: \n", + "\n", + "| country | capital city | 2013 | 2014 | 2015 |\n", + "| ------- | ------------ | -------- | -------- | -------- |\n", + "| Belgium | Brussels | | | |\n", + "| France | Paris | | | |\n", + "| Germany | Berlin | | | ||\n", + "\n", + "you can then dump the data at right place by setting the ``header`` argument of ``to_excel`` to False and specifying the position of the data in sheet:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "pop_by_country = pop.sum('gender')\n", + "\n", + "# export only the data of the array pop_by_country starting at cell C2\n", + "pop_by_country.to_excel('population.xlsx', 'pop_by_country', header=False, position='C2')\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using ``open_excel``, you can easily prepare the sheet and then export only data at the right place by either setting the ``header`` argument of the ``dump`` method to False or avoiding to call ``dump``:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "with open_excel('population.xlsx') as wb:\n", + " # create new empty sheet 'pop_by_country'\n", + " wb['pop_by_country'] = ''\n", + " # store sheet 'pop_by_country' in a temporary variable sh\n", + " sh = wb['pop_by_country']\n", + " # write extra 
information (description)\n", + " sh['A1'] = 'Population at 1st January by country'\n", + " # export column names\n", + " sh['A2'] = ['country', 'capital city']\n", + " sh['C2'] = pop_by_country.time.labels\n", + " # export countries as first column\n", + " sh['A3'].options(transpose=True).value = pop_by_country.country.labels\n", + " # export capital cities as second column\n", + " sh['B3'].options(transpose=True).value = ['Brussels', 'Paris', 'Berlin']\n", + " # export only data of pop_by_country\n", + " sh['C3'] = pop_by_country.dump(header=False)\n", + " # or equivalently\n", + " sh['C3'] = pop_by_country\n", + " # don't forget to call save()\n", + " wb.save()\n", + " \n", + "# the Workbook is automatically closed when getting out the block defined by the with statement\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specifying the Number of Axes at Reading (CSV, Excel)\n", + "\n", + "By default, ``read_csv`` and ``read_excel`` will search the position of the first cell containing the special character ``\\`` in the header line in order to determine the number of axes of the array to read. The special character ``\\`` is used to separate the name of the two last axes. If there is no special character ``\\``, ``read_csv`` and ``read_excel`` will consider that the array to read has only one dimension. 
For an array stored as:\n", + "\n", + "| country | gender \\\\ time | 2013 | 2014 | 2015 |\n", + "| ------- | -------------- | -------- | -------- | -------- |\n", + "| Belgium | Male | 5472856 | 5493792 | 5524068 |\n", + "| Belgium | Female | 5665118 | 5687048 | 5713206 |\n", + "| France | Male | 31772665 | 31936596 | 32175328 |\n", + "| France | Female | 33827685 | 34005671 | 34280951 |\n", + "| Germany | Male | 39380976 | 39556923 | 39835457 |\n", + "| Germany | Female | 41142770 | 41210540 | 41362080 |\n", + "\n", + "``read_csv`` and ``read_excel`` will find the special character ``\\`` in the second cell meaning it expects three axes (country, gender and time). \n", + "\n", + "Sometimes, you need to read an array for which the name of the last axis is implicit: \n", + "\n", + "| country | gender | 2013 | 2014 | 2015 |\n", + "| ------- | ------ | -------- | -------- | -------- |\n", + "| Belgium | Male | 5472856 | 5493792 | 5524068 |\n", + "| Belgium | Female | 5665118 | 5687048 | 5713206 |\n", + "| France | Male | 31772665 | 31936596 | 32175328 |\n", + "| France | Female | 33827685 | 34005671 | 34280951 |\n", + "| Germany | Male | 39380976 | 39556923 | 39835457 |\n", + "| Germany | Female | 41142770 | 41210540 | 41362080 |\n", + "\n", + "For such case, you will have to inform ``read_csv`` and ``read_excel`` of the number of axes of the output array by setting the ``nb_axes`` argument:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# read the 3 x 2 x 3 array stored in the file 'pop_missing_axis_name.csv' wihout using 'nb_axes' argument.\n", + "pop = read_csv(csv_dir + '/pop_missing_axis_name.csv')\n", + "# shape and data type of the output array are not what we expected\n", + "pop.info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# by setting the 'nb_axes' argument, you can indicate to read_csv the number of axes of the 
output array\n", + "pop = read_csv(csv_dir + '/pop_missing_axis_name.csv', nb_axes=3)\n", + "\n", + "# give a name to the last axis\n", + "pop = pop.rename(-1, 'time')\n", + "\n", + "# shape and data type of the output array are what we expected\n", + "pop.info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# same for the read_excel function\n", + "pop = read_excel(filepath_excel, sheet='pop_missing_axis_name', nb_axes=3)\n", + "pop = pop.rename(-1, 'time')\n", + "pop.info" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### NaNs and Missing Data Handling at Reading (CSV, Excel)\n", + "\n", + "Sometimes, there is no data available for some label combinations. In the example below, the rows corresponding to `France - Male` and `Germany - Female` are missing:\n", + "\n", + "| country | gender \\\\ time | 2013 | 2014 | 2015 |\n", + "| ------- | -------------- | -------- | -------- | -------- |\n", + "| Belgium | Male | 5472856 | 5493792 | 5524068 |\n", + "| Belgium | Female | 5665118 | 5687048 | 5713206 |\n", + "| France | Female | 33827685 | 34005671 | 34280951 |\n", + "| Germany | Male | 39380976 | 39556923 | 39835457 |\n", + "\n", + "By default, ``read_csv`` and ``read_excel`` will fill cells associated with missing label combinations with nans. \n", + "Be aware that, in that case, an int array will be converted to a float array." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# by default, cells associated with missing label combinations are filled with nans.\n", + "# In that case, the output array is converted to a float array\n", + "read_csv(csv_dir + '/pop_missing_values.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, it is possible to choose which value to use to fill missing cells using the ``fill_value`` argument:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "read_csv(csv_dir + '/pop_missing_values.csv', fill_value=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# same for the read_excel function\n", + "read_excel(filepath_excel, sheet='pop_missing_values', fill_value=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sorting Axes at Reading (CSV, Excel, HDF5)\n", + "\n", + "The ``sort_rows`` and ``sort_columns`` arguments of the reading functions allow you to sort rows and columns alphabetically:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# sort labels at reading --> Male and Female labels are inverted\n", + "read_csv(csv_dir + '/pop.csv', sort_rows=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "read_excel(filepath_excel, sheet='births', sort_rows=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "read_hdf(filepath_hdf, key='deaths').sort_axes()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Metadata (HDF5)\n", + "\n", + "Since version 0.29 of LArray, it is possible to add metadata to arrays:" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "pop.meta.title = 'Population at 1st January'\n", + "pop.meta.origin = 'Table demo_jpan from Eurostat'\n", + "\n", + "pop.info" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These metadata are automatically saved and loaded when working with the HDF5 file format: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pop.to_hdf('population.h5', 'pop')\n", + "\n", + "new_pop = read_hdf('population.h5', 'pop')\n", + "new_pop.info" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + " **Warning:** Currently, metadata associated with arrays cannot be saved and loaded when working with CSV and Excel files.\n", + " This restriction does not apply however to metadata associated with sessions.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading and Dumping Sessions\n", + "\n", + "One of the main advantages of grouping arrays, axes and groups in session objects is that you can load and save all of them in one shot. Like arrays, it is possible to associate metadata to a session. These can be saved and loaded in all file formats. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading Sessions (CSV, Excel, HDF5)\n", + "\n", + "To load the items of a session, you have two options:\n", + "\n", + "1) Instantiate a new session and pass the path to the Excel/HDF5 file or to the directory containing CSV files to the Session constructor:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a new Session object and load all arrays, axes, groups and metadata \n", + "# from all CSV files located in the passed directory\n", + "csv_dir = get_example_filepath('demography_eurostat')\n", + "session = Session(csv_dir)\n", + "\n", + "# create a new Session object and load all arrays, axes, groups and metadata\n", + "# stored in the passed Excel file\n", + "filepath_excel = get_example_filepath('demography_eurostat.xlsx')\n", + "session = Session(filepath_excel)\n", + "\n", + "# create a new Session object and load all arrays, axes, groups and metadata\n", + "# stored in the passed HDF5 file\n", + "filepath_hdf = get_example_filepath('demography_eurostat.h5')\n", + "session = Session(filepath_hdf)\n", + "\n", + "print(session.summary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2) Call the ``load`` method on an existing session and pass the path to the Excel/HDF5 file or to the directory containing CSV files as first argument:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a session containing 3 axes, 2 groups and one array 
'pop'\n", + "filepath = get_example_filepath('pop_only.xlsx')\n", + "session = Session(filepath)\n", + "\n", + "print(session.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# call the load method on the previous session and add the 'births' and 'deaths' arrays to it\n", + "filepath = get_example_filepath('births_and_deaths.xlsx')\n", + "session.load(filepath)\n", + "\n", + "print(session.summary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``load`` method offers some options:\n", + "\n", + "1) Using the ``names`` argument, you can specify which items to load:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session = Session()\n", + "\n", + "# use the names argument to only load births and deaths arrays\n", + "session.load(filepath_hdf, names=['births', 'deaths'])\n", + "\n", + "print(session.summary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2) Setting the ``display`` argument to True, the ``load`` method will print a message each time a new item is loaded: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session = Session()\n", + "\n", + "# with display=True, the load method will print a message\n", + "# each time a new item is loaded\n", + "session.load(filepath_hdf, display=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dumping Sessions (CSV, Excel, HDF5)\n", + "\n", + "To save a session, you need to call the ``save`` method. 
The first argument is the path to an Excel/HDF5 file or to a directory if items are saved to CSV files:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# save items of a session in CSV files.\n", + "# Here, the save method will create a 'population' directory in which CSV files will be written \n", + "session.save('population')\n", + "\n", + "# save session to an HDF5 file\n", + "session.save('population.h5')\n", + "\n", + "# save session to an Excel file\n", + "session.save('population.xlsx')\n", + "\n", + "# load session saved in 'population.h5' to see its content\n", + "Session('population.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + " Note: Concerning the CSV and Excel formats: \n", + " \n", + " - all Axis objects are saved together in the same Excel sheet (CSV file) named `__axes__(.csv)` \n", + " - all Group objects are saved together in the same Excel sheet (CSV file) named `__groups__(.csv)` \n", + " - metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)` \n", + " \n", + " These sheet (CSV file) names cannot be changed. \n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``save`` method has several arguments:\n", + "\n", + "1) Using the ``names`` argument, you can specify which items to save:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# use the names argument to only save births and deaths arrays\n", + "session.save('population.h5', names=['births', 'deaths'])\n", + "\n", + "# load session saved in 'population.h5' to see its content\n", + "Session('population.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2) By default, dumping a session to an Excel or HDF5 file will overwrite it. By setting the ``overwrite`` argument to False, you can choose to update the existing Excel or HDF5 file: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pop = read_csv('./population/pop.csv')\n", + "ses_pop = Session([('pop', pop)])\n", + "\n", + "# by setting overwrite to False, the destination file is updated instead of overwritten.\n", + "# The items already stored in the file but not present in the session are left intact. 
\n", + "# On the contrary, the items that exist in both the file and the session are completely overwritten.\n", + "ses_pop.save('population.h5', overwrite=False)\n", + "\n", + "# load session saved in 'population.h5' to see its content\n", + "Session('population.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3) Setting the ``display`` argument to True, the ``save`` method will print a message each time an item is dumped: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# with display=True, the save method will print a message\n", + "# each time an item is dumped\n", + "session.save('population.h5', display=True)" + ] + } + ], + "metadata": { + "celltoolbar": "Edit Metadata", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "livereveal": { + "autolaunch": false, + "scroll": true + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/tutorial/tutorial_arithmetic_op_and_aggregation.ipyml b/doc/source/tutorial/tutorial_arithmetic_op_and_aggregation.ipyml index 2b1e2ef07..fa260e6a7 100644 --- a/doc/source/tutorial/tutorial_arithmetic_op_and_aggregation.ipyml +++ b/doc/source/tutorial/tutorial_arithmetic_op_and_aggregation.ipyml @@ -95,7 +95,7 @@ cells: - code: | - aggregation_matrix = LArray([[1, 0, 0], [0, 1, 1]], axes=(Axis('country=Belgium,France+Germany'), pop.country)) + aggregation_matrix = Array([[1, 0, 0], [0, 1, 1]], axes=(Axis('country=Belgium,France+Germany'), pop.country)) aggregation_matrix @@ -107,12 +107,12 @@ cells: - markdown: |
**Note:** Be careful when mixing different data types. - You can use the method [astype](../_generated/larray.LArray.astype.rst#larray.LArray.astype) to change the data type of an array. + You can use the method [astype](../_generated/larray.Array.astype.rst#larray.Array.astype) to change the data type of an array.
- code: | - aggregation_matrix = LArray([[1, 0, 0], [0, 0.5, 0.5]], axes=(Axis('country=Belgium,France+Germany/2'), pop.country)) + aggregation_matrix = Array([[1, 0, 0], [0, 0.5, 0.5]], axes=(Axis('country=Belgium,France+Germany/2'), pop.country)) aggregation_matrix @@ -175,7 +175,7 @@ cells: - markdown: |
- **Warning:** Operations between two arrays only works when they have compatible axes (i.e. same labels) but this behavior can be override via the [ignore_labels](../_generated/larray.LArray.ignore_labels.rst#larray.LArray.ignore_labels) method. + **Warning:** Operations between two arrays only work when they have compatible axes (i.e. same labels) but this behavior can be overridden via the [ignore_labels](../_generated/larray.Array.ignore_labels.rst#larray.Array.ignore_labels) method. In that case only the position on the axis is used and not the labels. Using this method is done at your own risk.
@@ -201,7 +201,7 @@ cells: - code: | # let's define a 'multiplicator' vector with # one value defined for each gender - multiplicator = LArray([-1, 1], axes=pop.gender) + multiplicator = Array([-1, 1], axes=pop.gender) multiplicator @@ -281,7 +281,7 @@ cells: - markdown: | - To test if all values between are equals, use the [equals](../_generated/larray.LArray.equals.rst#larray.LArray.equals) method: + To test if all values between are equals, use the [equals](../_generated/larray.Array.equals.rst#larray.Array.equals) method: - code: | @@ -291,7 +291,7 @@ cells: - markdown: | ## Aggregates - LArray provides many aggregation functions. The list is given in the [Aggregation Functions](../api.rst#aggregation-functions) subsection of the [API Reference](../api.rst) page. + The LArray library provides many aggregation functions. The list is given in the [Aggregation Functions](../api.rst#aggregation-functions) subsection of the [API Reference](../api.rst) page. Aggregation operations can be performed on axes or groups. Axes and groups can be mixed. diff --git a/doc/source/tutorial/tutorial_arithmetic_op_and_aggregation.ipynb b/doc/source/tutorial/tutorial_arithmetic_op_and_aggregation.ipynb index b6ea22d88..eb7ec2ee6 100644 --- a/doc/source/tutorial/tutorial_arithmetic_op_and_aggregation.ipynb +++ b/doc/source/tutorial/tutorial_arithmetic_op_and_aggregation.ipynb @@ -176,7 +176,7 @@ "metadata": {}, "outputs": [], "source": [ - "aggregation_matrix = LArray([[1, 0, 0], [0, 1, 1]], axes=(Axis('country=Belgium,France+Germany'), pop.country))\n", + "aggregation_matrix = Array([[1, 0, 0], [0, 1, 1]], axes=(Axis('country=Belgium,France+Germany'), pop.country))\n", "aggregation_matrix" ] }, @@ -196,7 +196,7 @@ "source": [ "
\n", "**Note:** Be careful when mixing different data types.\n", - "You can use the method [astype](../_generated/larray.LArray.astype.rst#larray.LArray.astype) to change the data type of an array.\n", + "You can use the method [astype](../_generated/larray.Array.astype.rst#larray.Array.astype) to change the data type of an array.\n", "
\n" ] }, @@ -206,7 +206,7 @@ "metadata": {}, "outputs": [], "source": [ - "aggregation_matrix = LArray([[1, 0, 0], [0, 0.5, 0.5]], axes=(Axis('country=Belgium,France+Germany/2'), pop.country))\n", + "aggregation_matrix = Array([[1, 0, 0], [0, 0.5, 0.5]], axes=(Axis('country=Belgium,France+Germany/2'), pop.country))\n", "aggregation_matrix" ] }, @@ -313,7 +313,7 @@ "metadata": {}, "source": [ "
\n", - " **Warning:** Operations between two arrays only works when they have compatible axes (i.e. same labels) but this behavior can be override via the [ignore_labels](../_generated/larray.LArray.ignore_labels.rst#larray.LArray.ignore_labels) method.\n", + " **Warning:** Operations between two arrays only work when they have compatible axes (i.e. same labels) but this behavior can be overridden via the [ignore_labels](../_generated/larray.Array.ignore_labels.rst#larray.Array.ignore_labels) method.\n", "In that case only the position on the axis is used and not the labels.\n", "Using this method is done at your own risk.\n", "
\n" @@ -355,7 +355,7 @@ "source": [ "# let's define a 'multiplicator' vector with \n", "# one value defined for each gender\n", - "multiplicator = LArray([-1, 1], axes=pop.gender)\n", + "multiplicator = Array([-1, 1], axes=pop.gender)\n", "multiplicator" ] }, @@ -485,7 +485,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To test if all values between are equals, use the [equals](../_generated/larray.LArray.equals.rst#larray.LArray.equals) method:" + "To test if all values between are equals, use the [equals](../_generated/larray.Array.equals.rst#larray.Array.equals) method:" ] }, { @@ -503,7 +503,7 @@ "source": [ "## Aggregates\n", "\n", - "LArray provides many aggregation functions. The list is given in the [Aggregation Functions](../api.rst#aggregation-functions) subsection of the [API Reference](../api.rst) page.\n", + "The LArray library provides many aggregation functions. The list is given in the [Aggregation Functions](../api.rst#aggregation-functions) subsection of the [API Reference](../api.rst) page.\n", "\n", "Aggregation operations can be performed on axes or groups. Axes and groups can be mixed. 
\n", "\n", diff --git a/doc/source/tutorial/tutorial_indexing.ipyml b/doc/source/tutorial/tutorial_indexing.ipyml index 848da0b61..ca14d32f7 100644 --- a/doc/source/tutorial/tutorial_indexing.ipyml +++ b/doc/source/tutorial/tutorial_indexing.ipyml @@ -42,7 +42,7 @@ cells: - markdown: | ## Selecting (Subsets) - LArray allows to select a subset of an array either by labels or indices (positions) + The ``Array`` class allows to select a subset either by labels or indices (positions) - markdown: | @@ -254,7 +254,7 @@ cells: - code: | - start_year = LArray([2015, 2016, 2017], axes=pop.country) + start_year = Array([2015, 2016, 2017], axes=pop.country) start_year diff --git a/doc/source/tutorial/tutorial_indexing.ipynb b/doc/source/tutorial/tutorial_indexing.ipynb index e0a116f46..0a3261fc0 100644 --- a/doc/source/tutorial/tutorial_indexing.ipynb +++ b/doc/source/tutorial/tutorial_indexing.ipynb @@ -77,7 +77,7 @@ "source": [ "## Selecting (Subsets)\n", "\n", - "LArray allows to select a subset of an array either by labels or indices (positions)\n" + "The ``Array`` class allows to select a subset either by labels or indices (positions)\n" ] }, { @@ -437,7 +437,7 @@ "metadata": {}, "outputs": [], "source": [ - "start_year = LArray([2015, 2016, 2017], axes=pop.country)\n", + "start_year = Array([2015, 2016, 2017], axes=pop.country)\n", "start_year" ] }, diff --git a/doc/source/tutorial/tutorial_miscellaneous.ipyml b/doc/source/tutorial/tutorial_miscellaneous.ipyml index 216668502..1adfe875b 100644 --- a/doc/source/tutorial/tutorial_miscellaneous.ipyml +++ b/doc/source/tutorial/tutorial_miscellaneous.ipyml @@ -49,7 +49,7 @@ cells: - markdown: | - See [with_total](../_generated/larray.LArray.with_total.rst#larray.LArray.with_total) for more details and examples. + See [with_total](../_generated/larray.Array.with_total.rst#larray.Array.with_total) for more details and examples. 
- markdown: | @@ -90,7 +90,7 @@ cells: - markdown: | - See [clip](../_generated/larray.LArray.clip.rst#larray.LArray.clip) for more details and examples. + See [clip](../_generated/larray.Array.clip.rst#larray.Array.clip) for more details and examples. - markdown: | @@ -117,7 +117,7 @@ cells: - markdown: | - See [divnot0](../_generated/larray.LArray.divnot0.rst#larray.LArray.divnot0) for more details and examples. + See [divnot0](../_generated/larray.Array.divnot0.rst#larray.Array.divnot0) for more details and examples. - markdown: | @@ -134,7 +134,7 @@ cells: - markdown: | - See [ratio](../_generated/larray.LArray.ratio.rst#larray.LArray.ratio) and [rationot0](../_generated/larray.LArray.rationot0.rst#larray.LArray.rationot0) for more details and examples. + See [ratio](../_generated/larray.Array.ratio.rst#larray.Array.ratio) and [rationot0](../_generated/larray.Array.rationot0.rst#larray.Array.rationot0) for more details and examples. - markdown: | @@ -147,7 +147,7 @@ cells: - markdown: | - See [percent](../_generated/larray.LArray.percent.rst#larray.LArray.percent) for more details and examples. + See [percent](../_generated/larray.Array.percent.rst#larray.Array.percent) for more details and examples. - markdown: | @@ -174,7 +174,7 @@ cells: - markdown: | - See [diff](../_generated/larray.LArray.diff.rst#larray.LArray.diff) for more details and examples. + See [diff](../_generated/larray.Array.diff.rst#larray.Array.diff) for more details and examples. - markdown: | @@ -190,7 +190,7 @@ cells: - markdown: | - See [growth_rate](../_generated/larray.LArray.growth_rate.rst#larray.LArray.growth_rate) for more details and examples. + See [growth_rate](../_generated/larray.Array.growth_rate.rst#larray.Array.growth_rate) for more details and examples. - markdown: | @@ -211,7 +211,7 @@ cells: - markdown: | - See [shift](../_generated/larray.LArray.shift.rst#larray.LArray.shift) for more details and examples. 
+ See [shift](../_generated/larray.Array.shift.rst#larray.Array.shift) for more details and examples. - markdown: | diff --git a/doc/source/tutorial/tutorial_miscellaneous.ipynb b/doc/source/tutorial/tutorial_miscellaneous.ipynb index 78f5c4251..fb3c6d2d9 100644 --- a/doc/source/tutorial/tutorial_miscellaneous.ipynb +++ b/doc/source/tutorial/tutorial_miscellaneous.ipynb @@ -89,7 +89,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [with_total](../_generated/larray.LArray.with_total.rst#larray.LArray.with_total) for more details and examples.\n" + "See [with_total](../_generated/larray.Array.with_total.rst#larray.Array.with_total) for more details and examples.\n" ] }, { @@ -162,7 +162,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [clip](../_generated/larray.LArray.clip.rst#larray.LArray.clip) for more details and examples.\n" + "See [clip](../_generated/larray.Array.clip.rst#larray.Array.clip) for more details and examples.\n" ] }, { @@ -210,7 +210,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [divnot0](../_generated/larray.LArray.divnot0.rst#larray.LArray.divnot0) for more details and examples.\n" + "See [divnot0](../_generated/larray.Array.divnot0.rst#larray.Array.divnot0) for more details and examples.\n" ] }, { @@ -238,7 +238,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [ratio](../_generated/larray.LArray.ratio.rst#larray.LArray.ratio) and [rationot0](../_generated/larray.LArray.rationot0.rst#larray.LArray.rationot0) for more details and examples.\n" + "See [ratio](../_generated/larray.Array.ratio.rst#larray.Array.ratio) and [rationot0](../_generated/larray.Array.rationot0.rst#larray.Array.rationot0) for more details and examples.\n" ] }, { @@ -262,7 +262,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [percent](../_generated/larray.LArray.percent.rst#larray.LArray.percent) for more details and examples.\n" + "See 
[percent](../_generated/larray.Array.percent.rst#larray.Array.percent) for more details and examples.\n" ] }, { @@ -310,7 +310,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [diff](../_generated/larray.LArray.diff.rst#larray.LArray.diff) for more details and examples.\n" + "See [diff](../_generated/larray.Array.diff.rst#larray.Array.diff) for more details and examples.\n" ] }, { @@ -337,7 +337,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [growth_rate](../_generated/larray.LArray.growth_rate.rst#larray.LArray.growth_rate) for more details and examples.\n" + "See [growth_rate](../_generated/larray.Array.growth_rate.rst#larray.Array.growth_rate) for more details and examples.\n" ] }, { @@ -374,7 +374,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [shift](../_generated/larray.LArray.shift.rst#larray.LArray.shift) for more details and examples.\n" + "See [shift](../_generated/larray.Array.shift.rst#larray.Array.shift) for more details and examples.\n" ] }, { diff --git a/doc/source/tutorial/tutorial_plotting.ipyml b/doc/source/tutorial/tutorial_plotting.ipyml index 2c3272daa..365d88818 100644 --- a/doc/source/tutorial/tutorial_plotting.ipyml +++ b/doc/source/tutorial/tutorial_plotting.ipyml @@ -59,7 +59,7 @@ cells: - markdown: | - See [plot](../_generated/larray.LArray.plot.rst#larray.LArray.plot) for more details and examples. + See [plot](../_generated/larray.Array.plot.rst#larray.Array.plot) for more details and examples. # The lines below here may be deleted if you do not need them. diff --git a/doc/source/tutorial/tutorial_plotting.ipynb b/doc/source/tutorial/tutorial_plotting.ipynb index d7d37d764..354cd65cd 100644 --- a/doc/source/tutorial/tutorial_plotting.ipynb +++ b/doc/source/tutorial/tutorial_plotting.ipynb @@ -110,7 +110,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [plot](../_generated/larray.LArray.plot.rst#larray.LArray.plot) for more details and examples." 
+ "See [plot](../_generated/larray.Array.plot.rst#larray.Array.plot) for more details and examples." ] } ], diff --git a/doc/source/tutorial/tutorial_presenting_larray_objects.ipyml b/doc/source/tutorial/tutorial_presenting_larray_objects.ipyml index ff4342375..c0e6e3e90 100644 --- a/doc/source/tutorial/tutorial_presenting_larray_objects.ipyml +++ b/doc/source/tutorial/tutorial_presenting_larray_objects.ipyml @@ -1,7 +1,7 @@ cells: - markdown: | - # Presenting LArray objects (Axis, Groups, LArray, Session) + # Presenting LArray objects (Axis, Groups, Array, Session) - code: | @@ -32,7 +32,7 @@ cells: - markdown: | ## Axis - An ``Axis`` represents a dimension of an LArray object. + An ``Axis`` represents a dimension of an Array object. It consists of a name and a list of labels. They are several ways to create an axis: @@ -95,14 +95,14 @@ cells: - markdown: | - ## LArray + ## Array - A ``LArray`` object represents a multidimensional array with labeled axes. + An ``Array`` object represents a multidimensional array with labeled axes. ### Create an array from scratch - To create an array from scratch, you need to provide the data and a list - of axes. Optionally, metadata (title, description, creation date, authors, ...) can be associated to the array: + To create an array from scratch, you need to provide the data and a list of axes. + Optionally, metadata (title, description, creation date, authors, ...) 
can be associated to the array: - code: | @@ -115,7 +115,7 @@ cells: # metadata meta = [('title', 'random array')] - arr = LArray(data, axes, meta=meta) + arr = Array(data, axes, meta=meta) arr @@ -221,7 +221,7 @@ cells: - markdown: | - ### Inspecting LArray objects + ### Inspecting Array objects - code: | @@ -303,11 +303,11 @@ cells: - markdown: | - ### More on LArray objects + ### More on Array objects To know how to save and load arrays in CSV, Excel or HDF format, please refer to the [Loading and Dumping Arrays](tutorial_IO.ipynb#Loading-and-Dumping-Arrays) section of the tutorial. - See the [LArray](../api.rst#larray) section of the API Reference to explore all methods of LArray objects. + See the [Array](../api.rst#array) section of the API Reference to explore all methods of Array objects. - markdown: | diff --git a/doc/source/tutorial/tutorial_presenting_larray_objects.ipynb b/doc/source/tutorial/tutorial_presenting_larray_objects.ipynb index c98ffb444..3bad4acb4 100644 --- a/doc/source/tutorial/tutorial_presenting_larray_objects.ipynb +++ b/doc/source/tutorial/tutorial_presenting_larray_objects.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Presenting LArray objects (Axis, Groups, LArray, Session)\n" + "# Presenting LArray objects (Axis, Groups, Array, Session)\n" ] }, { @@ -59,7 +59,7 @@ "source": [ "## Axis\n", "\n", - "An ``Axis`` represents a dimension of an LArray object.\n", + "An ``Axis`` represents a dimension of an Array object.\n", "It consists of a name and a list of labels. 
\n", "\n", "They are several ways to create an axis:\n" @@ -152,14 +152,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## LArray\n", + "## Array\n", "\n", - "A ``LArray`` object represents a multidimensional array with labeled axes.\n", + "An ``Array`` object represents a multidimensional array with labeled axes.\n", "\n", "### Create an array from scratch\n", "\n", - "To create an array from scratch, you need to provide the data and a list\n", - "of axes. Optionally, metadata (title, description, creation date, authors, ...) can be associated to the array:\n" + "To create an array from scratch, you need to provide the data and a list of axes. \n", + "Optionally, metadata (title, description, creation date, authors, ...) can be associated to the array:\n" ] }, { @@ -177,7 +177,7 @@ "# metadata\n", "meta = [('title', 'random array')]\n", "\n", - "arr = LArray(data, axes, meta=meta)\n", + "arr = Array(data, axes, meta=meta)\n", "arr" ] }, @@ -346,7 +346,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Inspecting LArray objects\n" + "### Inspecting Array objects\n" ] }, { @@ -498,11 +498,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### More on LArray objects\n", + "### More on Array objects\n", "\n", "To know how to save and load arrays in CSV, Excel or HDF format, please refer to the [Loading and Dumping Arrays](tutorial_IO.ipynb#Loading-and-Dumping-Arrays) section of the tutorial.\n", "\n", - "See the [LArray](../api.rst#larray) section of the API Reference to explore all methods of LArray objects." + "See the [Array](../api.rst#array) section of the API Reference to explore all methods of Array objects." 
] }, { diff --git a/doc/source/tutorial/tutorial_sessions.ipyml b/doc/source/tutorial/tutorial_sessions.ipyml index 77e2a954a..3865f95d4 100644 --- a/doc/source/tutorial/tutorial_sessions.ipyml +++ b/doc/source/tutorial/tutorial_sessions.ipyml @@ -99,7 +99,7 @@ cells: - code: | # select only arrays of a session - s_pop.filter(kind=LArray) + s_pop.filter(kind=Array) - code: | @@ -122,7 +122,7 @@ cells: - code: | # iterate over items for value in s_pop.values(): - if isinstance(value, LArray): + if isinstance(value, Array): print(value.info) else: print(repr(value)) @@ -132,7 +132,7 @@ cells: - code: | # iterate over names and items for key, value in s_pop.items(): - if isinstance(value, LArray): + if isinstance(value, Array): print(key, ':') print(value.info) else: @@ -231,7 +231,7 @@ cells: - markdown: | - It is also possible to apply a function on non-LArray objects of a session. Please refer the documentation of the [apply](../_generated/larray.Session.apply.rst#larray.Session.apply) method. + It is also possible to apply a function on non-Array objects of a session. Please refer the documentation of the [apply](../_generated/larray.Session.apply.rst#larray.Session.apply) method. 
- markdown: | diff --git a/doc/source/tutorial/tutorial_sessions.ipynb b/doc/source/tutorial/tutorial_sessions.ipynb index 7b5bac662..f7fbdcf16 100644 --- a/doc/source/tutorial/tutorial_sessions.ipynb +++ b/doc/source/tutorial/tutorial_sessions.ipynb @@ -177,7 +177,7 @@ "outputs": [], "source": [ "# select only arrays of a session\n", - "s_pop.filter(kind=LArray)" + "s_pop.filter(kind=Array)" ] }, { @@ -218,7 +218,7 @@ "source": [ "# iterate over items\n", "for value in s_pop.values():\n", - " if isinstance(value, LArray):\n", + " if isinstance(value, Array):\n", " print(value.info)\n", " else:\n", " print(repr(value))\n", @@ -233,7 +233,7 @@ "source": [ "# iterate over names and items\n", "for key, value in s_pop.items():\n", - " if isinstance(value, LArray):\n", + " if isinstance(value, Array):\n", " print(key, ':')\n", " print(value.info)\n", " else:\n", @@ -380,7 +380,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "It is also possible to apply a function on non-LArray objects of a session. Please refer the documentation of the [apply](../_generated/larray.Session.apply.rst#larray.Session.apply) method." + "It is also possible to apply a function on non-Array objects of a session. Please refer the documentation of the [apply](../_generated/larray.Session.apply.rst#larray.Session.apply) method." ] }, { diff --git a/doc/source/tutorial/tutorial_string_syntax.ipyml b/doc/source/tutorial/tutorial_string_syntax.ipyml index 685eba2e2..617d3b98b 100644 --- a/doc/source/tutorial/tutorial_string_syntax.ipyml +++ b/doc/source/tutorial/tutorial_string_syntax.ipyml @@ -22,7 +22,7 @@ cells: - markdown: | - LArray offers two syntaxes to build axes and make selections and aggregations. + The LArray library offers two syntaxes to build axes and make selections and aggregations. 
The first one is more ``Pythonic`` (uses Python structures) For example, you can create an *age_category* axis as follows: diff --git a/doc/source/tutorial/tutorial_string_syntax.ipynb b/doc/source/tutorial/tutorial_string_syntax.ipynb index dcf0b9350..9925ed3fd 100644 --- a/doc/source/tutorial/tutorial_string_syntax.ipynb +++ b/doc/source/tutorial/tutorial_string_syntax.ipynb @@ -44,7 +44,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "LArray offers two syntaxes to build axes and make selections and aggregations.\n", + "The LArray library offers two syntaxes to build axes and make selections and aggregations.\n", "The first one is more ``Pythonic`` (uses Python structures) \n", "For example, you can create an *age_category* axis as follows:" ] diff --git a/doc/source/tutorial/tutorial_transforming.ipyml b/doc/source/tutorial/tutorial_transforming.ipyml index d14adcb46..8b3f438ae 100644 --- a/doc/source/tutorial/tutorial_transforming.ipyml +++ b/doc/source/tutorial/tutorial_transforming.ipyml @@ -44,17 +44,17 @@ cells: ## Manipulating axes - LArray offers several methods to manipulate the axes and labels of an array: + The ``Array`` class offers several methods to manipulate the axes and labels of an array: - - [set_labels](../_generated/larray.LArray.set_labels.rst#larray.LArray.set_labels): to replace all or some labels of one or several axes. - - [rename](../_generated/larray.LArray.rename.rst#larray.LArray.rename): to replace one or several axis names. - - [set_axes](../_generated/larray.LArray.set_axes.rst#larray.LArray.set_axes): to replace one or several axes. - - [transpose](../_generated/larray.LArray.transpose.rst#larray.LArray.transpose): to modify the order of axes. - - [drop](../_generated/larray.LArray.drop.rst#larray.LArray.drop): to remove one or several labels. - - [combine_axes](../_generated/larray.LArray.combine_axes.rst#larray.LArray.combine_axes): to combine axes. 
- - [split_axes](../_generated/larray.LArray.split_axes.rst#larray.LArray.split_axes): to split one or several axes by splitting their labels and names. - - [reindex](../_generated/larray.LArray.reindex.rst#larray.LArray.reindex): to reorder, add and remove labels of one or several axes. - - [insert](../_generated/larray.LArray.insert.rst#larray.LArray.insert): to insert a label at a given position. + - [set_labels](../_generated/larray.Array.set_labels.rst#larray.Array.set_labels): to replace all or some labels of one or several axes. + - [rename](../_generated/larray.Array.rename.rst#larray.Array.rename): to replace one or several axis names. + - [set_axes](../_generated/larray.Array.set_axes.rst#larray.Array.set_axes): to replace one or several axes. + - [transpose](../_generated/larray.Array.transpose.rst#larray.Array.transpose): to modify the order of axes. + - [drop](../_generated/larray.Array.drop.rst#larray.Array.drop): to remove one or several labels. + - [combine_axes](../_generated/larray.Array.combine_axes.rst#larray.Array.combine_axes): to combine axes. + - [split_axes](../_generated/larray.Array.split_axes.rst#larray.Array.split_axes): to split one or several axes by splitting their labels and names. + - [reindex](../_generated/larray.Array.reindex.rst#larray.Array.reindex): to reorder, add and remove labels of one or several axes. + - [insert](../_generated/larray.Array.insert.rst#larray.Array.insert): to insert a label at a given position. - markdown: | @@ -82,7 +82,7 @@ cells: - markdown: | - See [set_labels](../_generated/larray.LArray.set_labels.rst#larray.LArray.set_labels) for more details and examples. + See [set_labels](../_generated/larray.Array.set_labels.rst#larray.Array.set_labels) for more details and examples. - markdown: | @@ -107,7 +107,7 @@ cells: - markdown: | - See [rename](../_generated/larray.LArray.rename.rst#larray.LArray.rename) for more details and examples. 
+ See [rename](../_generated/larray.Array.rename.rst#larray.Array.rename) for more details and examples. - markdown: | @@ -177,7 +177,7 @@ cells: - markdown: | - See [transpose](../_generated/larray.LArray.transpose.rst#larray.LArray.transpose) for more details and examples. + See [transpose](../_generated/larray.Array.transpose.rst#larray.Array.transpose) for more details and examples. - markdown: | @@ -190,7 +190,7 @@ cells: - markdown: | - See [drop](../_generated/larray.LArray.drop.rst#larray.LArray.drop) for more details and examples. + See [drop](../_generated/larray.Array.drop.rst#larray.Array.drop) for more details and examples. - markdown: | @@ -214,7 +214,7 @@ cells: - markdown: | - See [combine_axes](../_generated/larray.LArray.combine_axes.rst#larray.LArray.combine_axes) and [split_axes](../_generated/larray.LArray.split_axes.rst#larray.LArray.split_axes) for more details and examples. + See [combine_axes](../_generated/larray.Array.combine_axes.rst#larray.Array.combine_axes) and [split_axes](../_generated/larray.Array.split_axes.rst#larray.Array.split_axes) for more details and examples. - markdown: | @@ -240,7 +240,7 @@ cells: - markdown: | - See [reindex](../_generated/larray.LArray.reindex.rst#larray.LArray.reindex) for more details and examples. + See [reindex](../_generated/larray.Array.reindex.rst#larray.Array.reindex) for more details and examples. - markdown: | @@ -257,16 +257,16 @@ cells: - markdown: | - See [insert](../_generated/larray.LArray.insert.rst#larray.LArray.insert) for more details and examples. + See [insert](../_generated/larray.Array.insert.rst#larray.Array.insert) for more details and examples. - markdown: | ## Sorting - - [sort_axes](../_generated/larray.LArray.sort_axes.rst#larray.LArray.sort_axes): sort the labels of an axis. - - [labelsofsorted](../_generated/larray.LArray.labelsofsorted.rst#larray.LArray.labelsofsorted): give labels which would sort an axis. 
- - [sort_values](../_generated/larray.LArray.sort_values.rst#larray.LArray.sort_values): sort axes according to values + - [sort_axes](../_generated/larray.Array.sort_axes.rst#larray.Array.sort_axes): sort the labels of an axis. + - [labelsofsorted](../_generated/larray.Array.labelsofsorted.rst#larray.Array.labelsofsorted): give labels which would sort an axis. + - [sort_values](../_generated/larray.Array.sort_values.rst#larray.Array.sort_values): sort axes according to values - code: | @@ -304,12 +304,12 @@ cells: - markdown: | ## Combining arrays - LArray offers several methods to combine arrays: + The LArray library offers several methods and functions to combine arrays: - - [insert](../_generated/larray.LArray.insert.rst#larray.LArray.insert): inserts an array in another array along an axis - - [append](../_generated/larray.LArray.append.rst#larray.LArray.append): adds an array at the end of an axis. - - [prepend](../_generated/larray.LArray.prepend.rst#larray.LArray.prepend): adds an array at the beginning of an axis. - - [extend](../_generated/larray.LArray.extend.rst#larray.LArray.extend): extends an array along an axis. + - [insert](../_generated/larray.Array.insert.rst#larray.Array.insert): inserts an array in another array along an axis + - [append](../_generated/larray.Array.append.rst#larray.Array.append): adds an array at the end of an axis. + - [prepend](../_generated/larray.Array.prepend.rst#larray.Array.prepend): adds an array at the beginning of an axis. + - [extend](../_generated/larray.Array.extend.rst#larray.Array.extend): extends an array along an axis. - [stack](../_generated/larray.stack.rst#larray.stack): combines several arrays along an axis. @@ -326,7 +326,7 @@ cells: - markdown: | - See [insert](../_generated/larray.LArray.insert.rst#larray.LArray.insert) for more details and examples. + See [insert](../_generated/larray.Array.insert.rst#larray.Array.insert) for more details and examples. 
- markdown: | @@ -346,7 +346,7 @@ cells: - code: | - pop_lux = LArray([-1, 1], pop.gender) + pop_lux = Array([-1, 1], pop.gender) pop_lux @@ -356,7 +356,7 @@ cells: - markdown: | - See [append](../_generated/larray.LArray.append.rst#larray.LArray.append) for more details and examples. + See [append](../_generated/larray.Array.append.rst#larray.Array.append) for more details and examples. - markdown: | @@ -372,7 +372,7 @@ cells: - markdown: | - See [prepend](../_generated/larray.LArray.prepend.rst#larray.LArray.prepend) for more details and examples. + See [prepend](../_generated/larray.Array.prepend.rst#larray.Array.prepend) for more details and examples. - markdown: | @@ -387,7 +387,7 @@ cells: - markdown: | - See [extend](../_generated/larray.LArray.extend.rst#larray.LArray.extend) for more details and examples. + See [extend](../_generated/larray.Array.extend.rst#larray.Array.extend) for more details and examples. - markdown: | @@ -451,7 +451,7 @@ cells: - markdown: | - See [align](../_generated/larray.LArray.align.rst#larray.LArray.align) for more details and examples. + See [align](../_generated/larray.Array.align.rst#larray.Array.align) for more details and examples. # The lines below here may be deleted if you do not need them. 
diff --git a/doc/source/tutorial/tutorial_transforming.ipynb b/doc/source/tutorial/tutorial_transforming.ipynb index 1632f2d78..7fc6505ab 100644 --- a/doc/source/tutorial/tutorial_transforming.ipynb +++ b/doc/source/tutorial/tutorial_transforming.ipynb @@ -81,17 +81,17 @@ "## Manipulating axes\n", "\n", "\n", - "LArray offers several methods to manipulate the axes and labels of an array:\n", + "The ``Array`` class offers several methods to manipulate the axes and labels of an array:\n", "\n", - "- [set_labels](../_generated/larray.LArray.set_labels.rst#larray.LArray.set_labels): to replace all or some labels of one or several axes.\n", - "- [rename](../_generated/larray.LArray.rename.rst#larray.LArray.rename): to replace one or several axis names.\n", - "- [set_axes](../_generated/larray.LArray.set_axes.rst#larray.LArray.set_axes): to replace one or several axes.\n", - "- [transpose](../_generated/larray.LArray.transpose.rst#larray.LArray.transpose): to modify the order of axes.\n", - "- [drop](../_generated/larray.LArray.drop.rst#larray.LArray.drop): to remove one or several labels.\n", - "- [combine_axes](../_generated/larray.LArray.combine_axes.rst#larray.LArray.combine_axes): to combine axes.\n", - "- [split_axes](../_generated/larray.LArray.split_axes.rst#larray.LArray.split_axes): to split one or several axes by splitting their labels and names.\n", - "- [reindex](../_generated/larray.LArray.reindex.rst#larray.LArray.reindex): to reorder, add and remove labels of one or several axes.\n", - "- [insert](../_generated/larray.LArray.insert.rst#larray.LArray.insert): to insert a label at a given position.\n" + "- [set_labels](../_generated/larray.Array.set_labels.rst#larray.Array.set_labels): to replace all or some labels of one or several axes.\n", + "- [rename](../_generated/larray.Array.rename.rst#larray.Array.rename): to replace one or several axis names.\n", + "- [set_axes](../_generated/larray.Array.set_axes.rst#larray.Array.set_axes): to replace one or 
several axes.\n", + "- [transpose](../_generated/larray.Array.transpose.rst#larray.Array.transpose): to modify the order of axes.\n", + "- [drop](../_generated/larray.Array.drop.rst#larray.Array.drop): to remove one or several labels.\n", + "- [combine_axes](../_generated/larray.Array.combine_axes.rst#larray.Array.combine_axes): to combine axes.\n", + "- [split_axes](../_generated/larray.Array.split_axes.rst#larray.Array.split_axes): to split one or several axes by splitting their labels and names.\n", + "- [reindex](../_generated/larray.Array.reindex.rst#larray.Array.reindex): to reorder, add and remove labels of one or several axes.\n", + "- [insert](../_generated/larray.Array.insert.rst#larray.Array.insert): to insert a label at a given position.\n" ] }, { @@ -140,7 +140,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [set_labels](../_generated/larray.LArray.set_labels.rst#larray.LArray.set_labels) for more details and examples." + "See [set_labels](../_generated/larray.Array.set_labels.rst#larray.Array.set_labels) for more details and examples." ] }, { @@ -184,7 +184,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [rename](../_generated/larray.LArray.rename.rst#larray.LArray.rename) for more details and examples." + "See [rename](../_generated/larray.Array.rename.rst#larray.Array.rename) for more details and examples." ] }, { @@ -301,7 +301,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [transpose](../_generated/larray.LArray.transpose.rst#larray.LArray.transpose) for more details and examples." + "See [transpose](../_generated/larray.Array.transpose.rst#larray.Array.transpose) for more details and examples." ] }, { @@ -325,7 +325,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [drop](../_generated/larray.LArray.drop.rst#larray.LArray.drop) for more details and examples." + "See [drop](../_generated/larray.Array.drop.rst#larray.Array.drop) for more details and examples." 
] }, { @@ -368,7 +368,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [combine_axes](../_generated/larray.LArray.combine_axes.rst#larray.LArray.combine_axes) and [split_axes](../_generated/larray.LArray.split_axes.rst#larray.LArray.split_axes) for more details and examples." + "See [combine_axes](../_generated/larray.Array.combine_axes.rst#larray.Array.combine_axes) and [split_axes](../_generated/larray.Array.split_axes.rst#larray.Array.split_axes) for more details and examples." ] }, { @@ -413,7 +413,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [reindex](../_generated/larray.LArray.reindex.rst#larray.LArray.reindex) for more details and examples." + "See [reindex](../_generated/larray.Array.reindex.rst#larray.Array.reindex) for more details and examples." ] }, { @@ -441,7 +441,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [insert](../_generated/larray.LArray.insert.rst#larray.LArray.insert) for more details and examples." + "See [insert](../_generated/larray.Array.insert.rst#larray.Array.insert) for more details and examples." ] }, { @@ -451,9 +451,9 @@ "## Sorting\n", "\n", "\n", - "- [sort_axes](../_generated/larray.LArray.sort_axes.rst#larray.LArray.sort_axes): sort the labels of an axis.\n", - "- [labelsofsorted](../_generated/larray.LArray.labelsofsorted.rst#larray.LArray.labelsofsorted): give labels which would sort an axis. \n", - "- [sort_values](../_generated/larray.LArray.sort_values.rst#larray.LArray.sort_values): sort axes according to values" + "- [sort_axes](../_generated/larray.Array.sort_axes.rst#larray.Array.sort_axes): sort the labels of an axis.\n", + "- [labelsofsorted](../_generated/larray.Array.labelsofsorted.rst#larray.Array.labelsofsorted): give labels which would sort an axis. 
\n", + "- [sort_values](../_generated/larray.Array.sort_values.rst#larray.Array.sort_values): sort axes according to values" ] }, { @@ -523,12 +523,12 @@ "source": [ "## Combining arrays\n", "\n", - "LArray offers several methods to combine arrays:\n", + "The LArray library offers several methods and functions to combine arrays:\n", "\n", - "- [insert](../_generated/larray.LArray.insert.rst#larray.LArray.insert): inserts an array in another array along an axis\n", - "- [append](../_generated/larray.LArray.append.rst#larray.LArray.append): adds an array at the end of an axis.\n", - "- [prepend](../_generated/larray.LArray.prepend.rst#larray.LArray.prepend): adds an array at the beginning of an axis.\n", - "- [extend](../_generated/larray.LArray.extend.rst#larray.LArray.extend): extends an array along an axis.\n", + "- [insert](../_generated/larray.Array.insert.rst#larray.Array.insert): inserts an array in another array along an axis\n", + "- [append](../_generated/larray.Array.append.rst#larray.Array.append): adds an array at the end of an axis.\n", + "- [prepend](../_generated/larray.Array.prepend.rst#larray.Array.prepend): adds an array at the beginning of an axis.\n", + "- [extend](../_generated/larray.Array.extend.rst#larray.Array.extend): extends an array along an axis.\n", "- [stack](../_generated/larray.stack.rst#larray.stack): combines several arrays along an axis.\n" ] }, @@ -556,7 +556,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [insert](../_generated/larray.LArray.insert.rst#larray.LArray.insert) for more details and examples." + "See [insert](../_generated/larray.Array.insert.rst#larray.Array.insert) for more details and examples." 
] }, { @@ -592,7 +592,7 @@ "metadata": {}, "outputs": [], "source": [ - "pop_lux = LArray([-1, 1], pop.gender)\n", + "pop_lux = Array([-1, 1], pop.gender)\n", "pop_lux" ] }, @@ -610,7 +610,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [append](../_generated/larray.LArray.append.rst#larray.LArray.append) for more details and examples." + "See [append](../_generated/larray.Array.append.rst#larray.Array.append) for more details and examples." ] }, { @@ -637,7 +637,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [prepend](../_generated/larray.LArray.prepend.rst#larray.LArray.prepend) for more details and examples." + "See [prepend](../_generated/larray.Array.prepend.rst#larray.Array.prepend) for more details and examples." ] }, { @@ -663,7 +663,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [extend](../_generated/larray.LArray.extend.rst#larray.LArray.extend) for more details and examples." + "See [extend](../_generated/larray.Array.extend.rst#larray.Array.extend) for more details and examples." ] }, { @@ -767,7 +767,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See [align](../_generated/larray.LArray.align.rst#larray.LArray.align) for more details and examples." + "See [align](../_generated/larray.Array.align.rst#larray.Array.align) for more details and examples." 
] } ], diff --git a/larray/__init__.py b/larray/__init__.py index ab65cf548..d8e7a40df 100644 --- a/larray/__init__.py +++ b/larray/__init__.py @@ -5,8 +5,8 @@ from larray.core.axis import Axis, AxisCollection, X from larray.core.group import Group, LGroup, LSet, IGroup, union -from larray.core.array import (LArray, zeros, zeros_like, ones, ones_like, empty, empty_like, full, - full_like, sequence, labels_array, ndtest, aslarray, identity, diag, +from larray.core.array import (Array, zeros, zeros_like, ones, ones_like, empty, empty_like, full, + full_like, sequence, labels_array, ndtest, asarray, identity, diag, eye, all, any, sum, prod, cumsum, cumprod, min, max, mean, ptp, var, std, median, percentile, stack, zip_array_values, zip_array_items) from larray.core.session import Session, local_arrays, global_arrays, arrays @@ -53,8 +53,8 @@ # group 'Group', 'LGroup', 'LSet', 'IGroup', 'union', # array - 'LArray', 'zeros', 'zeros_like', 'ones', 'ones_like', 'empty', 'empty_like', 'full', - 'full_like', 'sequence', 'labels_array', 'ndtest', 'aslarray', 'identity', 'diag', 'eye', + 'Array', 'zeros', 'zeros_like', 'ones', 'ones_like', 'empty', 'empty_like', 'full', + 'full_like', 'sequence', 'labels_array', 'ndtest', 'asarray', 'identity', 'diag', 'eye', 'all', 'any', 'sum', 'prod', 'cumsum', 'cumprod', 'min', 'max', 'mean', 'ptp', 'var', 'std', 'median', 'percentile', 'stack', # session @@ -93,8 +93,8 @@ from larray.core.axis import x from larray.core.group import PGroup -from larray.core.array import (create_sequential, ndrange, larray_equal, larray_nan_equal, - nan_equal, element_equal) +from larray.core.array import (LArray, aslarray, create_sequential, ndrange, larray_equal, + larray_nan_equal, nan_equal, element_equal) _deprecated = [ @@ -103,7 +103,9 @@ # group 'PGroup', # array - 'create_sequential', 'ndrange', 'larray_equal', 'larray_nan_equal', 'nan_equal', 'element_equal', + 'LArray', 'aslarray', + 'create_sequential', 'ndrange', + 'larray_equal', 
'larray_nan_equal', 'nan_equal', 'element_equal', ] __all__ += _deprecated diff --git a/larray/core/abstractbases.py b/larray/core/abstractbases.py index d5d766673..599e74975 100644 --- a/larray/core/abstractbases.py +++ b/larray/core/abstractbases.py @@ -14,5 +14,5 @@ class ABCAxisReference(ABCAxis): __metaclass__ = ABCMeta -class ABCLArray(object): +class ABCArray(object): __metaclass__ = ABCMeta diff --git a/larray/core/array.py b/larray/core/array.py index 39a4ac7c8..982e5ab61 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -2,7 +2,7 @@ from __future__ import absolute_import, division, print_function """ -Matrix class +Array class """ # ? implement multi group in one axis getitem: lipro['P01,P02;P05'] <=> (lipro['P01,P02'], lipro['P05']) @@ -53,7 +53,7 @@ except ImportError: np_nanprod = None -from larray.core.abstractbases import ABCLArray +from larray.core.abstractbases import ABCArray from larray.core.constants import nan from larray.core.metadata import Metadata from larray.core.expr import ExprNode @@ -73,9 +73,9 @@ def all(values, axis=None): See Also -------- - LArray.all + Array.all """ - if isinstance(values, LArray): + if isinstance(values, Array): return values.all(axis) else: return builtins.all(values) @@ -87,9 +87,9 @@ def any(values, axis=None): See Also -------- - LArray.any + Array.any """ - if isinstance(values, LArray): + if isinstance(values, Array): return values.any(axis) else: return builtins.any(values) @@ -102,13 +102,13 @@ def sum(array, *args, **kwargs): See Also -------- - LArray.sum + Array.sum """ # XXX: we might want to be more aggressive here (more types to convert), however, generators should still be # computed via the builtin. 
if isinstance(array, (np.ndarray, list)): - array = LArray(array) - if isinstance(array, LArray): + array = Array(array) + if isinstance(array, Array): return array.sum(*args, **kwargs) else: return builtins.sum(array, *args, **kwargs) @@ -120,7 +120,7 @@ def prod(array, *args, **kwargs): See Also -------- - LArray.prod + Array.prod """ return array.prod(*args, **kwargs) @@ -131,7 +131,7 @@ def cumsum(array, *args, **kwargs): See Also -------- - LArray.cumsum + Array.cumsum """ return array.cumsum(*args, **kwargs) @@ -142,7 +142,7 @@ def cumprod(array, *args, **kwargs): See Also -------- - LArray.cumprod + Array.cumprod """ return array.cumprod(*args, **kwargs) @@ -153,9 +153,9 @@ def min(array, *args, **kwargs): See Also -------- - LArray.min + Array.min """ - if isinstance(array, LArray): + if isinstance(array, Array): return array.min(*args, **kwargs) else: return builtins.min(array, *args, **kwargs) @@ -167,9 +167,9 @@ def max(array, *args, **kwargs): See Also -------- - LArray.max + Array.max """ - if isinstance(array, LArray): + if isinstance(array, Array): return array.max(*args, **kwargs) else: return builtins.max(array, *args, **kwargs) @@ -181,7 +181,7 @@ def mean(array, *args, **kwargs): See Also -------- - LArray.mean + Array.mean """ return array.mean(*args, **kwargs) @@ -192,7 +192,7 @@ def median(array, *args, **kwargs): See Also -------- - LArray.median + Array.median """ return array.median(*args, **kwargs) @@ -203,7 +203,7 @@ def percentile(array, *args, **kwargs): See Also -------- - LArray.percentile + Array.percentile """ return array.percentile(*args, **kwargs) @@ -215,7 +215,7 @@ def ptp(array, *args, **kwargs): See Also -------- - LArray.ptp + Array.ptp """ return array.ptp(*args, **kwargs) @@ -226,7 +226,7 @@ def var(array, *args, **kwargs): See Also -------- - LArray.var + Array.var """ return array.var(*args, **kwargs) @@ -237,7 +237,7 @@ def std(array, *args, **kwargs): See Also -------- - LArray.std + Array.std """ return 
array.std(*args, **kwargs) @@ -247,7 +247,7 @@ def concat(arrays, axis=0, dtype=None): Parameters ---------- - arrays : tuple of LArray + arrays : tuple of Array Arrays to concatenate. axis : axis reference (int, str or Axis), optional Axis along which to concatenate. All arrays must have that axis. Defaults to the first axis. @@ -256,7 +256,7 @@ def concat(arrays, axis=0, dtype=None): Returns ------- - LArray + Array Examples -------- @@ -310,36 +310,36 @@ def concat(arrays, axis=0, dtype=None): if PY2: - class LArrayIterator(object): + class ArrayIterator(object): __slots__ = ('next',) def __init__(self, array): data_iter = iter(array.data) next_data_func = data_iter.next res_axes = array.axes[1:] - # this case should not happen (handled by the fastpath in LArray.__iter__) + # this case should not happen (handled by the fastpath in Array.__iter__) assert len(res_axes) > 0 def next_func(): - return LArray(next_data_func(), res_axes) + return Array(next_data_func(), res_axes) self.next = next_func def __iter__(self): return self else: - class LArrayIterator(object): + class ArrayIterator(object): __slots__ = ('__next__',) def __init__(self, array): data_iter = iter(array.data) next_data_func = data_iter.__next__ res_axes = array.axes[1:] - # this case should not happen (handled by the fastpath in LArray.__iter__) + # this case should not happen (handled by the fastpath in Array.__iter__) assert len(res_axes) > 0 def next_func(): - return LArray(next_data_func(), res_axes) + return Array(next_data_func(), res_axes) self.__next__ = next_func @@ -347,8 +347,8 @@ def __iter__(self): return self -# TODO: rename to LArrayIndexIndexer or something like that -class LArrayPositionalIndexer(object): +# TODO: rename to ArrayIndexIndexer or something like that +class ArrayPositionalIndexer(object): """ equivalent to numpy indexing when indexing along a single axis *but* indexes the cross product of multiple axes instead of points @@ -405,10 +405,10 @@ def __iter__(self): if 
array.ndim <= 1: return iter(array.data) else: - return LArrayIterator(array) + return ArrayIterator(array) -class LArrayPointsIndexer(object): +class ArrayPointsIndexer(object): __slots__ = ('array',) def __init__(self, array): @@ -420,7 +420,7 @@ def _prepare_key(self, key, wildcard=False): # the key we need to know which axis each key belongs to and to do that, we need to # translate the key to indices) translated_key = axes._translated_key(key) - # 2) transform keys to IGroup and non-LArray advanced keys to LArray with a combined axis + # 2) transform keys to IGroup and non-Array advanced keys to Array with a combined axis return axes._adv_keys_to_combined_axis_la_keys(translated_key, wildcard) def __getitem__(self, key): @@ -436,7 +436,7 @@ def __setitem__(self, key, value): # >>> arr.iflat[:4] # a_b a0_b0 a0_b1 a0_b2 a1_b0 # 0 10 20 30 -class LArrayFlatIndicesIndexer(object): +class ArrayFlatIndicesIndexer(object): r""" Access the array by index as if it was flat (one dimensional) and all its axes were combined. 
@@ -469,9 +469,9 @@ class LArrayFlatIndicesIndexer(object): a0 42 10 20 a1 30 40 42 - When the key is an LArray, the result will have the axes of the key + When the key is an Array, the result will have the axes of the key - >>> key = LArray([0, 3], 'c=c0,c1') + >>> key = Array([0, 3], 'c=c0,c1') >>> key c c0 c1 0 3 @@ -485,7 +485,7 @@ def __init__(self, array): self.array = array def __getitem__(self, flat_key, sep='_'): - if isinstance(flat_key, ABCLArray): + if isinstance(flat_key, ABCArray): flat_np_key = flat_key.data res_axes = flat_key.axes else: @@ -494,14 +494,14 @@ def __getitem__(self, flat_key, sep='_'): nd_key = np.unravel_index(flat_np_key, axes.shape) # the following lines are equivalent to (but faster than) "return array.ipoints[nd_key]" - # TODO: extract a function which only computes the combined axes because we do not use the actual LArrays + # TODO: extract a function which only computes the combined axes because we do not use the actual Arrays # produced here, which is wasteful. AxisCollection._flat_lookup seems related (but not usable as-is). 
la_key = axes._adv_keys_to_combined_axis_la_keys(nd_key, sep=sep) first_axis_key_axes = la_key[0].axes - assert all(isinstance(axis_key, ABCLArray) and axis_key.axes is first_axis_key_axes + assert all(isinstance(axis_key, ABCArray) and axis_key.axes is first_axis_key_axes for axis_key in la_key[1:]) res_axes = first_axis_key_axes - return LArray(self.array.data.flat[flat_np_key], res_axes) + return Array(self.array.data.flat[flat_np_key], res_axes) def __setitem__(self, flat_key, value): # np.ndarray.flat is a flatiter object but it is indexable despite the name @@ -511,8 +511,8 @@ def __len__(self): return self.array.size -# TODO: rename to LArrayIndexPointsIndexer or something like that -class LArrayPositionalPointsIndexer(object): +# TODO: rename to ArrayIndexPointsIndexer or something like that +class ArrayPositionalPointsIndexer(object): __slots__ = ('array',) """ the closest to numpy indexing we get, but not 100% the same. @@ -528,7 +528,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): # we still need to prepare the key instead of letting numpy handle everything so that - # existing (integer)LArray keys are broadcasted correctly (using axes names). + # existing (integer)Array keys are broadcasted correctly (using axes names). self.array.__setitem__(self._prepare_key(key, wildcard=True), value, translate_key=False) @@ -538,15 +538,15 @@ def get_axis(obj, i): Parameters ---------- - obj : LArray or other array - Input LArray or any array object which has a shape attribute (NumPy or Pandas array). + obj : Array or other array + Input Array or any array object which has a shape attribute (NumPy or Pandas array). i : int index of the axis. Returns ------- Axis - Axis corresponding to the given index if input `obj` is a LArray. A new anonymous Axis with the length of + Axis corresponding to the given index if input `obj` is an Array. A new anonymous Axis with the length of the ith dimension of the input `obj` otherwise. 
Examples @@ -564,7 +564,7 @@ def get_axis(obj, i): >>> get_axis(np_arr, 1) Axis(2, None) """ - return obj.axes[i] if isinstance(obj, LArray) else Axis(obj.shape[i]) + return obj.axes[i] if isinstance(obj, Array) else Axis(obj.shape[i]) _arg_agg = { @@ -578,7 +578,7 @@ def get_axis(obj, i): dtype : dtype, optional The data type of the returned array. Defaults to None (the dtype of the input array)."""}, 'out': {'value': None, 'doc': """ - out : LArray, optional + out : Array, optional Alternate output array in which to place the result. It must have the same shape as the expected output and its type is preserved (e.g., if dtype(out) is float, the result will consist of 0.0’s and 1.0’s). Axes and labels can be different, only the shape matters. Defaults to None (create a new array)."""}, @@ -671,7 +671,7 @@ def element_equal(a1, a2, rtol=0, atol=0, nan_equals=False): import warnings warnings.warn("element_equal() is deprecated. Use array1.eq(array2, rtol, atol, nan_equals) instead.", FutureWarning, stacklevel=2) - a1 = aslarray(a1) + a1 = asarray(a1) return a1.eq(a2, rtol, atol, nan_equals) @@ -700,11 +700,11 @@ def _handle_meta(meta, title): return Metadata(meta) -class LArray(ABCLArray): +class Array(ABCArray): r""" - A LArray object represents a multidimensional, homogeneous array of fixed-size items with labeled axes. + An Array object represents a multidimensional, homogeneous array of fixed-size items with labeled axes. - The function :func:`aslarray` can be used to convert a NumPy array or Pandas DataFrame into a LArray. + The function :func:`asarray` can be used to convert a NumPy array or Pandas DataFrame into an Array. Parameters ---------- @@ -731,12 +731,12 @@ class LArray(ABCLArray): See Also -------- - sequence : Create a LArray by sequentially applying modifications to the array along axis. - ndtest : Create a test LArray with increasing elements. - zeros : Create a LArray, each element of which is zero. 
- ones : Create a LArray, each element of which is 1. - full : Create a LArray filled with a given value. - empty : Create a LArray, but leave its allocated memory unchanged (i.e., it contains “garbage”). + sequence : Create an Array by sequentially applying modifications to the array along axis. + ndtest : Create a test Array with increasing elements. + zeros : Create an Array, each element of which is zero. + ones : Create an Array, each element of which is 1. + full : Create an Array filled with a given value. + empty : Create an Array, but leave its allocated memory unchanged (i.e., it contains “garbage”). Warnings -------- @@ -753,7 +753,7 @@ class LArray(ABCLArray): >>> axes = [age, sex, time] >>> data = np.zeros((len(axes), len(sex), len(time))) - >>> LArray(data, axes) + >>> Array(data, axes) age sex\time 2007 2008 2009 10 M 0.0 0.0 0.0 10 F 0.0 0.0 0.0 @@ -762,9 +762,9 @@ class LArray(ABCLArray): 12 M 0.0 0.0 0.0 12 F 0.0 0.0 0.0 >>> # with metadata (Python <= 3.5) - >>> arr = LArray(data, axes, meta=[('title', 'my title'), ('author', 'John Smith')]) + >>> arr = Array(data, axes, meta=[('title', 'my title'), ('author', 'John Smith')]) >>> # with metadata (Python 3.6+) - >>> arr = LArray(data, axes, meta=Metadata(title='my title', author='John Smith')) # doctest: +SKIP + >>> arr = Array(data, axes, meta=Metadata(title='my title', author='John Smith')) # doctest: +SKIP Array creation functions @@ -955,7 +955,7 @@ def set_axes(self, axes_to_replace=None, new_axis=None, inplace=False, **kwargs) Returns ------- - LArray + Array Array with axes replaced. 
See Also @@ -1012,7 +1012,7 @@ def set_axes(self, axes_to_replace=None, new_axis=None, inplace=False, **kwargs) self.axes = new_axes return self else: - return LArray(self.data, new_axes) + return Array(self.data, new_axes) with_axes = renamed_to(set_axes, 'with_axes') @@ -1063,7 +1063,7 @@ def i(self): a1 b0 12 14 a1 b1 16 18 """ - return LArrayPositionalIndexer(self) + return ArrayPositionalIndexer(self) @lazy_attribute def points(self): @@ -1099,7 +1099,7 @@ def points(self): ... IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (2,) (2,) (3,) """ - return LArrayPointsIndexer(self) + return ArrayPointsIndexer(self) # TODO: show that we need to use a "full slice" for leaving the dimension alone # TODO: document explicitly that axes should be in the correct order and missing axes should be slice None @@ -1135,11 +1135,11 @@ def ipoints(self): ... IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (2,) (2,) (3,) """ - return LArrayPositionalPointsIndexer(self) + return ArrayPositionalPointsIndexer(self) def to_frame(self, fold_last_axis_name=False, dropna=None): r""" - Converts LArray into Pandas DataFrame. + Converts an Array into a Pandas DataFrame. Parameters ---------- @@ -1211,7 +1211,7 @@ def to_frame(self, fold_last_axis_name=False, dropna=None): def to_series(self, name=None, dropna=False): r""" - Converts LArray into Pandas Series. + Converts an Array into a Pandas Series. 
Parameters ---------- @@ -1303,15 +1303,15 @@ def describe(self, *args, **kwargs): Returns ------- - LArray + Array See Also -------- - LArray.describe_by + Array.describe_by Examples -------- - >>> arr = LArray([0, 6, 2, 5, 4, 3, 1, 3], 'year=2013..2020') + >>> arr = Array([0, 6, 2, 5, 4, 3, 1, 3], 'year=2013..2020') >>> arr year 2013 2014 2015 2016 2017 2018 2019 2020 0 6 2 5 4 3 1 3 @@ -1354,16 +1354,16 @@ def describe_by(self, *args, **kwargs): Returns ------- - LArray + Array See Also -------- - LArray.describe + Array.describe Examples -------- >>> data = [[0, 6, 3, 5, 4, 2, 1, 3], [7, 5, 3, 2, 8, 5, 6, 4]] - >>> arr = LArray(data, 'gender=Male,Female;year=2013..2020').astype(float) + >>> arr = Array(data, 'gender=Male,Female;year=2013..2020').astype(float) >>> arr gender\year 2013 2014 2015 2016 2017 2018 2019 2020 Male 0.0 6.0 3.0 5.0 4.0 2.0 1.0 3.0 @@ -1411,7 +1411,7 @@ def __array_wrap__(self, out_arr, context=None): cases. """ data = np.ndarray.__array_wrap__(self.data, out_arr, context) - return LArray(data, self.axes) + return Array(data, self.axes) def __bool__(self): return bool(self.data) @@ -1434,7 +1434,7 @@ def rename(self, renames=None, to=None, inplace=False, **kwargs): Returns ------- - LArray + Array Array with axes renamed. See Also @@ -1472,7 +1472,7 @@ def rename(self, renames=None, to=None, inplace=False, **kwargs): self.axes = axes return self else: - return LArray(self.data, axes) + return Array(self.data, axes) def reindex(self, axes_to_reindex=None, new_axis=None, fill_value=nan, inplace=False, **kwargs): r"""Reorder and/or add new labels in axes. @@ -1488,7 +1488,7 @@ def reindex(self, axes_to_reindex=None, new_axis=None, fill_value=nan, inplace=F If a list of Axis or an AxisCollection is given, existing axes are reindexed while missing ones are added. new_axis : int, str, list/tuple/array of str, Group or Axis, optional List of new labels or new axis if `axes_to_reindex` contains a single axis reference. 
- fill_value : scalar or LArray, optional + fill_value : scalar or Array, optional Value used to fill cells corresponding to label combinations which were not present before reindexing. Defaults to NaN. inplace : bool, optional @@ -1499,7 +1499,7 @@ def reindex(self, axes_to_reindex=None, new_axis=None, fill_value=nan, inplace=F Returns ------- - LArray + Array Array with reindexed axes. Notes @@ -1642,7 +1642,7 @@ def align(self, other, join='outer', fill_value=nan, axes=None): Parameters ---------- - other : LArray-like + other : Array-like join : {'outer', 'inner', 'left', 'right', 'exact'}, optional Join method. For each axis common to both arrays: - outer: will use a label if it is in either arrays axis (ordered like the first array). @@ -1651,7 +1651,7 @@ def align(self, other, join='outer', fill_value=nan, axes=None): - left: will use the first array axis labels. - right: will use the other array axis labels. - exact: instead of aligning, raise an error when axes to be aligned are not equal. - fill_value : scalar or LArray, optional + fill_value : scalar or Array, optional Value used to fill cells corresponding to label combinations which are not common to both arrays. Defaults to NaN. 
axes : AxisReference or sequence of them, optional @@ -1660,7 +1660,7 @@ def align(self, other, join='outer', fill_value=nan, axes=None): Returns ------- - (left, right) : (LArray, LArray) + (left, right) : (Array, Array) Aligned objects Notes @@ -1810,10 +1810,10 @@ def align(self, other, join='outer', fill_value=nan, axes=None): ValueError: Both arrays are not aligned because align method with join='exact' expected Axis(['a0', 'a1'], 'a') to be equal to Axis(['a0', 'a1', 'a2'], 'a') """ - other = aslarray(other) + other = asarray(other) # reindex does not currently support anonymous axes if any(name is None for name in self.axes.names) or any(name is None for name in other.axes.names): - raise ValueError("arrays with anonymous axes are currently not supported by LArray.align") + raise ValueError("arrays with anonymous axes are currently not supported by Array.align") try: left_axes, right_axes = self.axes.align(other.axes, join=join, axes=axes) except ValueError as e: @@ -1839,21 +1839,21 @@ def sort_values(self, key=None, axis=None, ascending=True): Returns ------- - LArray + Array Array with sorted values. Examples -------- sort the whole array (no key or axis given) - >>> arr_1D = LArray([10, 2, 4], 'a=a0..a2') + >>> arr_1D = Array([10, 2, 4], 'a=a0..a2') >>> arr_1D a a0 a1 a2 10 2 4 >>> arr_1D.sort_values() a a1 a2 a0 2 4 10 - >>> arr_2D = LArray([[10, 2, 4], [3, 7, 1]], 'a=a0,a1; b=b0..b2') + >>> arr_2D = Array([[10, 2, 4], [3, 7, 1]], 'a=a0,a1; b=b0..b2') >>> arr_2D a\b b0 b1 b2 a0 10 2 4 @@ -1874,7 +1874,7 @@ def sort_values(self, key=None, axis=None, ascending=True): a\b b1 b0 b2 a0 2 10 4 a1 7 3 1 - >>> arr_3D = LArray([[[10, 2, 4], [3, 7, 1]], [[5, 1, 6], [2, 8, 9]]], + >>> arr_3D = Array([[[10, 2, 4], [3, 7, 1]], [[5, 1, 6], [2, 8, 9]]], ... 
'a=a0,a1; b=b0,b1; c=c0..c2') >>> arr_3D a b\c c0 c1 c2 @@ -1916,7 +1916,7 @@ def sort_values(self, key=None, axis=None, ascending=True): axis_idx = self.axes.index(axis) data = np.sort(self.data, axis_idx) new_axes = self.axes.replace(axis_idx, Axis(len(axis), axis.name)) - res = LArray(data, new_axes) + res = Array(data, new_axes) elif key is not None: subset = self[key] if subset.ndim > 1: @@ -1956,7 +1956,7 @@ def sort_axes(self, axes=None, ascending=True): Returns ------- - LArray + Array Array with sorted axes. Examples @@ -2058,7 +2058,7 @@ def _translate_axis_key_chunk(self, axis_key): # otherwise we need to guess the axis # TODO: instead of checking all axes, we should have a big mapping - # (in AxisCollection or LArray): + # (in AxisCollection or Array): # label -> (axis, index) # but for Pandas, this wouldn't work, we'd need label -> axis valid_axes = [] @@ -2103,13 +2103,13 @@ def _translate_axis_key(self, axis_key): axis_key = axis_key.labels # TODO: do it for Group without axis too - if isinstance(axis_key, (tuple, list, np.ndarray, LArray)): + if isinstance(axis_key, (tuple, list, np.ndarray, Array)): axis = None # TODO: I should actually do some benchmarks to see if this is useful, and estimate which numbers to use # FIXME: check that size is < than key size for size in (1, 10, 100, 1000): # TODO: do not recheck already checked elements - key_chunk = axis_key.i[:size] if isinstance(axis_key, LArray) else axis_key[:size] + key_chunk = axis_key.i[:size] if isinstance(axis_key, Array) else axis_key[:size] try: tkey = self._translate_axis_key_chunk(key_chunk) axis = tkey.axis @@ -2136,7 +2136,7 @@ def __getitem__(self, key, collapse_slices=False, translate_key=True): translate_key) res_data = data[raw_broadcasted_key] if res_axes: - res = LArray(res_data, res_axes) + res = Array(res_data, res_axes) # if some axes have been moved in front because of advanced indexing, we transpose them back to their # original position return 
res.transpose(transpose_indices) if transpose_indices is not None else res @@ -2150,7 +2150,7 @@ def __setitem__(self, key, value, collapse_slices=True, translate_key=True): # data = np.asarray(expanded.data) data = self.data raw_broadcasted_key, target_axes, _ = self.axes._key_to_raw_and_axes(key, collapse_slices, translate_key) - if isinstance(value, LArray): + if isinstance(value, Array): # TODO: the check_compatible should be included in broadcast_with value = value.broadcast_with(target_axes) value.axes.check_compatible(target_axes) @@ -2167,8 +2167,8 @@ def __setitem__(self, key, value, collapse_slices=True, translate_key=True): data[raw_broadcasted_key] = value # concerning keys this can make sense in several cases: - # single bool LArray key with extra axes. - # tuple of bool LArray keys (eg one for each axis). each could have extra axes. Common axes between keys are + # single bool Array key with extra axes. + # tuple of bool Array keys (eg one for each axis). each could have extra axes. Common axes between keys are # not a problem, we can simply "and" them. Though we should avoid explicitly "and"ing them if there is no # common axis because that is less efficient than the implicit "and" that is done by numpy __getitem__ (and # the fact we need to combine dimensions when any key has more than 1 dim). @@ -2186,7 +2186,7 @@ def set(self, value, **kwargs): Parameters ---------- - value : scalar or LArray + value : scalar or Array Examples -------- @@ -2226,7 +2226,7 @@ def reshape(self, target_axes): Returns ------- - LArray + Array New array with new axes but same data. 
Examples @@ -2259,7 +2259,7 @@ def reshape(self, target_axes): if not isinstance(target_axes, AxisCollection): target_axes = AxisCollection(target_axes) data = np.asarray(self).reshape(target_axes.shape) - return LArray(data, target_axes) + return Array(data, target_axes) # TODO: this should be a private method def reshape_like(self, target): @@ -2269,7 +2269,7 @@ def reshape_like(self, target): See Also -------- - reshape : returns a LArray with a new shape given a list of axes. + reshape : returns an Array with a new shape given a list of axes. Examples -------- @@ -2303,13 +2303,13 @@ def broadcast_with(self, target): Parameters ---------- - target : LArray or collection of Axis + target : Array or collection of Axis Returns ------- - LArray + Array """ - if isinstance(target, LArray): + if isinstance(target, Array): target_axes = target.axes else: target_axes = target @@ -2353,7 +2353,7 @@ def ignore_labels(self, axes=None): Returns ------- - LArray + Array Notes ----- @@ -2409,14 +2409,14 @@ def ignore_labels(self, axes=None): else: axes = self.axes[axes] res_axes = self.axes.replace([(axis, axis.ignore_labels()) for axis in axes]) - return LArray(self.data, res_axes) + return Array(self.data, res_axes) drop_labels = renamed_to(ignore_labels, 'drop_labels') def __str__(self): if not self.ndim: return str(np.asscalar(self)) elif not len(self): - return 'LArray([])' + return 'Array([])' else: table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS]) return table2str(table, 'nan', maxwidth=_OPTIONS[DISPLAY_WIDTH], keepcols=self.ndim - 1, @@ -2428,20 +2428,20 @@ def __iter__(self): if self.ndim <= 1: return iter(self.data) else: - return LArrayIterator(self) + return ArrayIterator(self) def __contains__(self, key): return any(key in axis for axis in self.axes) def as_table(self, maxlines=-1, edgeitems=5, light=False, wide=True, value_name='value'): r""" - Deprecated. Please use LArray.dump() instead. + Deprecated. 
Please use Array.dump() instead. """ - warnings.warn("LArray.as_table() is deprecated. Please use LArray.dump() instead.", FutureWarning, + warnings.warn("Array.as_table() is deprecated. Please use Array.dump() instead.", FutureWarning, stacklevel=2) return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name) - # XXX: dump as a 2D LArray with row & col dims? + # XXX: dump as a 2D Array with row & col dims? def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is', maxlines=-1, edgeitems=5): r""" @@ -2621,14 +2621,14 @@ def _axis_aggregate(self, op, axes=(), keepaxes=False, out=None, **kwargs): An aggregate function with this signature: func(a, axis=None, dtype=None, out=None, keepdims=False) axes : tuple of axes, optional Each axis can be an Axis object, str or int. - out : LArray, optional + out : Array, optional Alternative output array in which to place the result. It must have the same shape as the expected output. keepaxes : bool or scalar, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. 
Returns ------- - LArray or scalar + Array or scalar """ src_data = np.asarray(self) axes = self.axes[list(axes)] if axes else self.axes @@ -2641,7 +2641,7 @@ def _axis_aggregate(self, op, axes=(), keepaxes=False, out=None, **kwargs): else: kwargs['keepdims'] = bool(keepaxes) if out is not None: - assert isinstance(out, LArray) + assert isinstance(out, Array) kwargs['out'] = out.data res_data = op(src_data, axis=axes_indices, **kwargs) if keepaxes: @@ -2652,10 +2652,10 @@ def _axis_aggregate(self, op, axes=(), keepaxes=False, out=None, **kwargs): else: res_axes = self.axes - axes if not res_axes: - # scalars don't need to be wrapped in LArray + # scalars don't need to be wrapped in Array return res_data else: - return LArray(res_data, res_axes) + return Array(res_data, res_axes) def _cum_aggregate(self, op, axis): r""" @@ -2664,8 +2664,8 @@ def _cum_aggregate(self, op, axis): time. """ # TODO: accept a single group in axis, to filter & aggregate in one shot - return LArray(op(np.asarray(self), axis=self.axes.index(axis)), - self.axes) + return Array(op(np.asarray(self), axis=self.axes.index(axis)), + self.axes) # TODO: now that items is never a (k, v), it should be renamed to # something else: args? 
(groups would be misleading because each "item" can contain several groups) @@ -2754,7 +2754,7 @@ def _group_aggregate(self, op, items, keepaxes=False, out=None, **kwargs): res_axes[axis_idx] = Axis(groups, axis.name) if isinstance(res_data, np.ndarray): - res = LArray(res_data, res_axes) + res = Array(res_data, res_axes) else: res = res_data return res @@ -2877,7 +2877,7 @@ def with_total(self, *args, **kwargs): Returns ------- - LArray + Array Examples -------- @@ -2975,7 +2975,7 @@ def labelofmin(self, axis=None): Returns ------- - LArray + Array Notes ----- @@ -2986,7 +2986,7 @@ def labelofmin(self, axis=None): -------- >>> nat = Axis('nat=BE,FR,IT') >>> sex = Axis('sex=M,F') - >>> arr = LArray([[0, 1], [3, 2], [2, 5]], [nat, sex]) + >>> arr = Array([[0, 1], [3, 2], [2, 5]], [nat, sex]) >>> arr nat\sex M F BE 0 1 @@ -3001,7 +3001,7 @@ def labelofmin(self, axis=None): if axis is not None: axis, axis_idx = self.axes[axis], self.axes.index(axis) data = axis.labels[self.data.argmin(axis_idx)] - return LArray(data, self.axes - axis) + return Array(data, self.axes - axis) else: indices = np.unravel_index(self.data.argmin(), self.shape) return tuple(axis.labels[i] for i, axis in zip(indices, self.axes)) @@ -3018,7 +3018,7 @@ def indexofmin(self, axis=None): Returns ------- - LArray + Array Notes ----- @@ -3029,7 +3029,7 @@ def indexofmin(self, axis=None): -------- >>> nat = Axis('nat=BE,FR,IT') >>> sex = Axis('sex=M,F') - >>> arr = LArray([[0, 1], [3, 2], [2, 5]], [nat, sex]) + >>> arr = Array([[0, 1], [3, 2], [2, 5]], [nat, sex]) >>> arr nat\sex M F BE 0 1 @@ -3043,7 +3043,7 @@ def indexofmin(self, axis=None): """ if axis is not None: axis, axis_idx = self.axes[axis], self.axes.index(axis) - return LArray(self.data.argmin(axis_idx), self.axes - axis) + return Array(self.data.argmin(axis_idx), self.axes - axis) else: return np.unravel_index(self.data.argmin(), self.shape) @@ -3059,7 +3059,7 @@ def labelofmax(self, axis=None): Returns ------- - LArray + Array Notes 
----- @@ -3070,7 +3070,7 @@ def labelofmax(self, axis=None): -------- >>> nat = Axis('nat=BE,FR,IT') >>> sex = Axis('sex=M,F') - >>> arr = LArray([[0, 1], [3, 2], [2, 5]], [nat, sex]) + >>> arr = Array([[0, 1], [3, 2], [2, 5]], [nat, sex]) >>> arr nat\sex M F BE 0 1 @@ -3085,7 +3085,7 @@ def labelofmax(self, axis=None): if axis is not None: axis, axis_idx = self.axes[axis], self.axes.index(axis) data = axis.labels[self.data.argmax(axis_idx)] - return LArray(data, self.axes - axis) + return Array(data, self.axes - axis) else: indices = np.unravel_index(self.data.argmax(), self.shape) return tuple(axis.labels[i] for i, axis in zip(indices, self.axes)) @@ -3102,7 +3102,7 @@ def indexofmax(self, axis=None): Returns ------- - LArray + Array Notes ----- @@ -3113,7 +3113,7 @@ def indexofmax(self, axis=None): -------- >>> nat = Axis('nat=BE,FR,IT') >>> sex = Axis('sex=M,F') - >>> arr = LArray([[0, 1], [3, 2], [2, 5]], [nat, sex]) + >>> arr = Array([[0, 1], [3, 2], [2, 5]], [nat, sex]) >>> arr nat\sex M F BE 0 1 @@ -3127,7 +3127,7 @@ def indexofmax(self, axis=None): """ if axis is not None: axis, axis_idx = self.axes[axis], self.axes.index(axis) - return LArray(self.data.argmax(axis_idx), self.axes - axis) + return Array(self.data.argmax(axis_idx), self.axes - axis) else: return np.unravel_index(self.data.argmax(), self.shape) @@ -3150,11 +3150,11 @@ def labelsofsorted(self, axis=None, ascending=True, kind='quicksort'): Returns ------- - LArray + Array Examples -------- - >>> arr = LArray([[0, 1], [3, 2], [2, 5]], "nat=BE,FR,IT; sex=M,F") + >>> arr = Array([[0, 1], [3, 2], [2, 5]], "nat=BE,FR,IT; sex=M,F") >>> arr nat\sex M F BE 0 1 @@ -3177,7 +3177,7 @@ def labelsofsorted(self, axis=None, ascending=True, kind='quicksort'): axis = self.axes[0] axis = self.axes[axis] pos = self.indicesofsorted(axis, ascending=ascending, kind=kind) - return LArray(axis.labels[pos.data], pos.axes) + return Array(axis.labels[pos.data], pos.axes) argsort = renamed_to(labelsofsorted, 'argsort') 
@@ -3198,11 +3198,11 @@ def indicesofsorted(self, axis=None, ascending=True, kind='quicksort'): Returns ------- - LArray + Array Examples -------- - >>> arr = LArray([[1, 5], [3, 2], [0, 4]], "nat=BE,FR,IT; sex=M,F") + >>> arr = Array([[1, 5], [3, 2], [0, 4]], "nat=BE,FR,IT; sex=M,F") >>> arr nat\sex M F BE 1 5 @@ -3230,7 +3230,7 @@ def indicesofsorted(self, axis=None, ascending=True, kind='quicksort'): for i in range(self.ndim)) data = data[reverser] new_axis = Axis(np.arange(len(axis)), axis.name) - return LArray(data, self.axes.replace(axis, new_axis)) + return Array(data, self.axes.replace(axis, new_axis)) posargsort = renamed_to(indicesofsorted, 'posargsort') @@ -3492,12 +3492,12 @@ def items(self, axes=None, ascending=True): @lazy_attribute def iflat(self): - return LArrayFlatIndicesIndexer(self) - iflat.__doc__ = LArrayFlatIndicesIndexer.__doc__ + return ArrayFlatIndicesIndexer(self) + iflat.__doc__ = ArrayFlatIndicesIndexer.__doc__ def copy(self): r"""Returns a copy of the array. """ - return LArray(self.data.copy(), axes=self.axes[:], meta=self.meta) + return Array(self.data.copy(), axes=self.axes[:], meta=self.meta) # XXX: we might want to implement this using .groupby().first() def unique(self, axes=None, sort=False, sep='_'): @@ -3515,12 +3515,12 @@ def unique(self, axes=None, sort=False, sep='_'): Returns ------- - LArray + Array array with unique values Examples -------- - >>> arr = LArray([[0, 2, 0, 0], + >>> arr = Array([[0, 2, 0, 0], ... [1, 1, 1, 0]], 'a=a0,a1;b=b0..b3') >>> arr a\b b0 b1 b2 b3 @@ -3578,7 +3578,7 @@ def unique(self, axes=None, sort=False, sep='_'): first_axis_idx = self.axes.index(axes[0]) # XXX: use combine_axes(axes).items() instead? 
for labels, value in self.items(axes): - hashable_value = value.data.tobytes() if isinstance(value, LArray) else value + hashable_value = value.data.tobytes() if isinstance(value, Array) else value if hashable_value not in seen: list_append((sep_join(str(l) for l in labels), value)) seen_add(hashable_value) @@ -3595,7 +3595,7 @@ def unique(self, axes=None, sort=False, sep='_'): @property def info(self): - r"""Describes a LArray (metadata + shape and labels for each axis). + r"""Describes an Array (metadata + shape and labels for each axis). Returns ------- @@ -3604,7 +3604,7 @@ def info(self): Examples -------- - >>> mat0 = LArray([[2.0, 5.0], [8.0, 6.0]], "nat=BE,FO; sex=F,M") + >>> mat0 = Array([[2.0, 5.0], [8.0, 6.0]], "nat=BE,FO; sex=F,M") >>> mat0.info 2 x 2 nat [2]: 'BE' 'FO' @@ -3635,14 +3635,14 @@ def ratio(self, *axes): Returns ------- - LArray + Array array / array.sum(axes) Examples -------- >>> nat = Axis('nat=BE,FO') >>> sex = Axis('sex=M,F') - >>> a = LArray([[4, 6], [2, 8]], [nat, sex]) + >>> a = Array([[4, 6], [2, 8]], [nat, sex]) >>> a nat\sex M F BE 4 6 @@ -3718,7 +3718,7 @@ def ratio(self, *axes): return self / self.sum(*axes) def rationot0(self, *axes): - r"""Returns a LArray with values array / array.sum(axes) where the sum is not 0, 0 otherwise. + r"""Returns an Array with values array / array.sum(axes) where the sum is not 0, 0 otherwise. Parameters ---------- @@ -3726,14 +3726,14 @@ def rationot0(self, *axes): Returns ------- - LArray + Array array / array.sum(axes) Examples -------- >>> a = Axis('a=a0,a1') >>> b = Axis('b=b0,b1,b2') - >>> arr = LArray([[6, 0, 2], + >>> arr = Array([[6, 0, 2], ... 
[4, 0, 8]], [a, b]) >>> arr a\b b0 b1 b2 @@ -3768,14 +3768,14 @@ def percent(self, *axes): Returns ------- - LArray + Array array / array.sum(axes) * 100 Examples -------- >>> nat = Axis('nat=BE,FO') >>> sex = Axis('sex=M,F') - >>> a = LArray([[4, 6], [2, 8]], [nat, sex]) + >>> a = Array([[4, 6], [2, 8]], [nat, sex]) >>> a nat\sex M F BE 4 6 @@ -3825,11 +3825,11 @@ def all(self, *args, **kwargs): Returns ------- - LArray of bool or bool + Array of bool or bool See Also -------- - LArray.all_by, LArray.any, LArray.any_by + Array.all_by, Array.any, Array.any_by Examples -------- @@ -3896,11 +3896,11 @@ def all_by(self, *args, **kwargs): Returns ------- - LArray of bool or bool + Array of bool or bool See Also -------- - LArray.all, LArray.any, LArray.any_by + Array.all, Array.any, Array.any_by Examples -------- @@ -3964,11 +3964,11 @@ def any(self, *args, **kwargs): Returns ------- - LArray of bool or bool + Array of bool or bool See Also -------- - LArray.any_by, LArray.all, LArray.all_by + Array.any_by, Array.all, Array.all_by Examples -------- @@ -4035,11 +4035,11 @@ def any_by(self, *args, **kwargs): Returns ------- - LArray of bool or bool + Array of bool or bool See Also -------- - LArray.any, LArray.all, LArray.all_by + Array.any, Array.all, Array.all_by Examples -------- @@ -4105,12 +4105,12 @@ def sum(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.sum_by, LArray.prod, LArray.prod_by, - LArray.cumsum, LArray.cumprod + Array.sum_by, Array.prod, Array.prod_by, + Array.cumsum, Array.cumprod Examples -------- @@ -4170,12 +4170,12 @@ def sum_by(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.sum, LArray.prod, LArray.prod_by, - LArray.cumsum, LArray.cumprod + Array.sum, Array.prod, Array.prod_by, + Array.cumsum, Array.cumprod Examples -------- @@ -4233,12 +4233,12 @@ def prod(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar 
See Also -------- - LArray.prod_by, LArray.sum, LArray.sum_by, - LArray.cumsum, LArray.cumprod + Array.prod_by, Array.sum, Array.sum_by, + Array.cumsum, Array.cumprod Examples -------- @@ -4299,12 +4299,12 @@ def prod_by(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.prod, LArray.sum, LArray.sum_by, - LArray.cumsum, LArray.cumprod + Array.prod, Array.sum, Array.sum_by, + Array.cumsum, Array.cumprod Examples -------- @@ -4361,11 +4361,11 @@ def min(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.min_by, LArray.max, LArray.max_by + Array.min_by, Array.max, Array.max_by Examples -------- @@ -4425,11 +4425,11 @@ def min_by(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.min, LArray.max, LArray.max_by + Array.min, Array.max, Array.max_by Examples -------- @@ -4486,11 +4486,11 @@ def max(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.max_by, LArray.min, LArray.min_by + Array.max_by, Array.min, Array.min_by Examples -------- @@ -4550,11 +4550,11 @@ def max_by(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.max, LArray.min, LArray.min_by + Array.max, Array.min, Array.min_by Examples -------- @@ -4611,13 +4611,13 @@ def mean(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.mean_by, LArray.median, LArray.median_by, - LArray.var, LArray.var_by, LArray.std, LArray.std_by, - LArray.percentile, LArray.percentile_by + Array.mean_by, Array.median, Array.median_by, + Array.var, Array.var_by, Array.std, Array.std_by, + Array.percentile, Array.percentile_by Examples -------- @@ -4677,13 +4677,13 @@ def mean_by(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.mean, LArray.median, LArray.median_by, - 
LArray.var, LArray.var_by, LArray.std, LArray.std_by, - LArray.percentile, LArray.percentile_by + Array.mean, Array.median, Array.median_by, + Array.var, Array.var_by, Array.std, Array.std_by, + Array.percentile, Array.percentile_by Examples -------- @@ -4740,13 +4740,13 @@ def median(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.median_by, LArray.mean, LArray.mean_by, - LArray.var, LArray.var_by, LArray.std, LArray.std_by, - LArray.percentile, LArray.percentile_by + Array.median_by, Array.mean, Array.mean_by, + Array.var, Array.var_by, Array.std, Array.std_by, + Array.percentile, Array.percentile_by Examples -------- @@ -4810,13 +4810,13 @@ def median_by(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.median, LArray.mean, LArray.mean_by, - LArray.var, LArray.var_by, LArray.std, LArray.std_by, - LArray.percentile, LArray.percentile_by + Array.median, Array.mean, Array.mean_by, + Array.var, Array.var_by, Array.std, Array.std_by, + Array.percentile, Array.percentile_by Examples -------- @@ -4881,13 +4881,13 @@ def percentile(self, q, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.percentile_by, LArray.mean, LArray.mean_by, - LArray.median, LArray.median_by, LArray.var, LArray.var_by, - LArray.std, LArray.std_by + Array.percentile_by, Array.mean, Array.mean_by, + Array.median, Array.median_by, Array.var, Array.var_by, + Array.std, Array.std_by Examples -------- @@ -4969,13 +4969,13 @@ def percentile_by(self, q, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.percentile, LArray.mean, LArray.mean_by, - LArray.median, LArray.median_by, LArray.var, LArray.var_by, - LArray.std, LArray.std_by + Array.percentile, Array.mean, Array.mean_by, + Array.median, Array.median_by, Array.var, Array.var_by, + Array.std, Array.std_by Examples -------- @@ -5057,7 +5057,7 @@ 
def ptp(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar Examples -------- @@ -5122,13 +5122,13 @@ def var(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.var_by, LArray.std, LArray.std_by, - LArray.mean, LArray.mean_by, LArray.median, LArray.median_by, - LArray.percentile, LArray.percentile_by + Array.var_by, Array.std, Array.std_by, + Array.mean, Array.mean_by, Array.median, Array.median_by, + Array.percentile, Array.percentile_by Examples -------- @@ -5186,13 +5186,13 @@ def var_by(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.var, LArray.std, LArray.std_by, - LArray.mean, LArray.mean_by, LArray.median, LArray.median_by, - LArray.percentile, LArray.percentile_by + Array.var, Array.std, Array.std_by, + Array.mean, Array.mean_by, Array.median, Array.median_by, + Array.percentile, Array.percentile_by Examples -------- @@ -5250,13 +5250,13 @@ def std(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.std_by, LArray.var, LArray.var_by, - LArray.mean, LArray.mean_by, LArray.median, LArray.median_by, - LArray.percentile, LArray.percentile_by + Array.std_by, Array.var, Array.var_by, + Array.mean, Array.mean_by, Array.median, Array.median_by, + Array.percentile, Array.percentile_by Examples -------- @@ -5315,13 +5315,13 @@ def std_by(self, *args, **kwargs): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.std_by, LArray.var, LArray.var_by, - LArray.mean, LArray.mean_by, LArray.median, LArray.median_by, - LArray.percentile, LArray.percentile_by + Array.std_by, Array.var, Array.var_by, + Array.mean, Array.mean_by, Array.median, Array.median_by, + Array.percentile, Array.percentile_by Examples -------- @@ -5381,12 +5381,12 @@ def cumsum(self, axis=-1): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.cumprod, LArray.sum, 
LArray.sum_by, - LArray.prod, LArray.prod_by + Array.cumprod, Array.sum, Array.sum_by, + Array.prod, Array.prod_by Notes ----- @@ -5429,12 +5429,12 @@ def cumprod(self, axis=-1): Returns ------- - LArray or scalar + Array or scalar See Also -------- - LArray.cumsum, LArray.sum, LArray.sum_by, - LArray.prod, LArray.prod_by + Array.cumsum, Array.sum, Array.sum_by, + Array.prod, Array.prod_by Notes ----- @@ -5482,18 +5482,18 @@ def opmethod(self, other): if isinstance(other, Group) and np.isscalar(other.key): other = other.eval() - # we could pass scalars through aslarray too but it is too costly performance-wise for only suppressing one + # we could pass scalars through asarray too but it is too costly performance-wise for only suppressing one # isscalar test and an if statement. # TODO: ndarray should probably be converted to larrays because that would harmonize broadcasting rules, but # it makes some tests fail for some reason. - if not isinstance(other, (LArray, np.ndarray)) and not np.isscalar(other): - other = aslarray(other) + if not isinstance(other, (Array, np.ndarray)) and not np.isscalar(other): + other = asarray(other) - if isinstance(other, LArray): + if isinstance(other, Array): # TODO: first test if it is not already broadcastable (self, other), res_axes = make_numpy_broadcastable([self, other]) other = other.data - return LArray(super_method(self.data, other), res_axes) + return Array(super_method(self.data, other), res_axes) opmethod.__name__ = fullname return opmethod @@ -5586,10 +5586,10 @@ def __matmul__(self, other): """ current = self[:] axes = self.axes - if not isinstance(other, (LArray, np.ndarray)): + if not isinstance(other, (Array, np.ndarray)): raise NotImplementedError("matrix multiplication not implemented for %s" % type(other)) if isinstance(other, np.ndarray): - other = LArray(other) + other = Array(other) other_axes = other.axes combined_axes = axes[:-2] + other_axes[:-2] @@ -5614,14 +5614,14 @@ def __matmul__(self, other): if 
other.ndim > 1: res_axes += [other_axes[-1].copy()] if res_axes: - return LArray(res_data, res_axes) + return Array(res_data, res_axes) else: return res_data def __rmatmul__(self, other): if isinstance(other, np.ndarray): - other = LArray(other) - if not isinstance(other, LArray): + other = Array(other) + if not isinstance(other, Array): raise NotImplementedError("matrix multiplication not implemented for %s" % type(other)) return other.__matmul__(self) @@ -5631,7 +5631,7 @@ def _unaryop(opname): super_method = getattr(np.ndarray, fullname) def opmethod(self): - return LArray(super_method(self.data), self.axes) + return Array(super_method(self.data), self.axes) opmethod.__name__ = fullname return opmethod @@ -5661,8 +5661,8 @@ def equals(self, other, rtol=0, atol=0, nans_equal=False, check_axes=False): Parameters ---------- - other : LArray-like - Input array. aslarray() is used on a non-LArray input. + other : Array-like + Input array. asarray() is used on a non-Array input. rtol : float or int, optional The relative tolerance parameter (see Notes). Defaults to 0. atol : float or int, optional @@ -5684,7 +5684,7 @@ def equals(self, other, rtol=0, atol=0, nans_equal=False, check_axes=False): See Also -------- - LArray.eq + Array.eq Notes ----- @@ -5715,11 +5715,11 @@ def equals(self, other, rtol=0, atol=0, nans_equal=False, check_axes=False): Test equality between two arrays within a given tolerance range. Return True if absolute(array1 - array2) <= (atol + rtol * absolute(array2)). 
- >>> arr1 = LArray([6., 8.], "a=a0,a1") + >>> arr1 = Array([6., 8.], "a=a0,a1") >>> arr1 a a0 a1 6.0 8.0 - >>> arr2 = LArray([5.999, 8.001], "a=a0,a1") + >>> arr2 = Array([5.999, 8.001], "a=a0,a1") >>> arr2 a a0 a1 5.999 8.001 @@ -5777,7 +5777,7 @@ def equals(self, other, rtol=0, atol=0, nans_equal=False, check_axes=False): False """ try: - other = aslarray(other) + other = asarray(other) except Exception: return False try: @@ -5793,8 +5793,8 @@ def eq(self, other, rtol=0, atol=0, nans_equal=False): Parameters ---------- - other : LArray-like - Input array. aslarray() is used on a non-LArray input. + other : Array-like + Input array. asarray() is used on a non-Array input. rtol : float or int, optional The relative tolerance parameter (see Notes). Defaults to 0. atol : float or int, optional @@ -5807,14 +5807,14 @@ def eq(self, other, rtol=0, atol=0, nans_equal=False): Returns ------- - LArray + Array Boolean array where each cell tells whether corresponding elements of self and other are equal within a tolerance range if given. If nans_equal=True, corresponding elements with NaN values will be considered as equal. See Also -------- - LArray.equals + Array.equals Notes ----- @@ -5827,7 +5827,7 @@ def eq(self, other, rtol=0, atol=0, nans_equal=False): Examples -------- - >>> arr1 = LArray([6., np.nan, 8.], "a=a0..a2") + >>> arr1 = Array([6., np.nan, 8.], "a=a0..a2") >>> arr1 a a0 a1 a2 6.0 nan 8.0 @@ -5841,7 +5841,7 @@ def eq(self, other, rtol=0, atol=0, nans_equal=False): Test equality between two arrays within a given tolerance range. Return True if absolute(array1 - array2) <= (atol + rtol * absolute(array2)). 
- >>> arr2 = LArray([5.999, np.nan, 8.001], "a=a0..a2") + >>> arr2 = Array([5.999, np.nan, 8.001], "a=a0..a2") >>> arr2 a a0 a1 a2 5.999 nan 8.001 @@ -5855,7 +5855,7 @@ def eq(self, other, rtol=0, atol=0, nans_equal=False): a a0 a1 a2 True True True """ - other = aslarray(other) + other = asarray(other) if rtol == 0 and atol == 0: if not nans_equal: @@ -5867,14 +5867,14 @@ def general_isnan(a): if np.issubclass_(a.dtype.type, np.inexact): return isnan(a) elif a.dtype.type is np.object_: - return LArray(obj_isnan(a), a.axes) + return Array(obj_isnan(a), a.axes) else: return False return (self == other) | (general_isnan(self) & general_isnan(other)) else: (a1, a2), res_axes = make_numpy_broadcastable([self, other]) - return LArray(np.isclose(a1.data, a2.data, rtol=rtol, atol=atol, equal_nan=nans_equal), res_axes) + return Array(np.isclose(a1.data, a2.data, rtol=rtol, atol=atol, equal_nan=nans_equal), res_axes) def isin(self, test_values, assume_unique=False, invert=False): r""" @@ -5895,7 +5895,7 @@ def isin(self, test_values, assume_unique=False, invert=False): Returns ------- - LArray + Array boolean array of the same shape as this array that is True where the array element is in `test_values` and False otherwise. @@ -5916,19 +5916,19 @@ def isin(self, test_values, assume_unique=False, invert=False): """ if isinstance(test_values, set): test_values = list(test_values) - return LArray(np.isin(self.data, test_values, assume_unique=assume_unique, invert=invert), self.axes) + return Array(np.isin(self.data, test_values, assume_unique=assume_unique, invert=invert), self.axes) def divnot0(self, other): r"""Divides array by other, but returns 0.0 where other is 0. Parameters ---------- - other : scalar or LArray + other : scalar or Array What to divide by. 
Returns ------- - LArray + Array Array divided by other, 0.0 where other is 0 Examples @@ -5968,7 +5968,7 @@ def divnot0(self, other): otherdata = np.where(other_eq0, 1, otherdata) res_data = self.data / otherdata res_data[np.broadcast_to(other_eq0, res_data.shape)] = 0.0 - return LArray(res_data, res_axes) + return Array(res_data, res_axes) # XXX: rename/change to "add_axes" ? # TODO: add a flag copy=True to force a new array. @@ -5976,7 +5976,7 @@ def expand(self, target_axes=None, out=None, readonly=False): r"""Expands array to target_axes. Target axes will be added to array if not present. - In most cases this function is not needed because LArray can do operations with arrays having different + In most cases this function is not needed because Array can do operations with arrays having different (compatible) axes. Parameters @@ -5984,7 +5984,7 @@ def expand(self, target_axes=None, out=None, readonly=False): target_axes : string, list of Axis or AxisCollection, optional Self can contain axes not present in `target_axes`. The result axes will be: [self.axes not in target_axes] + target_axes - out : LArray, optional + out : Array, optional Output array, must have more axes than array. Defaults to a new array. arr.expand(out=out) is equivalent to out[:] = arr readonly : bool, optional @@ -5992,7 +5992,7 @@ def expand(self, target_axes=None, out=None, readonly=False): Returns ------- - LArray + Array Original array if possible (and out is None). Examples @@ -6054,7 +6054,7 @@ def expand(self, target_axes=None, out=None, readonly=False): if readonly: # requires numpy 1.10 - return LArray(np.broadcast_to(broadcasted, target_axes.shape), target_axes) + return Array(np.broadcast_to(broadcasted, target_axes.shape), target_axes) out = empty(target_axes, dtype=self.dtype) out[:] = broadcasted @@ -6069,14 +6069,14 @@ def append(self, axis, value, label=None): ---------- axis : axis reference Axis along which to append `value`. 
- value : scalar or LArray + value : scalar or Array Scalar or array with compatible axes. label : scalar, optional Label for the new item in axis Returns ------- - LArray + Array Array expanded with `value` along `axis`. Examples @@ -6120,14 +6120,14 @@ def prepend(self, axis, value, label=None): ---------- axis : axis reference Axis along which to prepend input array (`value`) - value : scalar or LArray + value : scalar or Array Scalar or array with compatible axes. label : str, optional Label for the new item in axis Returns ------- - LArray + Array Array expanded with 'value' at the start of 'axis'. Examples @@ -6170,12 +6170,12 @@ def extend(self, axis, other): ---------- axis : axis Axis along which to extend with input array (`other`) - other : LArray + other : Array Array with compatible axes Returns ------- - LArray + Array Array expanded with 'other' along 'axis'. Examples @@ -6218,8 +6218,8 @@ def insert(self, value, before=None, after=None, pos=None, axis=None, label=None Parameters ---------- - value : scalar or LArray - Value to insert. If an LArray, it must have compatible axes. If value already has the axis along which it + value : scalar or Array + Value to insert. If an Array, it must have compatible axes. If value already has the axis along which it is inserted, `label` should not be used. before : scalar or Group Label or group before which to insert `value`. @@ -6230,7 +6230,7 @@ def insert(self, value, before=None, after=None, pos=None, axis=None, label=None Returns ------- - LArray + Array Array with `value` inserted along `axis`. The dtype of the returned array will be the "closest" type which can hold both the array values and the inserted values without loss of information. For example, when mixing numeric and string types, the dtype will be object. 
@@ -6364,9 +6364,9 @@ def insert(self, value, before=None, after=None, pos=None, axis=None, label=None # >>> arr1.insert([dict(value=8, before='b1', label='b0.5'), # dict(value=9, before='b2', label='b1.5')]) - # It would be nice to somehow support easily inserting values defined using an LArray + # It would be nice to somehow support easily inserting values defined using an Array - # >>> toinsert = LArray([[8, 'b1', 'b0.5'], + # >>> toinsert = Array([[8, 'b1', 'b0.5'], # >>> [9, 'b2', 'b1.5']], "row=2;column=value,before,label") # >>> arr1.insert(toinsert) # >>> arr1.insert(value=toinsert['value'], before=toinsert['before'], label=toinsert['label']) @@ -6390,7 +6390,7 @@ def insert(self, value, before=None, after=None, pos=None, axis=None, label=None before_pos = axis.index(after) + 1 def length(v): - if isinstance(v, LArray) and axis in v.axes: + if isinstance(v, Array) and axis in v.axes: return len(v.axes[axis]) else: return len(v) if isinstance(v, (tuple, list, np.ndarray)) else 1 @@ -6401,12 +6401,12 @@ def expand(v, length): num_inserts = max(length(before_pos), length(label), length(value)) stops = expand(before_pos, num_inserts) - if isinstance(value, LArray) and axis in value.axes: + if isinstance(value, Array) and axis in value.axes: # FIXME: when length(before_pos) == 1 and length(label) == 1, this is inefficient values = [value[[k]] for k in value.axes[axis]] else: values = expand(value, num_inserts) - values = [aslarray(v) if not isinstance(v, LArray) else v + values = [asarray(v) if not isinstance(v, Array) else v for v in values] if label is not None: @@ -6432,7 +6432,7 @@ def drop(self, labels=None): Returns ------- - LArray + Array Array with `labels` removed along their axis. 
Examples @@ -6497,7 +6497,7 @@ def drop(self, labels=None): axis_idx = self.axes.index(axis) new_axis = Axis(np.delete(axis.labels, indices), axis.name) new_axes = self.axes.replace(axis, new_axis) - return LArray(np.delete(self.data, indices, axis_idx), new_axes) + return Array(np.delete(self.data, indices, axis_idx), new_axes) def transpose(self, *args): r"""Reorder axes. @@ -6512,8 +6512,8 @@ def transpose(self, *args): Returns ------- - LArray - LArray with reordered axes. + Array + Array with reordered axes. Examples -------- @@ -6562,7 +6562,7 @@ def transpose(self, *args): indices_present = set(axes_indices) missing_indices = [i for i in range(len(self.axes)) if i not in indices_present] axes_indices = axes_indices + missing_indices - return LArray(self.data.transpose(axes_indices), self.axes[axes_indices]) + return Array(self.data.transpose(axes_indices), self.axes[axes_indices]) T = property(transpose) def clip(self, minval=None, maxval=None, out=None): @@ -6580,12 +6580,12 @@ def clip(self, minval=None, maxval=None, out=None): maxval : scalar or array-like, optional Maximum value. If None, clipping is not performed on upper bound. Defaults to None. - out : LArray, optional + out : Array, optional The results will be placed in this array. Returns ------- - LArray + Array An array with the elements of the current array, but where values < `minval` are replaced with `minval`, and those > `maxval` with `maxval`. @@ -6756,12 +6756,12 @@ def to_stata(self, filepath_or_buffer, **kwargs): Notes ----- - The round trip to Stata (LArray.to_stata followed by read_stata) loose the name of the "column" axis. + The round trip to Stata (Array.to_stata followed by read_stata) loose the name of the "column" axis. Examples -------- >>> axes = [Axis(3, 'row'), Axis('column=country,sex')] # doctest: +SKIP - >>> arr = LArray([['BE', 'F'], + >>> arr = Array([['BE', 'F'], ... ['FR', 'M'], ... 
['FR', 'F']], axes=axes) # doctest: +SKIP >>> arr # doctest: +SKIP @@ -7076,7 +7076,7 @@ def ndim(self): Returns ------- int - Number of dimensions of a LArray. + Number of dimensions of an Array. Examples -------- @@ -7189,7 +7189,7 @@ def set_labels(self, axis=None, labels=None, inplace=False, **kwargs): Returns ------- - LArray + Array Array with modified labels. See Also @@ -7265,10 +7265,10 @@ def set_labels(self, axis=None, labels=None, inplace=False, **kwargs): self.axes = axes return self else: - return LArray(self.data, axes) + return Array(self.data, axes) def astype(self, dtype, order='K', casting='unsafe', subok=True, copy=True): - return LArray(self.data.astype(dtype, order, casting, subok, copy), self.axes) + return Array(self.data.astype(dtype, order, casting, subok, copy), self.axes) astype.__doc__ = np.ndarray.astype.__doc__ def shift(self, axis, n=1): @@ -7283,11 +7283,11 @@ def shift(self, axis, n=1): Returns ------- - LArray + Array See Also -------- - LArray.roll : cells which are pushed "outside of the axis" are reintroduced on the opposite side of the axis + Array.roll : cells which are pushed "outside of the axis" are reintroduced on the opposite side of the axis instead of being dropped. Examples @@ -7322,17 +7322,17 @@ def roll(self, axis=None, n=1): ---------- axis : int, str or Axis, optional Axis along which to roll. Defaults to None (all axes). - n : int or LArray, optional + n : int or Array, optional Number of positions to roll. Defaults to 1. Use a negative integers to roll left. - If n is an LArray the number of positions rolled can vary along the axes of n. + If n is an Array the number of positions rolled can vary along the axes of n. Returns ------- - LArray + Array See Also -------- - LArray.shift : cells which are pushed "outside of the axis" are dropped instead of being reintroduced on the + Array.shift : cells which are pushed "outside of the axis" are dropped instead of being reintroduced on the opposite side of the axis. 
Examples @@ -7361,12 +7361,12 @@ def roll(self, axis=None, n=1): """ if isinstance(n, (int, np.integer)): axis_idx = None if axis is None else self.axes.index(axis) - return LArray(np.roll(self.data, n, axis=axis_idx), self.axes) + return Array(np.roll(self.data, n, axis=axis_idx), self.axes) else: - if not isinstance(n, LArray): - raise TypeError("n should either be an integer or an LArray") + if not isinstance(n, Array): + raise TypeError("n should either be an integer or an Array") if axis is None: - raise TypeError("axis may not be None if n is an LArray") + raise TypeError("axis may not be None if n is an Array") axis = self.axes[axis] seq = sequence(axis) return self[axis.i[(seq - n) % len(axis)]] @@ -7393,7 +7393,7 @@ def diff(self, axis=-1, d=1, n=1, label='upper'): Returns ------- - LArray + Array The n-th order differences. The shape of the output is the same as `a` except for `axis` which is smaller by `n` * `d`. @@ -7456,12 +7456,12 @@ def growth_rate(self, axis=-1, d=1, label='upper'): Returns ------- - LArray + Array Examples -------- >>> data = [[2, 4, 5, 4, 6], [4, 6, 3, 6, 9]] - >>> a = LArray(data, "sex=M,F; year=2016..2020") + >>> a = Array(data, "sex=M,F; year=2016..2020") >>> a sex\year 2016 2017 2018 2019 2020 M 2 4 5 4 6 @@ -7500,12 +7500,12 @@ def compact(self): Returns ------- - LArray or scalar + Array or scalar Array with constant axes removed. Examples -------- - >>> a = LArray([[1, 2], + >>> a = Array([[1, 2], ... [1, 2]], [Axis('sex=M,F'), Axis('nat=BE,FO')]) >>> a sex\nat BE FO @@ -7538,7 +7538,7 @@ def combine_axes(self, axes=None, sep='_', wildcard=False): Returns ------- - LArray + Array Array with combined axes. Examples @@ -7647,13 +7647,13 @@ def split_axes(self, axes=None, sep='_', names=None, regex=None, sort=False, fil sort : bool, optional Whether or not to sort the combined axis before splitting it. When all combinations of labels are present in the combined axis, sorting is faster than not sorting. Defaults to False. 
- fill_value : scalar or LArray, optional + fill_value : scalar or Array, optional Value to use for missing values when the combined axis does not contain all combination of labels. Defaults to NaN. Returns ------- - LArray + Array Examples -------- @@ -7767,7 +7767,7 @@ def reverse(self, axes=None): Returns ------- - LArray + Array Array with passed `axes` reversed. Examples @@ -7829,7 +7829,7 @@ def apply(self, transform, *args, **kwargs): ---------- transform : function Function to apply. This function will be called in turn with each element of the array as the first - argument and must return an LArray, scalar or tuple. + argument and must return an Array, scalar or tuple. If returning arrays the axes of those arrays must be the same for all calls to the function. *args Extra arguments to pass to the function. @@ -7848,14 +7848,14 @@ def apply(self, transform, *args, **kwargs): Returns ------- - LArray or scalar, or tuple of them + Array or scalar, or tuple of them Axes will be the union of those in axis and those of values returned by the function. Examples -------- First let us define a test array - >>> arr = LArray([[0, 2, 1], + >>> arr = Array([[0, 2, 1], ... 
[3, 1, 5]], 'a=a0,a1;b=b0..b2') >>> arr a\b b0 b1 b2 @@ -7933,7 +7933,7 @@ def apply(self, transform, *args, **kwargs): if axes is not None: if by is not None: - raise ValueError("cannot specify both `by` and `axes` arguments in LArray.apply") + raise ValueError("cannot specify both `by` and `axes` arguments in Array.apply") by = self.axes - axes # XXX: we could go one step further than vectorize and support a array of callables which would be broadcasted @@ -7948,9 +7948,9 @@ def apply(self, transform, *args, **kwargs): raw_bcast_args, raw_bcast_kwargs, res_axes = ((self,) + args, kwargs, self.axes) res_data = vfunc(*raw_bcast_args, **raw_bcast_kwargs) if isinstance(res_data, tuple): - return tuple(LArray(res_arr, res_axes) for res_arr in res_data) + return tuple(Array(res_arr, res_axes) for res_arr in res_data) else: - return LArray(res_data, res_axes) + return Array(res_data, res_axes) else: by = self.axes[by] @@ -7990,19 +7990,19 @@ def apply_map(self, mapping, dtype=None): Returns ------- - LArray + Array Axes will be the same as the original array axes. Notes ----- - To apply a transformation given as an LArray (with current values as labels on one axis of + To apply a transformation given as an Array (with current values as labels on one axis of the array and desired values as the array values), you can use: ``mapping_arr[original_arr]``. Examples -------- First let us define a test array - >>> arr = LArray([[0, 2, 1], + >>> arr = Array([[0, 2, 1], ... [3, 1, 5]], 'a=a0,a1;b=b0..b2') >>> arr a\b b0 b1 b2 @@ -8027,12 +8027,18 @@ def transform(v): return self.apply(transform, dtype=dtype) +class LArray(Array): + def __init__(self, *args, **kwargs): + warnings.warn("LArray has been renamed as Array.", FutureWarning, stacklevel=2) + Array.__init__(self, *args, **kwargs) + + def larray_equal(a1, a2): import warnings - msg = "larray_equal() is deprecated. Use LArray.equals() instead." + msg = "larray_equal() is deprecated. Use Array.equals() instead." 
warnings.warn(msg, FutureWarning, stacklevel=2) try: - a1 = aslarray(a1) + a1 = asarray(a1) except Exception: return False return a1.equals(a2) @@ -8040,36 +8046,36 @@ def larray_equal(a1, a2): def larray_nan_equal(a1, a2): import warnings - msg = "larray_nan_equal() is deprecated. Use LArray.equals() instead." + msg = "larray_nan_equal() is deprecated. Use Array.equals() instead." warnings.warn(msg, FutureWarning, stacklevel=2) try: - a1 = aslarray(a1) + a1 = asarray(a1) except Exception: return False return a1.equals(a2, nans_equal=True) -def aslarray(a, meta=None): +def asarray(a, meta=None): r""" - Converts input as LArray if possible. + Converts input as Array if possible. Parameters ---------- a : array-like - Input array to convert into a LArray. + Input array to convert into an Array. meta : list of pairs or dict or OrderedDict or Metadata, optional Metadata (title, description, author, creation_date, ...) associated with the array. Keys must be strings. Values must be of type string, int, float, date, time or datetime. Returns ------- - LArray + Array Examples -------- >>> # NumPy array >>> np_arr = np.arange(6).reshape((2,3)) - >>> aslarray(np_arr) + >>> asarray(np_arr) {0}*\{1}* 0 1 2 0 0 1 2 1 3 4 5 @@ -8077,13 +8083,13 @@ def aslarray(a, meta=None): >>> data = {'normal' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), ... 
'reverse' : pd.Series([3., 2., 1.], index=['a', 'b', 'c'])} >>> df = pd.DataFrame(data) - >>> aslarray(df) + >>> asarray(df) {0}\{1} normal reverse a 1.0 3.0 b 2.0 2.0 c 3.0 1.0 """ - if isinstance(a, LArray): + if isinstance(a, Array): if meta is not None: res = a.copy() res.meta = meta @@ -8099,7 +8105,10 @@ def aslarray(a, meta=None): from larray.inout.pandas import from_frame return from_frame(a, meta=meta) else: - return LArray(a, meta=meta) + return Array(a, meta=meta) + + +aslarray = renamed_to(asarray, 'aslarray') def _check_axes_argument(func): @@ -8133,7 +8142,7 @@ def zeros(axes, title=None, dtype=float, order='C', meta=None): Returns ------- - LArray + Array Examples -------- @@ -8156,7 +8165,7 @@ def zeros(axes, title=None, dtype=float, order='C', meta=None): # FIXME: the error message is wrong (stackdepth is wrong) because of _check_axes_argument meta = _handle_meta(meta, title) axes = AxisCollection(axes) - return LArray(np.zeros(axes.shape, dtype, order), axes, meta=meta) + return Array(np.zeros(axes.shape, dtype, order), axes, meta=meta) def zeros_like(array, title=None, dtype=None, order='K', meta=None): @@ -8164,7 +8173,7 @@ def zeros_like(array, title=None, dtype=None, order='K', meta=None): Parameters ---------- - array : LArray + array : Array Input array. title : str, optional Deprecated. See 'meta' below. 
@@ -8180,7 +8189,7 @@ def zeros_like(array, title=None, dtype=None, order='K', meta=None): Returns ------- - LArray + Array Examples -------- @@ -8191,7 +8200,7 @@ def zeros_like(array, title=None, dtype=None, order='K', meta=None): a1 0 0 0 """ meta = _handle_meta(meta, title) - return LArray(np.zeros_like(array, dtype, order), array.axes, meta=meta) + return Array(np.zeros_like(array, dtype, order), array.axes, meta=meta) @_check_axes_argument @@ -8215,7 +8224,7 @@ def ones(axes, title=None, dtype=float, order='C', meta=None): Returns ------- - LArray + Array Examples -------- @@ -8228,7 +8237,7 @@ def ones(axes, title=None, dtype=float, order='C', meta=None): """ meta = _handle_meta(meta, title) axes = AxisCollection(axes) - return LArray(np.ones(axes.shape, dtype, order), axes, meta=meta) + return Array(np.ones(axes.shape, dtype, order), axes, meta=meta) def ones_like(array, title=None, dtype=None, order='K', meta=None): @@ -8236,7 +8245,7 @@ def ones_like(array, title=None, dtype=None, order='K', meta=None): Parameters ---------- - array : LArray + array : Array Input array. title : str, optional Deprecated. See 'meta' below. 
@@ -8252,7 +8261,7 @@ def ones_like(array, title=None, dtype=None, order='K', meta=None): Returns ------- - LArray + Array Examples -------- @@ -8264,7 +8273,7 @@ def ones_like(array, title=None, dtype=None, order='K', meta=None): """ meta = _handle_meta(meta, title) axes = array.axes - return LArray(np.ones_like(array, dtype, order), axes, meta=meta) + return Array(np.ones_like(array, dtype, order), axes, meta=meta) @_check_axes_argument @@ -8288,7 +8297,7 @@ def empty(axes, title=None, dtype=float, order='C', meta=None): Returns ------- - LArray + Array Examples -------- @@ -8301,7 +8310,7 @@ def empty(axes, title=None, dtype=float, order='C', meta=None): """ meta = _handle_meta(meta, title) axes = AxisCollection(axes) - return LArray(np.empty(axes.shape, dtype, order), axes, meta=meta) + return Array(np.empty(axes.shape, dtype, order), axes, meta=meta) def empty_like(array, title=None, dtype=None, order='K', meta=None): @@ -8309,7 +8318,7 @@ def empty_like(array, title=None, dtype=None, order='K', meta=None): Parameters ---------- - array : LArray + array : Array Input array. title : str, optional Deprecated. See 'meta' below. @@ -8325,7 +8334,7 @@ def empty_like(array, title=None, dtype=None, order='K', meta=None): Returns ------- - LArray + Array Examples -------- @@ -8338,7 +8347,7 @@ def empty_like(array, title=None, dtype=None, order='K', meta=None): """ meta = _handle_meta(meta, title) # cannot use empty() because order == 'K' is not understood - return LArray(np.empty_like(array.data, dtype, order), array.axes, meta=meta) + return Array(np.empty_like(array.data, dtype, order), array.axes, meta=meta) # We cannot use @_check_axes_argument here because an integer fill_value would be considered as an error @@ -8349,7 +8358,7 @@ def full(axes, fill_value, title=None, dtype=None, order='C', meta=None): ---------- axes : int, tuple of int, Axis or tuple/list/AxisCollection of Axis Collection of axes or a shape. 
- fill_value : scalar or LArray + fill_value : scalar or Array Value to fill the array title : str, optional Deprecated. See 'meta' below. @@ -8364,7 +8373,7 @@ def full(axes, fill_value, title=None, dtype=None, order='C', meta=None): Returns ------- - LArray + Array Examples -------- @@ -8399,9 +8408,9 @@ def full_like(array, fill_value, title=None, dtype=None, order='K', meta=None): Parameters ---------- - array : LArray + array : Array Input array. - fill_value : scalar or LArray + fill_value : scalar or Array Value to fill the array title : str, optional Deprecated. See 'meta' below. @@ -8417,7 +8426,7 @@ def full_like(array, fill_value, title=None, dtype=None, order='K', meta=None): Returns ------- - LArray + Array Examples -------- @@ -8429,7 +8438,7 @@ def full_like(array, fill_value, title=None, dtype=None, order='K', meta=None): """ meta = _handle_meta(meta, title) # cannot use full() because order == 'K' is not understood - # cannot use np.full_like() because it would not handle LArray fill_value + # cannot use np.full_like() because it would not handle Array fill_value res = empty_like(array, dtype=dtype, meta=meta) res[:] = fill_value return res @@ -8449,11 +8458,11 @@ def sequence(axis, initial=0, inc=None, mult=1, func=None, axes=None, title=None axis : axis definition (Axis, str, int) Axis along which to apply mod. An axis definition can be passed as a string. An int will be interpreted as the length for a new anonymous axis. - initial : scalar or LArray, optional + initial : scalar or Array, optional Value for the first label of axis. Defaults to 0. - inc : scalar, LArray, optional + inc : scalar, Array, optional Value to increment the previous value by. Defaults to 0 if mult is provided, 1 otherwise. - mult : scalar, LArray, optional + mult : scalar, Array, optional Value to multiply the previous value by. Defaults to 1. func : function/callable, optional Function to apply to the previous value. Defaults to None. 
@@ -8482,7 +8491,7 @@ def sequence(axis, initial=0, inc=None, mult=1, func=None, axes=None, title=None >>> sequence(year, 1.0, mult=1.5) year 2016 2017 2018 2019 1.0 1.5 2.25 3.375 - >>> inc = LArray([1, 2], [sex]) + >>> inc = Array([1, 2], [sex]) >>> inc sex M F 1 2 @@ -8490,7 +8499,7 @@ def sequence(axis, initial=0, inc=None, mult=1, func=None, axes=None, title=None sex\year 2016 2017 2018 2019 M 1.0 2.0 3.0 4.0 F 1.0 3.0 5.0 7.0 - >>> mult = LArray([2, 3], [sex]) + >>> mult = Array([2, 3], [sex]) >>> mult sex M F 2 3 @@ -8498,7 +8507,7 @@ def sequence(axis, initial=0, inc=None, mult=1, func=None, axes=None, title=None sex\year 2016 2017 2018 2019 M 1.0 2.0 4.0 8.0 F 1.0 3.0 9.0 27.0 - >>> initial = LArray([3, 4], [sex]) + >>> initial = Array([3, 4], [sex]) >>> initial sex M F 3 4 @@ -8529,7 +8538,7 @@ def sequence(axis, initial=0, inc=None, mult=1, func=None, axes=None, title=None sequence can be used as the inverse of growth_rate: - >>> a = LArray([1.0, 2.0, 3.0, 3.0], year) + >>> a = Array([1.0, 2.0, 3.0, 3.0], year) >>> a year 2016 2017 2018 2019 1.0 2.0 3.0 3.0 @@ -8553,7 +8562,7 @@ def sequence(axis, initial=0, inc=None, mult=1, func=None, axes=None, title=None # stop is not included stop = initial + inc * len(axis) data = np.arange(initial, stop, inc) - return LArray(data, axis, meta=meta) + return Array(data, axis, meta=meta) if axes is None: if not isinstance(axis, Axis): @@ -8572,7 +8581,7 @@ def strip_axes(col): res[axis.i[0]] = initial def has_axis(a, axis): - return isinstance(a, LArray) and axis in a.axes + return isinstance(a, Array) and axis in a.axes if func is not None: for i in range(1, len(axis)): res[axis.i[i]] = func(res[axis.i[i - 1]]) @@ -8603,7 +8612,7 @@ def has_axis(a, axis): # a[1:] = initial * cumprod(mult[1:]) + ... 
def index_if_exists(a, axis, i): - if isinstance(a, LArray) and axis in a.axes: + if isinstance(a, Array) and axis in a.axes: a_axis = a.axes[axis] return a[a_axis[axis.labels[i]]] else: @@ -8623,7 +8632,7 @@ def array_or_full(a, axis, initial): dt = common_type((a, initial)) r = empty((get_axes(a) - axis) | axis, dtype=dt) r[axis.i[0]] = initial - if isinstance(a, LArray) and axis in a.axes: + if isinstance(a, Array) and axis in a.axes: # not using axis.i[1:] because a could have less ticks # on axis than axis r[axis.i[1:]] = a[axis[axis.labels[1]:]] @@ -8631,7 +8640,7 @@ def array_or_full(a, axis, initial): r[axis.i[1:]] = a return r - if isinstance(initial, LArray) and np.isscalar(inc): + if isinstance(initial, Array) and np.isscalar(inc): inc = full_like(initial, inc) # inc only (integer scalar). Equivalent to fastpath above but with axes not None). @@ -8639,13 +8648,13 @@ def array_or_full(a, axis, initial): # stop is not included stop = initial + inc * len(axis) data = np.arange(initial, stop, inc) - res[:] = LArray(data, axis) + res[:] = Array(data, axis) # inc only (other scalar) elif np.isscalar(mult) and mult == 1 and np.isscalar(inc): # stop is included stop = initial + inc * (len(axis) - 1) data = np.linspace(initial, stop=stop, num=len(axis)) - res[:] = LArray(data, axis) + res[:] = Array(data, axis) # inc only (array) elif np.isscalar(mult) and mult == 1: inc_array = array_or_full(inc, axis, initial) @@ -8699,7 +8708,7 @@ def ndtest(shape_or_axes, start=0, label_start=0, title=None, dtype=int, meta=No Returns ------- - LArray + Array Examples -------- @@ -8755,7 +8764,7 @@ def ndtest(shape_or_axes, start=0, label_start=0, title=None, dtype=int, meta=No else: axes = AxisCollection(shape_or_axes) data = np.arange(start, start + axes.size, dtype=dtype).reshape(axes.shape) - return LArray(data, axes, meta=meta) + return Array(data, axes, meta=meta) def kth_diag_indices(shape, k): @@ -8778,7 +8787,7 @@ def diag(a, k=0, axes=(0, 1), ndim=2, split=True): 
Parameters ---------- - a : LArray + a : Array If `a` has 2 dimensions or more, return a copy of its `k`-th diagonal. If `a` has 1 dimension, return an array with `ndim` dimensions on the `k`-th diagonal. k : int, optional @@ -8793,7 +8802,7 @@ def diag(a, k=0, axes=(0, 1), ndim=2, split=True): Returns ------- - LArray + Array The extracted diagonal or constructed diagonal array. Examples @@ -8869,7 +8878,7 @@ def labels_array(axes, title=None, meta=None): Returns ------- - LArray + Array Examples -------- @@ -8902,7 +8911,7 @@ def labels_array(axes, title=None, meta=None): else: res_axes = axes res_data = axes[0].labels - return LArray(res_data, res_axes, meta=meta) + return Array(res_data, res_axes, meta=meta) def identity(axis): @@ -8933,7 +8942,7 @@ def eye(rows, columns=None, k=0, title=None, dtype=None, meta=None): Returns ------- - LArray of shape (rows, columns) + Array of shape (rows, columns) An array where all elements are equal to zero, except for the k-th diagonal, whose values are equal to one. Examples @@ -8965,7 +8974,7 @@ def eye(rows, columns=None, k=0, title=None, dtype=None, meta=None): axes = AxisCollection([rows, columns]) shape = axes.shape data = np.eye(shape[0], shape[1], k, dtype) - return LArray(data, axes, meta=meta) + return Array(data, axes, meta=meta) # XXX: we could change the syntax to use *args @@ -9123,7 +9132,7 @@ def stack(elements=None, axes=None, title=None, meta=None, dtype=None, res_axes= Returns ------- - LArray + Array A single array combining arrays. The new (stacked) axes will be the last axes of the new array. 
Examples @@ -9259,7 +9268,7 @@ def stack(elements=None, axes=None, title=None, meta=None, dtype=None, res_axes= elements = elements.items() - if isinstance(elements, LArray): + if isinstance(elements, Array): if axes is None: axes = -1 axes = elements.axes[axes] @@ -9313,7 +9322,7 @@ def stack_one(array_name): return Session([(array_name, stack_one(array_name)) for array_name in array_names], meta=meta) else: if res_axes is None or dtype is None: - values = [aslarray(v) if not np.isscalar(v) else v + values = [asarray(v) if not np.isscalar(v) else v for k, v in items] if res_axes is None: @@ -9330,7 +9339,7 @@ def stack_one(array_name): # or concat depending on whether or not the axis already exists. # this would be more convenient for users I think, but would mean one class of error we cannot # detect anymore: if a user unintentionally stacks an array with the axis already present. - # (this is very similar to the debate about combining LArray.append and LArray.extend) + # (this is very similar to the debate about combining Array.append and Array.extend) all_axes = [get_axes(v) for v in values] + [axes] res_axes = AxisCollection.union(*all_axes) if kludge: @@ -9356,7 +9365,7 @@ def stack_one(array_name): def get_axes(value): - return value.axes if isinstance(value, LArray) else AxisCollection([]) + return value.axes if isinstance(value, Array) else AxisCollection([]) def _strip_shape(shape): @@ -9373,7 +9382,7 @@ def _equal_modulo_len1(shape1, shape2): # but if we had assigned axes names from the start (without dropping them) this wouldn't be a problem. def make_numpy_broadcastable(values, min_axes=None): r""" - Returns values where LArrays are (NumPy) broadcastable between them. + Returns values where Arrays are (NumPy) broadcastable between them. For that to be possible, all common axes must be compatible (see Axis class documentation). Extra axes (in any array) can have any length. 
@@ -9407,7 +9416,7 @@ def make_numpy_broadcastable(values, min_axes=None): if not isinstance(min_axes, AxisCollection): min_axes = AxisCollection(min_axes) all_axes = min_axes | all_axes - return [v.broadcast_with(all_axes) if isinstance(v, LArray) else v + return [v.broadcast_with(all_axes) if isinstance(v, Array) else v for v in values], all_axes @@ -9416,7 +9425,7 @@ def raw_broadcastable(values, min_axes=None): same as make_numpy_broadcastable but returns numpy arrays """ arrays, res_axes = make_numpy_broadcastable(values, min_axes=min_axes) - raw = [a.data if isinstance(a, LArray) else a + raw = [a.data if isinstance(a, Array) else a for a in arrays] return raw, res_axes @@ -9502,7 +9511,7 @@ def zip_array_values(values, axes=None, ascending=True): arr1: 3, arr2: 3 """ def values_with_expand(value, axes, readonly=True, ascending=True): - if isinstance(value, LArray): + if isinstance(value, Array): # an Axis axis is not necessarily in array.axes expanded = value.expand(axes, readonly=readonly) return expanded.values(axes, ascending=ascending) diff --git a/larray/core/axis.py b/larray/core/axis.py index 0c11f0997..70e281512 100644 --- a/larray/core/axis.py +++ b/larray/core/axis.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray +from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCArray from larray.core.expr import ExprNode from larray.core.group import (Group, LGroup, IGroup, IGroupMaker, _to_tick, _to_ticks, _to_key, _seq_summary, _range_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups) @@ -461,7 +461,7 @@ def subaxis(self, key, name=None): Parameters ---------- - key : int, or collection (list, slice, array, LArray) of them + key : int, or collection (list, slice, array, Array) of them Indices of labels to use for the new axis. name : str, optional Name of the subaxis. Defaults to the name of the parent axis. 
@@ -470,7 +470,7 @@ def subaxis(self, key, name=None): ------- Axis Subaxis. If key is a None slice and name is None, the original Axis is returned. - If key is a LArray, the list of axes is returned. + If key is an Array, the list of axes is returned. Examples -------- @@ -484,7 +484,7 @@ def subaxis(self, key, name=None): # one because the original name is probably what users will want to use to filter if name is None: name = self.name - if isinstance(key, ABCLArray): + if isinstance(key, ABCArray): return key.axes # TODO: compute length for wildcard axes more efficiently labels = len(self.labels[key]) if self.iswildcard else self.labels[key] @@ -916,9 +916,9 @@ def index(self, key): return array_lookup2(_seq_group_to_name(key), self._sorted_keys, self._sorted_values) except KeyError: return array_lookup2(key, self._sorted_keys, self._sorted_values) - elif isinstance(key, ABCLArray): - from .array import LArray - return LArray(self.index(key.data), key.axes) + elif isinstance(key, ABCArray): + from .array import Array + return Array(self.index(key.data), key.axes) else: # the first mapping[key] above will cover most cases. # This code path is only used if the key was given in "non normalized form" @@ -1021,7 +1021,7 @@ def opmethod(self, other): def __larray__(self): r""" - Returns axis as LArray. + Returns axis as Array. 
""" from .array import labels_array return labels_array(self) @@ -1266,7 +1266,7 @@ def align(self, other, join='outer'): See Also -------- - LArray.align + Array.align Examples -------- @@ -2115,7 +2115,7 @@ def index(self, axis, compatible=False): if item.iscompatible(axis): return i else: - # We cannot use self._list.index because it use Axis.__eq__ which produces an LArray + # We cannot use self._list.index because it use Axis.__eq__ which produces an Array for i, item in enumerate(self._list): if item.equals(axis): return i @@ -2225,7 +2225,7 @@ def rename(self, renames=None, to=None, **kwargs): for a in self]) # XXX: what's the point in supporting a list of Axis or AxisCollection in axes_to_replace? - # it is used in LArray.set_axes but if it is only there, shouldn't the support for that be + # it is used in Array.set_axes but if it is only there, shouldn't the support for that be # moved there? def replace(self, axes_to_replace=None, new_axis=None, inplace=False, **kwargs): r"""Replace one, several or all axes of the collection. 
@@ -2338,7 +2338,7 @@ def _guess_axis(self, axis_key): return axis_key # TODO: instead of checking all axes, we should have a big mapping - # (in AxisCollection or LArray): + # (in AxisCollection or Array): # label -> (axis, index) # or possibly (for ambiguous labels) # label -> {axis: index} @@ -2636,7 +2636,7 @@ def _translate_axis_key(self, axis_key): """ # called from _key_to_igroups - from .array import LArray + from .array import Array # Need to convert string keys to groups otherwise command like # >>> ndtest((5, 5)).drop('1[a0]') @@ -2659,13 +2659,13 @@ def _translate_axis_key(self, axis_key): axis_key = axis_key.labels # TODO: do it for Group without axis too - if isinstance(axis_key, (tuple, list, np.ndarray, LArray)): + if isinstance(axis_key, (tuple, list, np.ndarray, Array)): axis = None # TODO: I should actually do some benchmarks to see if this is useful, and estimate which numbers to use # FIXME: check that size is < than key size for size in (1, 10, 100, 1000): # TODO: do not recheck already checked elements - key_chunk = axis_key.i[:size] if isinstance(axis_key, LArray) else axis_key[:size] + key_chunk = axis_key.i[:size] if isinstance(axis_key, Array) else axis_key[:size] try: tkey = self._translate_axis_key_chunk(key_chunk) axis = tkey.axis @@ -2694,7 +2694,7 @@ def _key_to_igroups(self, key): Parameters ---------- key : scalar, list/array of scalars, Group or tuple or dict of them - any key supported by LArray.__get|setitem__ + any key supported by Array.__get|setitem__ Returns ------- @@ -2706,7 +2706,7 @@ def _key_to_igroups(self, key): -------- Axis.index """ - from .array import LArray + from .array import Array if isinstance(key, dict): # key axes could be strings or axis references and we want real axes @@ -2725,14 +2725,14 @@ def _key_to_igroups(self, key): if axis_key.shape != self.shape: raise ValueError("boolean key with a different shape ({}) than array ({})" .format(axis_key.shape, self.shape)) - axis_key = LArray(axis_key, self) + 
axis_key = Array(axis_key, self) - if isinstance(axis_key, LArray) and np.issubdtype(axis_key.dtype, np.bool_): + if isinstance(axis_key, Array) and np.issubdtype(axis_key.dtype, np.bool_): extra_key_axes = axis_key.axes - self if extra_key_axes: raise ValueError("boolean subset key contains more axes ({}) than array ({})" .format(axis_key.axes, self)) - # nonzero (currently) returns a tuple of IGroups containing 1D LArrays (one IGroup per axis) + # nonzero (currently) returns a tuple of IGroups containing 1D Arrays (one IGroup per axis) nonboolkey.extend(axis_key.nonzero()) else: nonboolkey.append(axis_key) @@ -2749,19 +2749,19 @@ def _key_to_igroups(self, key): def _translated_key(self, key): """ - Transforms any key (from LArray.__get|setitem__) to a complete indices-based key. + Transforms any key (from Array.__get|setitem__) to a complete indices-based key. Parameters ---------- key : scalar, list/array of scalars, Group or tuple or dict of them - any key supported by LArray.__get|setitem__ + any key supported by Array.__get|setitem__ Returns ------- tuple len(tuple) == self.ndim - This key is not yet usable as is in a numpy array as it can still contain LArray parts and the advanced key + This key is not yet usable as is in a numpy array as it can still contain Array parts and the advanced key parts are not broadcasted together yet. """ # any key -> (IGroup, IGroup, ...) @@ -2788,13 +2788,13 @@ def _translated_key(self, key): def _key_to_raw_and_axes(self, key, collapse_slices=False, translate_key=True): r""" - Transforms any key (from LArray.__getitem__) to a raw numpy key, the resulting axes, and potentially a tuple + Transforms any key (from Array.__getitem__) to a raw numpy key, the resulting axes, and potentially a tuple of indices to transpose axes back to where they were. 
Parameters ---------- key : scalar, list/array of scalars, Group or tuple or dict of them - any key supported by LArray.__getitem__ + any key supported by Array.__getitem__ collapse_slices : bool, optional Whether or not to convert ranges to slices. Defaults to False. @@ -2802,7 +2802,7 @@ def _key_to_raw_and_axes(self, key, collapse_slices=False, translate_key=True): ------- raw_key, res_axes, transposed_indices """ - from .array import make_numpy_broadcastable, LArray, sequence + from .array import make_numpy_broadcastable, Array, sequence if translate_key: key = self._translated_key(key) @@ -2816,29 +2816,29 @@ def _key_to_raw_and_axes(self, key, collapse_slices=False, translate_key=True): if collapse_slices: # isinstance(np.ndarray, collections.Sequence) is False but it behaves like one seq_types = (tuple, list, np.ndarray) - # TODO: we should only do this if there are no LArray key (with axes corresponding to the range) + # TODO: we should only do this if there are no Array key (with axes corresponding to the range) # otherwise we will be translating them back to a range afterwards key = [_range_to_slice(axis_key, len(axis)) if isinstance(axis_key, seq_types) else axis_key for axis_key, axis in zip(key, self)] - # transform non-LArray advanced keys (list and ndarray) to LArray + # transform non-Array advanced keys (list and ndarray) to Array def to_la_ikey(axis, axis_key): - if isinstance(axis_key, (int, np.integer, slice, LArray)): + if isinstance(axis_key, (int, np.integer, slice, Array)): return axis_key else: assert isinstance(axis_key, (list, np.ndarray)) res_axis = axis.subaxis(axis_key) - # TODO: for perf reasons, we should bypass creating an actual LArray by returning axes and key_data + # TODO: for perf reasons, we should bypass creating an actual Array by returning axes and key_data # but then we will need to implement a function similar to make_numpy_broadcastable which works on axes # and rawdata instead of arrays - return LArray(axis_key, 
res_axis) + return Array(axis_key, res_axis) key = tuple(to_la_ikey(axis, axis_key) for axis, axis_key in zip(self, key)) - # transform slice keys to LArray too IF they refer to axes present in advanced key (so that those axes + # transform slice keys to Array too IF they refer to axes present in advanced key (so that those axes # broadcast together instead of being duplicated, which is not what we want) def get_axes(value): - return value.axes if isinstance(value, LArray) else AxisCollection([]) + return value.axes if isinstance(value, Array) else AxisCollection([]) def slice_to_sequence(axis, axis_key): if isinstance(axis_key, slice) and axis in la_key_axes: @@ -2907,7 +2907,7 @@ def slice_to_sequence(axis, axis_key): res_axes[adv_key_subspace_pos:adv_key_subspace_pos] = adv_key_dest_axes # transform to raw numpy arrays - raw_broadcasted_key = tuple(k.data if isinstance(k, LArray) else k + raw_broadcasted_key = tuple(k.data if isinstance(k, Array) else k for k in bcasted_adv_keys) return raw_broadcasted_key, res_axes, transpose_indices @@ -3235,7 +3235,7 @@ def split_axes(self, axes=None, sep='_', names=None, regex=None): See Also -------- Axis.split - LArray.split_axes + Array.split_axes Returns ------- @@ -3340,7 +3340,7 @@ def align(self, other, join='outer', axes=None): See Also -------- - LArray.align + Array.align Examples -------- @@ -3435,13 +3435,13 @@ def _flat_lookup(self, flat_indices): Examples -------- - >>> from larray import ndtest, LArray + >>> from larray import ndtest, Array >>> arr = ndtest((2, 3)) >>> arr a\b b0 b1 b2 a0 0 1 2 a1 3 4 5 - >>> indices = LArray([2, 5, 0], 'draw=d0..d2') + >>> indices = Array([2, 5, 0], 'draw=d0..d2') >>> indices draw d0 d1 d2 2 5 0 @@ -3451,21 +3451,21 @@ def _flat_lookup(self, flat_indices): d1 a1 b2 d2 a0 b0 """ - from larray.core.array import aslarray, LArray, stack + from larray.core.array import asarray, Array, stack - flat_indices = aslarray(flat_indices) + flat_indices = asarray(flat_indices) axes_indices 
= np.unravel_index(flat_indices, self.shape) - # This could return an LArray with object dtype because axes labels can have different types (but not length) + # This could return an Array with object dtype because axes labels can have different types (but not length) # TODO: this should be: # return stack([(axis.name, axis.i[inds]) for axis, inds in zip(axes, axes_indices)], axis='axis') flat_axes = flat_indices.axes - return stack([(axis.name, LArray(axis.labels[inds], flat_axes)) for axis, inds in zip(self, axes_indices)], + return stack([(axis.name, Array(axis.labels[inds], flat_axes)) for axis, inds in zip(self, axes_indices)], axes='axis') def _adv_keys_to_combined_axis_la_keys(self, key, wildcard=False, sep='_'): r""" - Returns key with the non-LArray "advanced indexing" key parts transformed to LArrays with a combined axis. - Scalar, slice and LArray key parts are just left as is. + Returns key with the non-Array "advanced indexing" key parts transformed to Arrays with a combined axis. + Scalar, slice and Array key parts are just left as is. Parameters ---------- @@ -3480,7 +3480,7 @@ def _adv_keys_to_combined_axis_la_keys(self, key, wildcard=False, sep='_'): ------- tuple """ - from larray.core.array import LArray + from larray.core.array import Array assert isinstance(key, tuple) and len(key) == self.ndim @@ -3489,7 +3489,7 @@ def _adv_keys_to_combined_axis_la_keys(self, key, wildcard=False, sep='_'): # TODO: use/factorize with AxisCollection.combine_axes. 
The problem is that it uses product(*axes_labels) # while here we need zip(*axes_labels) - ignored_types = (int, np.integer, slice, LArray) + ignored_types = (int, np.integer, slice, Array) adv_keys = [(axis_key, axis) for axis_key, axis in zip(key, self) if not isinstance(axis_key, ignored_types)] if not adv_keys: @@ -3535,9 +3535,9 @@ def _adv_keys_to_combined_axis_la_keys(self, key, wildcard=False, sep='_'): combined_axis = Axis(combined_labels, combined_name) combined_axes = AxisCollection(combined_axis) - # 2) transform all advanced non-LArray keys to LArray with the combined axis - # ========================================================================== - return tuple(axis_key if isinstance(axis_key, ignored_types) else LArray(axis_key, combined_axes) + # 2) transform all advanced non-Array keys to Array with the combined axis + # ======================================================================== + return tuple(axis_key if isinstance(axis_key, ignored_types) else Array(axis_key, combined_axes) for axis_key in key) diff --git a/larray/core/group.py b/larray/core/group.py index b73417381..6c3947eb1 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray +from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCArray from larray.util.oset import * from larray.util.misc import (basestring, PY2, unique, find_closing_chr, _parse_bound, _seq_summary, _isintstring, renamed_to, LHDFStore) @@ -487,7 +487,7 @@ def _to_key(v, stack_depth=1, parse_single_int=False): Parameters ---------- - v : int or basestring or tuple or list or slice or LArray or Group + v : int or basestring or tuple or list or slice or Array or Group value to convert into a key usable for indexing Returns @@ -577,7 +577,7 @@ def _to_key(v, stack_depth=1, parse_single_int=False): return cls(key, name=name, axis=axis) else: return 
_seq_str_to_seq(v, stack_depth + 1, parse_single_int=parse_single_int) - elif v is Ellipsis or np.isscalar(v) or isinstance(v, (Group, slice, list, np.ndarray, ABCLArray, OrderedSet)): + elif v is Ellipsis or np.isscalar(v) or isinstance(v, (Group, slice, list, np.ndarray, ABCArray, OrderedSet)): return v else: raise TypeError("%s has an invalid type (%s) for a key" % (v, type(v).__name__)) @@ -591,7 +591,7 @@ def _to_keys(value, stack_depth=1): Parameters ---------- - value : int or basestring or tuple or list or slice or LArray or Group + value : int or basestring or tuple or list or slice or Array or Group (collection of) value(s) to convert into key(s) usable for indexing Returns @@ -857,7 +857,7 @@ def __len__(self): # XXX: we probably want to_label instead of .eval (so that we do not expand slices) value = self.eval() # for some reason this breaks having LGroup ticks/labels on an axis - # if isinstance(value, (tuple, list, LArray, np.ndarray, str)): + # if isinstance(value, (tuple, list, Array, np.ndarray, str)): if hasattr(value, '__len__'): return len(value) elif isinstance(value, slice): @@ -1002,7 +1002,7 @@ def __getitem__(self, key): return IGroup(orig_start_pos + key * orig_step, None, self.axis) elif isinstance(key, (tuple, list)): return IGroup([orig_start_pos + k * orig_step for k in key], None, self.axis) - elif isinstance(orig_key, ABCLArray): + elif isinstance(orig_key, ABCArray): # XXX: why .i ? return cls(orig_key.i[key], None, self.axis) elif np.isscalar(orig_key): diff --git a/larray/core/metadata.py b/larray/core/metadata.py index c0d9f32b5..0d5e1eb89 100644 --- a/larray/core/metadata.py +++ b/larray/core/metadata.py @@ -148,10 +148,10 @@ def __larray__(self): @classmethod def from_array(cls, array): - from larray.core.array import aslarray - array = aslarray(array) + from larray.core.array import asarray + array = asarray(array) if array.ndim != 1: - raise ValueError("Expected LArray object of dimension 1. 
Got array of dimension {}".format(array.ndim)) + raise ValueError("Expected Array object of dimension 1. Got array of dimension {}".format(array.ndim)) from pandas import to_numeric, to_datetime diff --git a/larray/core/npufuncs.py b/larray/core/npufuncs.py index 4203f8d85..1ce29093e 100644 --- a/larray/core/npufuncs.py +++ b/larray/core/npufuncs.py @@ -127,9 +127,9 @@ isinf = broadcastify(np.isinf) inverse = broadcastify(np.linalg.inv) -# XXX: create a new LArray method instead ? -# TODO: should appear in the API doc if it actually works with LArrays, +# XXX: create a new Array method instead ? +# TODO: should appear in the API doc if it actually works with Arrays, # which I have never tested (and I doubt is the case). # Might be worth having specific documentation if it works well. -# My guess is that we should rather make a new LArray method for that one. +# My guess is that we should rather make a new Array method for that one. interp = broadcastify(np.interp) diff --git a/larray/core/session.py b/larray/core/session.py index c67dccae4..748cccd98 100644 --- a/larray/core/session.py +++ b/larray/core/session.py @@ -14,12 +14,12 @@ from larray.core.group import Group from larray.core.axis import Axis from larray.core.constants import nan -from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence, aslarray +from larray.core.array import Array, get_axes, ndtest, zeros, zeros_like, sequence, asarray from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring from larray.inout.session import ext_default_engine, get_file_handler -# XXX: inherit from OrderedDict or LArray? +# XXX: inherit from OrderedDict or Array? class Session(object): r""" Groups several objects together. 
@@ -247,7 +247,7 @@ def __getitem__(self, key): if isinstance(key, int): keys = list(self.keys()) return self._objects[keys[key]] - elif isinstance(key, LArray): + elif isinstance(key, Array): assert np.issubdtype(key.dtype, np.bool_) assert key.ndim == 1 # only keep True values @@ -344,7 +344,7 @@ def __setstate__(self, d): def load(self, fname, names=None, engine='auto', display=False, **kwargs): r""" - Load LArray, Axis and Group objects from a file, or several .csv files. + Load Array, Axis and Group objects from a file, or several .csv files. WARNING: never load a file using the pickle engine (.pkl or .pickle) from an untrusted source, as it can lead to arbitrary code execution. @@ -431,7 +431,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs): def save(self, fname, names=None, engine='auto', overwrite=True, display=False, **kwargs): r""" - Dumps LArray, Axis and Group objects from the current session to a file. + Dumps Array, Axis and Group objects from the current session to a file. Parameters ---------- @@ -439,7 +439,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False, Path of the file for the dump. If objects are saved in CSV files, the path corresponds to a directory. names : list of str or None, optional - List of names of LArray/Axis/Group objects to dump. + List of names of Array/Axis/Group objects to dump. If `fname` is None, list of paths to CSV files. Defaults to all objects present in the Session. engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional @@ -552,7 +552,7 @@ def to_globals(self, names=None, depth=0, warn=True, inplace=False): if k not in d: raise ValueError("'{}' not found in current namespace. 
Session.to_globals(inplace=True) requires " "all arrays to already exist.".format(k)) - if not isinstance(v, LArray): + if not isinstance(v, Array): continue if not d[k].axes == v.axes: raise ValueError("Session.to_globals(inplace=True) requires the existing (destination) arrays " @@ -565,7 +565,7 @@ def to_globals(self, names=None, depth=0, warn=True, inplace=False): def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs): r""" - Dumps LArray, Axis and Group objects from the current session to a file using pickle. + Dumps Array, Axis and Group objects from the current session to a file using pickle. WARNING: never load a pickle file (.pkl or .pickle) from an untrusted source, as it can lead to arbitrary code execution. @@ -575,7 +575,7 @@ def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs): fname : str Path for the dump. names : list of str or None, optional - Names of LArray/Axis/Group objects to dump. + Names of Array/Axis/Group objects to dump. Defaults to all objects present in the Session. overwrite: bool, optional Whether or not to overwrite an existing file, if any. @@ -610,14 +610,14 @@ def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs): def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs): r""" - Dumps LArray, Axis and Group objects from the current session to an HDF file. + Dumps Array, Axis and Group objects from the current session to an HDF file. Parameters ---------- fname : str Path of the file for the dump. names : list of str or None, optional - Names of LArray/Axis/Group objects to dump. + Names of Array/Axis/Group objects to dump. Defaults to all objects present in the Session. overwrite: bool, optional Whether or not to overwrite an existing file, if any. 
@@ -652,14 +652,14 @@ def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs): def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs): r""" - Dumps LArray, Axis and Group objects from the current session to an Excel file. + Dumps Array, Axis and Group objects from the current session to an Excel file. Parameters ---------- fname : str Path of the file for the dump. names : list of str or None, optional - Names of LArray/Axis/Group objects to dump. + Names of Array/Axis/Group objects to dump. Defaults to all objects present in the Session. overwrite: bool, optional Whether or not to overwrite an existing file, if any. If False, file is updated. Defaults to True. @@ -700,14 +700,14 @@ def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs): def to_csv(self, fname, names=None, display=False, **kwargs): r""" - Dumps LArray, Axis and Group objects from the current session to CSV files. + Dumps Array, Axis and Group objects from the current session to CSV files. Parameters ---------- fname : str Path for the directory that will contain CSV files. names : list of str or None, optional - Names of LArray/Axis/Group objects to dump. + Names of Array/Axis/Group objects to dump. Defaults to all objects present in the Session. display : bool, optional Whether or not to display which file is being worked on. Defaults to False. @@ -907,7 +907,7 @@ def items(self): >>> arr1, arr2 = arr1.astype(np.int64), arr2.astype(np.int64) >>> s = Session([('arr2', arr2), ('arr1', arr1), ('group1', group1), ('axis1', axis1)]) >>> for k, v in s.items(): - ... print("{}: {}".format(k, v.info if isinstance(v, LArray) else repr(v))) + ... 
print("{}: {}".format(k, v.info if isinstance(v, Array) else repr(v))) arr2: 4 a [4]: 'a0' 'a1' 'a2' 'a3' dtype: int64 @@ -935,14 +935,14 @@ def _binop(opname, arrays_only=True): def opmethod(self, other): self_keys = set(self.keys()) all_keys = list(self.keys()) - if not isinstance(other, LArray) and hasattr(other, 'keys'): + if not isinstance(other, Array) and hasattr(other, 'keys'): all_keys += [n for n in other.keys() if n not in self_keys] with np.errstate(call=_session_float_error_handler): res = [] for name in all_keys: self_item = self.get(name, nan) other_operand = other.get(name, nan) if hasattr(other, 'get') else other - if arrays_only and not isinstance(self_item, LArray): + if arrays_only and not isinstance(self_item, Array): res_item = self_item else: try: @@ -1001,7 +1001,7 @@ def element_equals(self, other): r"""Test if each element (group, axis and array) of the current session equals the corresponding element of another session. - For arrays, it is equivalent to apply :py:meth:`LArray.equals` with flag nans_equal=True + For arrays, it is equivalent to apply :py:meth:`Array.equals` with flag nans_equal=True to all arrays from two sessions. 
Parameters @@ -1011,7 +1011,7 @@ def element_equals(self, other): Returns ------- - Boolean LArray + Boolean Array Notes ----- @@ -1057,7 +1057,7 @@ def element_equals(self, other): name a a01 arr1 arr2 arr3 False True False False False """ - supported_objects = (Axis, Group, LArray) + supported_objects = (Axis, Group, Array) self_keys = [k for k, v in self.items() if isinstance(v, supported_objects)] other_keys = [k for k, v in other.items() if isinstance(v, supported_objects) and k not in self_keys] all_keys = self_keys + other_keys @@ -1071,7 +1071,7 @@ def elem_equal(e1, e2): return e1.equals(e2, nans_equal=True) res = [elem_equal(self.get(key), other.get(key)) for key in all_keys] - return LArray(res, [Axis(all_keys, 'name')]) + return Array(res, [Axis(all_keys, 'name')]) array_equals = renamed_to(element_equals, 'array_equals') @@ -1146,7 +1146,7 @@ def transpose(self, *args): See Also -------- - LArray.transpose + Array.transpose Examples -------- @@ -1156,7 +1156,7 @@ def transpose(self, *args): >>> arr2 = ndtest((2, 2)) >>> sess = Session([('arr1', arr1), ('arr2', arr2)]) >>> def print_summary(s): - ... print(s.summary({LArray: "{key} -> {axes_names}"})) + ... print(s.summary({Array: "{key} -> {axes_names}"})) >>> print_summary(sess) arr1 -> a, b, c arr2 -> a, b @@ -1234,7 +1234,7 @@ def apply(self, func, *args, **kwargs): Any extra arguments are passed to the function kind : type or tuple of types, optional Type(s) of elements `func` will be applied to. Other elements will be left intact. Use ´kind=object´ to - apply to all kinds of objects. Defaults to LArray. + apply to all kinds of objects. Defaults to Array. 
**kwargs : any Any extra keyword arguments are passed to the function @@ -1279,7 +1279,7 @@ def apply(self, func, *args, **kwargs): a a0 a1 a2 4 6 8 """ - kind = kwargs.pop('kind', LArray) + kind = kwargs.pop('kind', Array) return Session([(k, func(v, *args, **kwargs) if isinstance(v, kind) else v) for k, v in self.items()]) def summary(self, template=None): @@ -1338,7 +1338,7 @@ def summary(self, template=None): ... array.meta.title, array.dtype) >>> template = {Axis: "{key} -> {name} [{labels}] ({length})", ... Group: "{key} -> {name}: {axis_name}{labels} ({length})", - ... LArray: print_array, + ... Array: print_array, ... Metadata: "\\t{key} -> {value}"} >>> print(s.summary(template)) # doctest: +NORMALIZE_WHITESPACE Metadata: @@ -1362,8 +1362,8 @@ def summary(self, template=None): template[Axis] = "{key}: {name} [{labels}] ({length})" if Group not in template: template[Group] = "{key}: {axis_name}{labels} >> {name} ({length})" - if LArray not in template: - template[LArray] = "{key}: {axes_names} ({shape}) [{dtype}]" + if Array not in template: + template[Array] = "{key}: {axes_names} ({shape}) [{dtype}]" if Metadata not in template: template[Metadata] = "\t{key}: {value}" @@ -1381,7 +1381,7 @@ def display(k, v, is_metadata=False): return tmpl.format(key=k, name=v.name, labels=v.labels_summary(), length=len(v)) elif isinstance(v, Group): return tmpl.format(key=k, name=v.name, axis_name=v.axis.name, labels=v.key, length=len(v)) - elif isinstance(v, LArray): + elif isinstance(v, Array): return tmpl.format(key=k, axes_names=', '.join(v.axes.display_names), shape=' x '.join(str(i) for i in v.shape), dtype=v.dtype) else: @@ -1423,7 +1423,7 @@ def local_arrays(depth=0, include_private=False, meta=None): d = sys._getframe(depth + 1).f_locals if not include_private: d = _exclude_private_vars(d) - return Session([(k, d[k]) for k in sorted(d.keys()) if isinstance(d[k], LArray)], meta=meta) + return Session([(k, d[k]) for k in sorted(d.keys()) if isinstance(d[k], 
Array)], meta=meta) def global_arrays(depth=0, include_private=False, meta=None): @@ -1448,7 +1448,7 @@ def global_arrays(depth=0, include_private=False, meta=None): d = sys._getframe(depth + 1).f_globals if not include_private: d = _exclude_private_vars(d) - return Session([(k, d[k]) for k in sorted(d.keys()) if isinstance(d[k], LArray)], meta=meta) + return Session([(k, d[k]) for k in sorted(d.keys()) if isinstance(d[k], Array)], meta=meta) def arrays(depth=0, include_private=False, meta=None): @@ -1485,7 +1485,7 @@ def arrays(depth=0, include_private=False, meta=None): all_keys = sorted(set(global_vars.keys()) | set(local_vars.keys())) combined_vars = [(k, local_vars[k] if k in local_vars else global_vars[k]) for k in all_keys] - return Session([(k, v) for k, v in combined_vars if isinstance(v, LArray)], meta=meta) + return Session([(k, v) for k, v in combined_vars if isinstance(v, Array)], meta=meta) _session_float_error_handler = float_error_handler_factory(4) diff --git a/larray/core/ufuncs.py b/larray/core/ufuncs.py index 6a3b8c8a7..087fc91bd 100644 --- a/larray/core/ufuncs.py +++ b/larray/core/ufuncs.py @@ -3,7 +3,7 @@ import numpy as np -from larray.core.array import LArray, make_args_broadcastable +from larray.core.array import Array, make_args_broadcastable def wrap_elementwise_array_func(func): @@ -20,7 +20,7 @@ def wrap_elementwise_array_func(func): Returns ------- function - A function taking LArray arguments and returning LArrays. + A function taking LArray arrays arguments and returning LArray arrays. 
Examples -------- @@ -29,7 +29,7 @@ def wrap_elementwise_array_func(func): >>> from statsmodels.tsa.filters.hp_filter import hpfilter # doctest: +SKIP >>> hpfilter = wrap_elementwise_array_func(hpfilter) # doctest: +SKIP - hpfilter is now a function taking a one dimensional LArray as input and returning a one dimensional LArray as output + hpfilter is now a function taking a one dimensional Array as input and returning a one dimensional Array as output Now let us suppose we have a ND array such as: @@ -61,15 +61,15 @@ def wrapper(*args, **kwargs): # and then tries to get them back from high, where they are possibly # incomplete if broadcasting happened - # It fails on "np.minimum(ndarray, LArray)" because it calls __array_wrap__(high, result) which cannot work if + # It fails on "np.minimum(ndarray, Array)" because it calls __array_wrap__(high, result) which cannot work if # there was broadcasting involved (high has potentially less labels than result). # it does this because numpy calls __array_wrap__ on the argument with the highest __array_priority__ res_data = func(*raw_bcast_args, **raw_bcast_kwargs) if res_axes: if isinstance(res_data, tuple): - return tuple(LArray(res_arr, res_axes) for res_arr in res_data) + return tuple(Array(res_arr, res_axes) for res_arr in res_data) else: - return LArray(res_data, res_axes) + return Array(res_data, res_axes) else: return res_data # copy function name. We are intentionally not using functools.wraps, because it does not work for wrapping a @@ -113,22 +113,22 @@ def broadcastify(func): Parameters ---------- - condition : boolean LArray + condition : boolean Array When True, yield `x`, otherwise yield `y`. - x, y : LArray + x, y : Array Values from which to choose. Returns ------- - out : LArray + out : Array If both `x` and `y` are specified, the output array contains elements of `x` where `condition` is True, and elements from `y` elsewhere. 
Examples -------- - >>> from larray import LArray - >>> arr = LArray([[10, 7, 5, 9], + >>> from larray import Array + >>> arr = Array([[10, 7, 5, 9], ... [5, 8, 3, 7], ... [6, 2, 0, 9], ... [9, 10, 5, 6]], "a=a0..a3;b=b0..b3") @@ -178,16 +178,16 @@ def broadcastify(func): Parameters ---------- - x1, x2 : LArray + x1, x2 : Array The arrays holding the elements to be compared. - out : LArray, optional + out : Array, optional An array into which the result is stored. dtype : data-type, optional Overrides the dtype of the output array. Returns ------- - y : LArray or scalar + y : Array or scalar The maximum of `x1` and `x2`, element-wise. This is a scalar if both `x1` and `x2` are scalars. @@ -203,10 +203,10 @@ def broadcastify(func): Examples -------- - >>> from larray import LArray - >>> arr1 = LArray([[10, 7, 5, 9], + >>> from larray import Array + >>> arr1 = Array([[10, 7, 5, 9], ... [5, 8, 3, 7]], "a=a0,a1;b=b0..b3") - >>> arr2 = LArray([[6, 2, 9, 0], + >>> arr2 = Array([[6, 2, 9, 0], ... [9, 10, 5, 6]], "a=a0,a1;b=b0..b3") >>> arr1 a\b b0 b1 b2 b3 @@ -248,16 +248,16 @@ def broadcastify(func): Parameters ---------- - x1, x2 : LArray + x1, x2 : Array The arrays holding the elements to be compared. - out : LArray, optional + out : Array, optional An array into which the result is stored. dtype : data-type, optional Overrides the dtype of the output array. Returns ------- - y : LArray or scalar + y : Array or scalar The minimum of `x1` and `x2`, element-wise. This is a scalar if both `x1` and `x2` are scalars. @@ -273,10 +273,10 @@ def broadcastify(func): Examples -------- - >>> from larray import LArray - >>> arr1 = LArray([[10, 7, 5, 9], + >>> from larray import Array + >>> arr1 = Array([[10, 7, 5, 9], ... [5, 8, 3, 7]], "a=a0,a1;b=b0..b3") - >>> arr2 = LArray([[6, 2, 9, 0], + >>> arr2 = Array([[6, 2, 9, 0], ... 
[9, 10, 5, 6]], "a=a0,a1;b=b0..b3") >>> arr1 a\b b0 b1 b2 b3 diff --git a/larray/extra/ipfp.py b/larray/extra/ipfp.py index 774aad97b..4ffbaad87 100644 --- a/larray/extra/ipfp.py +++ b/larray/extra/ipfp.py @@ -1,7 +1,7 @@ import math from collections import deque -from larray.core.array import LArray, aslarray, ones, any +from larray.core.array import Array, asarray, ones, any import numpy as np @@ -81,23 +81,23 @@ def ipfp(target_sums, a=None, axes=None, maxiter=1000, threshold=0.5, stepstoabo Returns ------- - LArray + Array Examples -------- >>> from larray import * >>> a = Axis('a=a0,a1') >>> b = Axis('b=b0,b1') - >>> initial = LArray([[2, 1], [1, 2]], [a, b]) + >>> initial = Array([[2, 1], [1, 2]], [a, b]) >>> initial a\b b0 b1 a0 2 1 a1 1 2 - >>> target_sum_along_a = LArray([2, 1], b) + >>> target_sum_along_a = Array([2, 1], b) >>> target_sum_along_a b b0 b1 2 1 - >>> target_sum_along_b = LArray([1, 2], a) + >>> target_sum_along_b = Array([1, 2], a) >>> target_sum_along_b a a0 a1 1 2 @@ -142,7 +142,7 @@ def ipfp(target_sums, a=None, axes=None, maxiter=1000, threshold=0.5, stepstoabo assert no_convergence in {'ignore', 'warn', 'raise'} assert isinstance(display_progress, bool) or display_progress == 'condensed' - target_sums = [aslarray(ts) for ts in target_sums] + target_sums = [asarray(ts) for ts in target_sums] n = len(target_sums) @@ -186,10 +186,10 @@ def has_anonymous_axes(a): else: # TODO: only make a copy if there are actually any bad values, but I am unsure we should make a copy at all. # Either way, this should be documented. 
- if nzvzs in {'warn', 'fix'} and isinstance(a, LArray): + if nzvzs in {'warn', 'fix'} and isinstance(a, Array): a = a.copy() else: - a = aslarray(a) + a = asarray(a) # TODO: this should be a builtin op a = a.rename({i: name if name is not None else 'axis{}'.format(i) for i, name in enumerate(a.axes.names)}) diff --git a/larray/inout/common.py b/larray/inout/common.py index fd0fe3cb8..eb9056c8d 100644 --- a/larray/inout/common.py +++ b/larray/inout/common.py @@ -3,7 +3,7 @@ import os from collections import OrderedDict -from larray.core.array import LArray +from larray.core.array import Array def _get_index_col(nb_axes=None, index_col=None, wide=True): @@ -112,7 +112,7 @@ def read(self, keys, *args, **kwargs): ------- Metadata List of metadata to load. - OrderedDict(str, LArray/Axis/Group) + OrderedDict(str, Array/Axis/Group) Dictionary containing the loaded objects. """ display = kwargs.pop('display', False) @@ -144,7 +144,7 @@ def dump(self, metadata, key_values, *args, **kwargs): ---------- metadata: Metadata List of metadata to dump. - key_values : list of (str, LArray/Axis/Group) pairs + key_values : list of (str, Array/Axis/Group) pairs Name and data of objects to dump. kwargs : * display: whether or not to display when the dump of each object is started/done. @@ -155,7 +155,7 @@ def dump(self, metadata, key_values, *args, **kwargs): if metadata is not None: self._dump_metadata(metadata) for key, value in key_values: - if isinstance(value, LArray) and value.ndim == 0: + if isinstance(value, Array) and value.ndim == 0: if display: print('Cannot dump {}. 
Dumping 0D arrays is currently not supported.'.format(key)) continue diff --git a/larray/inout/csv.py b/larray/inout/csv.py index 87318f15a..dd2512a3a 100644 --- a/larray/inout/csv.py +++ b/larray/inout/csv.py @@ -9,7 +9,7 @@ import pandas as pd import numpy as np -from larray.core.array import LArray, aslarray, ndtest +from larray.core.array import Array, asarray, ndtest from larray.core.axis import Axis from larray.core.constants import nan from larray.core.group import Group @@ -17,7 +17,7 @@ from larray.util.misc import skip_comment_cells, strip_rows, csv_open, deprecate_kwarg from larray.inout.session import register_file_handler from larray.inout.common import _get_index_col, FileHandler -from larray.inout.pandas import df_aslarray, _axes_to_df, _df_to_axes, _groups_to_df, _df_to_groups +from larray.inout.pandas import df_asarray, _axes_to_df, _df_to_axes, _groups_to_df, _df_to_groups from larray.example import get_example_filepath @@ -42,7 +42,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse Separator. headersep : str or None, optional Separator for headers. - fill_value : scalar or LArray, optional + fill_value : scalar or Array, optional Value used to fill cells corresponding to label combinations which are not present in the input. Defaults to NaN. 
sort_rows : bool, optional @@ -61,7 +61,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse Returns ------- - LArray + Array Notes ----- @@ -230,7 +230,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse df.index.names = combined_axes_names.split(headersep) raw = False - return df_aslarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide) + return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide) def read_tsv(filepath_or_buffer, **kwargs): @@ -251,7 +251,7 @@ def read_eurostat(filepath_or_buffer, **kwargs): Returns ------- - LArray + Array """ return read_csv(filepath_or_buffer, sep='\t', headersep=',', **kwargs) @@ -351,7 +351,7 @@ def _read_item(self, key, type, *args, **kwargs): raise TypeError() def _dump_item(self, key, value, *args, **kwargs): - if isinstance(value, LArray): + if isinstance(value, Array): value.to_csv(self._to_filepath(key), *args, **kwargs) elif isinstance(value, Axis): self.axes[key] = value @@ -370,7 +370,7 @@ def _read_metadata(self): def _dump_metadata(self, metadata): if len(metadata) > 0: - meta = aslarray(metadata) + meta = asarray(metadata) meta.to_csv(self._to_filepath('__metadata__'), sep=self.sep, wide=False, value_name='') def save(self): diff --git a/larray/inout/excel.py b/larray/inout/excel.py index 4e76bb1c6..1ceaaaf49 100644 --- a/larray/inout/excel.py +++ b/larray/inout/excel.py @@ -11,7 +11,7 @@ except ImportError: xw = None -from larray.core.array import LArray, aslarray +from larray.core.array import Array, asarray from larray.core.axis import Axis from larray.core.constants import nan from larray.core.group import Group, _translate_sheet_name @@ -19,7 +19,7 @@ from larray.util.misc import deprecate_kwarg from larray.inout.session import register_file_handler from larray.inout.common import _get_index_col, FileHandler -from larray.inout.pandas import 
df_aslarray, _axes_to_df, _df_to_axes, _groups_to_df, _df_to_groups +from larray.inout.pandas import df_asarray, _axes_to_df, _df_to_axes, _groups_to_df, _df_to_groups from larray.inout.xw_excel import open_excel from larray.example import get_example_filepath @@ -33,7 +33,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan, na=nan, sort_rows=False, sort_columns=False, wide=True, engine=None, range=slice(None), **kwargs): r""" - Reads excel file from sheet name and returns an LArray with the contents + Reads excel file from sheet name and returns an Array with the contents Parameters ---------- @@ -49,7 +49,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan, array is assumed to have one axis. Defaults to None. index_col : list, optional Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above). - fill_value : scalar or LArray, optional + fill_value : scalar or Array, optional Value used to fill cells corresponding to label combinations which are not present in the input. Defaults to NaN. 
sort_rows : bool, optional @@ -71,7 +71,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan, Returns ------- - LArray + Array Examples -------- @@ -220,8 +220,8 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan, else: # TODO: add support for range argument (using usecols, skiprows and nrows arguments of pandas.read_excel) df = pd.read_excel(filepath, sheet, index_col=index_col, engine=engine, **kwargs) - return df_aslarray(df, sort_rows=sort_rows, sort_columns=sort_columns, raw=index_col is None, - fill_value=fill_value, wide=wide) + return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, raw=index_col is None, + fill_value=fill_value, wide=wide) @register_file_handler('pandas_excel', ['xls', 'xlsx'] if xw is None else None) @@ -282,7 +282,7 @@ def list_items(self): def _read_item(self, key, type, *args, **kwargs): if type == 'Array': df = self.handle.parse(key, *args, **kwargs) - return df_aslarray(df, raw=True) + return df_asarray(df, raw=True) elif type == 'Axis': return self.axes[key] elif type == 'Group': @@ -292,7 +292,7 @@ def _read_item(self, key, type, *args, **kwargs): def _dump_item(self, key, value, *args, **kwargs): kwargs['engine'] = 'xlsxwriter' - if isinstance(value, LArray): + if isinstance(value, Array): value.to_excel(self.handle, key, *args, **kwargs) elif isinstance(value, Axis): self.axes[key] = value @@ -311,7 +311,7 @@ def _read_metadata(self): def _dump_metadata(self, metadata): if len(metadata) > 0: - metadata = aslarray(metadata) + metadata = asarray(metadata) metadata.to_excel(self.handle, '__metadata__', engine='xlsxwriter', wide=False, value_name='') def save(self): @@ -395,7 +395,7 @@ def _read_item(self, key, type, *args, **kwargs): raise TypeError() def _dump_item(self, key, value, *args, **kwargs): - if isinstance(value, LArray): + if isinstance(value, Array): self.handle[key] = value.dump(*args, **kwargs) elif isinstance(value, Axis): self.axes[key] = 
value @@ -414,7 +414,7 @@ def _read_metadata(self): def _dump_metadata(self, metadata): if len(metadata) > 0: - metadata = aslarray(metadata) + metadata = asarray(metadata) self.handle['__metadata__'] = metadata.dump(wide=False, value_name='') def save(self): diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 25d0df0eb..3407471d1 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -5,7 +5,7 @@ import numpy as np from pandas import HDFStore -from larray.core.array import LArray +from larray.core.array import Array from larray.core.axis import Axis from larray.core.constants import nan from larray.core.group import Group, LGroup, _translate_group_key_hdf @@ -13,7 +13,7 @@ from larray.util.misc import LHDFStore from larray.inout.session import register_file_handler from larray.inout.common import FileHandler -from larray.inout.pandas import df_aslarray +from larray.inout.pandas import df_asarray from larray.example import get_example_filepath @@ -27,7 +27,7 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s Path and name where the HDF5 file is stored or a HDFStore object. key : str or Group Name of the array. - fill_value : scalar or LArray, optional + fill_value : scalar or Array, optional Value used to fill cells corresponding to label combinations which are not present in the input. Defaults to NaN. 
sort_rows : bool, optional @@ -44,7 +44,7 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s Returns ------- - LArray + Array Examples -------- @@ -76,11 +76,11 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s # for backward compatibility but any object read from an hdf file should have an attribute 'type' _type = attrs.type if 'type' in attrs else 'Array' _meta = attrs.metadata if 'metadata' in attrs else None - if _type == 'Array': + if _type in ['Array', 'LArray']: # cartesian product is not necessary if the array was written by LArray cartesian_prod = writer != 'LArray' - res = df_aslarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, - parse_header=False, cartesian_prod=cartesian_prod) + res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, + parse_header=False, cartesian_prod=cartesian_prod) if _meta is not None: res.meta = _meta elif _type == 'Axis': @@ -141,7 +141,7 @@ def _read_item(self, key, type, *args, **kwargs): return read_hdf(self.handle, hdf_key, *args, **kwargs) def _dump_item(self, key, value, *args, **kwargs): - if isinstance(value, LArray): + if isinstance(value, Array): hdf_key = '/' + key value.to_hdf(self.handle, hdf_key, *args, **kwargs) elif isinstance(value, Axis): diff --git a/larray/inout/misc.py b/larray/inout/misc.py index ec3e04e59..e780b8c55 100644 --- a/larray/inout/misc.py +++ b/larray/inout/misc.py @@ -6,7 +6,7 @@ from larray.core.constants import nan from larray.util.misc import StringIO, deprecate_kwarg from larray.inout.common import _get_index_col -from larray.inout.pandas import df_aslarray +from larray.inout.pandas import df_asarray from larray.inout.csv import read_csv @@ -26,7 +26,7 @@ def from_lists(data, nb_axes=None, index_col=None, fill_value=nan, sort_rows=Fal is assumed to have one axis. Defaults to None. 
index_col : list, optional Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above). - fill_value : scalar or LArray, optional + fill_value : scalar or Array, optional Value used to fill cells corresponding to label combinations which are not present in the input. Defaults to NaN. sort_rows : bool, optional @@ -41,7 +41,7 @@ def from_lists(data, nb_axes=None, index_col=None, fill_value=nan, sort_rows=Fal Returns ------- - LArray + Array Examples -------- @@ -114,8 +114,8 @@ def from_lists(data, nb_axes=None, index_col=None, fill_value=nan, sort_rows=Fal if index_col is not None: df.set_index([df.columns[c] for c in index_col], inplace=True) - return df_aslarray(df, raw=index_col is None, parse_header=False, sort_rows=sort_rows, sort_columns=sort_columns, - fill_value=fill_value, wide=wide) + return df_asarray(df, raw=index_col is None, parse_header=False, sort_rows=sort_rows, sort_columns=sort_columns, + fill_value=fill_value, wide=wide) @deprecate_kwarg('nb_index', 'nb_axes', arg_converter=lambda x: x + 1) @@ -144,7 +144,7 @@ def from_string(s, nb_axes=None, index_col=None, sep=' ', wide=True, **kwargs): Returns ------- - LArray + Array Examples -------- diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index a9732f4c0..d24a83478 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -from larray.core.array import LArray +from larray.core.array import Array from larray.core.axis import Axis, AxisCollection from larray.core.group import LGroup from larray.core.constants import nan @@ -74,7 +74,7 @@ def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs): r""" - Converts Pandas Series into LArray. + Converts Pandas Series into Array. 
Parameters ---------- @@ -91,11 +91,11 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs): Returns ------- - LArray + Array See Also -------- - LArray.to_series + Array.to_series Examples -------- @@ -137,13 +137,13 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs): name = decode(s.name, 'utf8') if s.name is not None else decode(s.index.name, 'utf8') if sort_rows: s = s.sort_index() - return LArray(s.values, Axis(s.index.values, name), meta=meta) + return Array(s.values, Axis(s.index.values, name), meta=meta) def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfold_last_axis_name=False, fill_value=nan, meta=None, cartesian_prod=True, **kwargs): r""" - Converts Pandas DataFrame into LArray. + Converts Pandas DataFrame into Array. Parameters ---------- @@ -171,18 +171,18 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo Metadata (title, description, author, creation_date, ...) associated with the array. Keys must be strings. Values must be of type string, int, float, date, time or datetime. cartesian_prod : bool, optional - Whether or not to expand the dataframe to a cartesian product dataframe as needed by LArray. + Whether or not to expand the dataframe to a cartesian product dataframe as needed by Array. This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already well formed. If True, arguments `sort_rows` and `sort_columns` must be set to False. Defaults to True. 
Returns ------- - LArray + Array See Also -------- - LArray.to_frame + Array.to_frame Examples -------- @@ -249,13 +249,13 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)]) data = df.values.reshape(axes.shape) - return LArray(data, axes, meta=meta) + return Array(data, axes, meta=meta) -def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header=True, wide=True, cartesian_prod=True, - **kwargs): +def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header=True, wide=True, cartesian_prod=True, + **kwargs): r""" - Prepare Pandas DataFrame and then convert it into LArray. + Prepare Pandas DataFrame and then convert it into Array. Parameters ---------- @@ -280,16 +280,16 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header If False, the array is assumed to be stored in "narrow" format: one column per axis plus one value column. Defaults to True. cartesian_prod : bool, optional - Whether or not to expand the dataframe to a cartesian product dataframe as needed by LArray. + Whether or not to expand the dataframe to a cartesian product dataframe as needed by Array. This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already well formed. If True, arguments `sort_rows` and `sort_columns` must be set to False. Defaults to True. 
Returns ------- - LArray + Array """ - # we could inline df_aslarray into the functions that use it, so that the original (non-cartesian) df is freed from + # we could inline df_asarray into the functions that use it, so that the original (non-cartesian) df is freed from # memory at this point, but it would be much uglier and would not lower the peak memory usage which happens during # cartesian_product_df.reindex diff --git a/larray/inout/pickle.py b/larray/inout/pickle.py index 8b4c98501..56f30d5d8 100644 --- a/larray/inout/pickle.py +++ b/larray/inout/pickle.py @@ -5,7 +5,7 @@ from larray.core.axis import Axis from larray.core.group import Group -from larray.core.array import LArray +from larray.core.array import Array from larray.core.metadata import Metadata from larray.util.misc import pickle from larray.inout.session import register_file_handler @@ -30,7 +30,7 @@ def list_items(self): # groups items += [(key, 'Group') for key, value in self.data.items() if isinstance(value, Group)] # arrays - items += [(key, 'Array') for key, value in self.data.items() if isinstance(value, LArray)] + items += [(key, 'Array') for key, value in self.data.items() if isinstance(value, Array)] return items def _read_item(self, key, type, *args, **kwargs): @@ -40,7 +40,7 @@ def _read_item(self, key, type, *args, **kwargs): raise TypeError() def _dump_item(self, key, value, *args, **kwargs): - if isinstance(value, (LArray, Axis, Group)): + if isinstance(value, (Array, Axis, Group)): self.data[key] = value else: raise TypeError() diff --git a/larray/inout/sas.py b/larray/inout/sas.py index 585b6f20f..aed18aff9 100644 --- a/larray/inout/sas.py +++ b/larray/inout/sas.py @@ -6,7 +6,7 @@ import pandas as pd from larray.core.constants import nan -from larray.inout.pandas import df_aslarray +from larray.inout.pandas import df_asarray from larray.util.misc import deprecate_kwarg @@ -14,7 +14,7 @@ def read_sas(filepath, nb_axes=None, index_col=None, fill_value=nan, na=nan, sort_rows=False, 
sort_columns=False, **kwargs): r""" - Reads sas file and returns an LArray with the contents + Reads sas file and returns an Array with the contents nb_axes: number of axes of the output array or index_col: Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]) @@ -32,4 +32,4 @@ def read_sas(filepath, nb_axes=None, index_col=None, fill_value=nan, na=nan, sor index_col = [index_col] df = pd.read_sas(filepath, index=index_col, **kwargs) - return df_aslarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value) + return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value) diff --git a/larray/inout/stata.py b/larray/inout/stata.py index 8d16c1176..5741e4525 100644 --- a/larray/inout/stata.py +++ b/larray/inout/stata.py @@ -9,7 +9,7 @@ def read_stata(filepath_or_buffer, index_col=None, sort_rows=False, sort_columns=False, **kwargs): r""" - Reads Stata .dta file and returns an LArray with the contents + Reads Stata .dta file and returns an Array with the contents Parameters ---------- @@ -26,15 +26,15 @@ def read_stata(filepath_or_buffer, index_col=None, sort_rows=False, sort_columns Returns ------- - LArray + Array See Also -------- - LArray.to_stata + Array.to_stata Notes ----- - The round trip to Stata (LArray.to_stata followed by read_stata) loose the name of the "column" axis. + The round trip to Stata (Array.to_stata followed by read_stata) loses the name of the "column" axis. 
Examples -------- diff --git a/larray/inout/xw_excel.py b/larray/inout/xw_excel.py index 9bd57b02b..10b158c62 100644 --- a/larray/inout/xw_excel.py +++ b/larray/inout/xw_excel.py @@ -10,11 +10,11 @@ except ImportError: xw = None -from larray.core.array import LArray, ndtest +from larray.core.array import Array, ndtest from larray.core.axis import Axis from larray.core.constants import nan from larray.core.group import _translate_sheet_name -from larray.inout.pandas import df_aslarray +from larray.inout.pandas import df_asarray from larray.inout.misc import from_lists from larray.util.misc import PY2, deprecate_kwarg @@ -50,20 +50,20 @@ def kill_global_app(): global_app = None - class LArrayConverter(PandasDataFrameConverter): - writes_types = LArray + class ArrayConverter(PandasDataFrameConverter): + writes_types = Array @classmethod def read_value(cls, value, options): df = PandasDataFrameConverter.read_value(value, options) - return df_aslarray(df) + return df_asarray(df) @classmethod def write_value(cls, value, options): df = value.to_frame(fold_last_axis_name=True) return PandasDataFrameConverter.write_value(df, options) - LArrayConverter.register(LArray) + ArrayConverter.register(Array) def _disable_screen_updates(app): xl_app = app.api @@ -397,7 +397,7 @@ def __getitem__(self, key): return Range(self, (row + 1, col + 1)) def __setitem__(self, key, value): - if isinstance(value, LArray): + if isinstance(value, Array): value = value.dump(header=False) self[key].xw_range.value = value @@ -499,7 +499,7 @@ def array(self, data, row_labels=None, column_labels=None, names=None): Returns ------- - LArray + Array """ if row_labels is not None: row_labels = np.asarray(self[row_labels]) @@ -511,7 +511,7 @@ def array(self, data, row_labels=None, column_labels=None, names=None): else: axes = (row_labels, column_labels) # _converted_value is used implicitly via Range.__array__ - return LArray(np.asarray(self[data]), axes) + return Array(np.asarray(self[data]), axes) def 
__repr__(self): cls = self.__class__ @@ -585,13 +585,13 @@ def __array__(self, dtype=None): return np.array(self._converted_value(), dtype=dtype) def __larray__(self): - return LArray(self._converted_value()) + return Array(self._converted_value()) def __dir__(self): return list(set(dir(self.__class__)) | set(dir(self.xw_range))) def __getattr__(self, key): - if hasattr(LArray, key): + if hasattr(Array, key): return getattr(self.__larray__(), key) else: return getattr(self.xw_range, key) @@ -611,7 +611,7 @@ def __str__(self): def load(self, header=True, convert_float=True, nb_axes=None, index_col=None, fill_value=nan, sort_rows=False, sort_columns=False, wide=True): if not self.ndim: - return LArray([]) + return Array([]) list_data = self._converted_value(convert_float=convert_float) @@ -619,7 +619,7 @@ def load(self, header=True, convert_float=True, nb_axes=None, index_col=None, fi return from_lists(list_data, nb_axes=nb_axes, index_col=index_col, fill_value=fill_value, sort_rows=sort_rows, sort_columns=sort_columns, wide=wide) else: - return LArray(list_data) + return Array(list_data) # XXX: deprecate this function? diff --git a/larray/inout/xw_reporting.py b/larray/inout/xw_reporting.py index 424cc124d..f897ebc2d 100644 --- a/larray/inout/xw_reporting.py +++ b/larray/inout/xw_reporting.py @@ -4,7 +4,7 @@ from larray.util.misc import PY2, _positive_integer, _validate_dir from larray.core.group import _translate_sheet_name -from larray.core.array import aslarray, zip_array_items +from larray.core.array import asarray, zip_array_items from larray.example import load_example_data, EXAMPLE_EXCEL_TEMPLATES_DIR try: @@ -577,7 +577,7 @@ def __init__(self, data, title, template, top, left, width, height): self.top = top self.left = left self.title = str(title) if title is not None else None - data = aslarray(data) + data = asarray(data) if not (1 <= data.ndim <= 2): raise ValueError("Expected 1D or 2D array for data argument. 
" "Got array of dimensions {}".format(data.ndim)) diff --git a/larray/random.py b/larray/random.py index 08a066e38..8e4cb890f 100644 --- a/larray/random.py +++ b/larray/random.py @@ -26,7 +26,7 @@ import numpy as np from larray.core.axis import Axis, AxisCollection -from larray.core.array import LArray, aslarray +from larray.core.array import Array, asarray from larray.core.array import raw_broadcastable import larray as la @@ -37,7 +37,7 @@ def generic_random(np_func, args, min_axes, meta): args, res_axes = raw_broadcastable(args, min_axes=min_axes) res_data = np_func(*args, size=res_axes.shape) - return LArray(res_data, res_axes, meta=meta) + return Array(res_data, res_axes, meta=meta) # We choose to place the axes argument in place of the numpy size argument, instead of having axes as the first @@ -70,7 +70,7 @@ def randint(low, high=None, axes=None, dtype='l', meta=None): Returns ------- - LArray + Array Examples -------- @@ -100,7 +100,7 @@ def randint(low, high=None, axes=None, dtype='l', meta=None): # to do that, uncommenting the following code should be enough: # return generic_random(np.random.randint, (low, high), axes, meta) axes = AxisCollection(axes) - return LArray(np.random.randint(low, high, axes.shape, dtype), axes, meta=meta) + return Array(np.random.randint(low, high, axes.shape, dtype), axes, meta=meta) def normal(loc=0.0, scale=1.0, axes=None, meta=None): @@ -127,7 +127,7 @@ def normal(loc=0.0, scale=1.0, axes=None, meta=None): Returns ------- - LArray or scalar + Array or scalar Drawn samples from the parameterized normal distribution. Notes @@ -238,7 +238,7 @@ def uniform(low=0.0, high=1.0, axes=None, meta=None): Returns ------- - LArray or scalar + Array or scalar Drawn samples from the parameterized uniform distribution. See Also @@ -330,7 +330,7 @@ def permutation(x, axis=0): Returns ------- - LArray + Array Permuted sequence or array range. 
Examples @@ -357,9 +357,9 @@ def permutation(x, axis=0): a2 7 8 6 """ if isinstance(x, (int, np.integer)): - return LArray(np.random.permutation(x)) + return Array(np.random.permutation(x)) else: - x = aslarray(x) + x = asarray(x) axis = x.axes[axis] g = axis.i[np.random.permutation(len(axis))] return x[g] @@ -375,7 +375,7 @@ def choice(choices=None, axes=None, replace=True, p=None, meta=None): Values to choose from. If an array, a random sample is generated from its elements. If an int n, the random sample is generated as if choices was la.sequence(n) - If p is a 1-D LArray, choices are taken from its axis. + If p is a 1-D Array, choices are taken from its axis. axes : int, tuple of int, str, Axis or tuple/list/AxisCollection of Axis, optional Axes (or shape) of the resulting array. If ``axes`` is None (the default), a single value is returned. Otherwise, if the resulting axes have a shape of, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. @@ -383,7 +383,7 @@ def choice(choices=None, axes=None, replace=True, p=None, meta=None): Whether the sample is with or without replacement. p : array-like, optional The probabilities associated with each entry in choices. - If p is a 1-D LArray, choices are taken from its axis labels. If p is an N-D LArray, each cell represents the + If p is a 1-D Array, choices are taken from its axis labels. If p is an N-D Array, each cell represents the probability that the combination of labels will occur. If not given the sample assumes a uniform distribution over all entries in choices. meta : list of pairs or dict or OrderedDict or Metadata, optional @@ -392,7 +392,7 @@ def choice(choices=None, axes=None, replace=True, p=None, meta=None): Returns ------- - LArray or scalar + Array or scalar The generated random samples with given ``axes`` (or shape). 
Raises @@ -426,9 +426,9 @@ def choice(choices=None, axes=None, replace=True, p=None, meta=None): a0 15 10 10 a1 10 5 10 - Same as above with labels and probabilities given as a one dimensional LArray + Same as above with labels and probabilities given as a one dimensional Array - >>> proba = LArray([0.3, 0.5, 0.2], Axis([5, 10, 15], 'outcome')) # doctest: +SKIP + >>> proba = Array([0.3, 0.5, 0.2], Axis([5, 10, 15], 'outcome')) # doctest: +SKIP >>> proba # doctest: +SKIP outcome 5 10 15 0.3 0.5 0.2 @@ -452,7 +452,7 @@ def choice(choices=None, axes=None, replace=True, p=None, meta=None): Using an N-dimensional array as probabilities: - >>> proba = LArray([[0.15, 0.25, 0.10], + >>> proba = Array([[0.15, 0.25, 0.10], ... [0.20, 0.10, 0.20]], 'a=a0,a1;b=b0..b2') # doctest: +SKIP >>> proba # doctest: +SKIP a\b b0 b1 b2 @@ -468,9 +468,9 @@ def choice(choices=None, axes=None, replace=True, p=None, meta=None): d5 a0 b1 """ axes = AxisCollection(axes) - if isinstance(p, LArray): + if isinstance(p, Array): if choices is not None: - raise ValueError("choices argument cannot be used when p argument is an LArray") + raise ValueError("choices argument cannot be used when p argument is an Array") if p.ndim > 1: flat_p = p.data.reshape(-1) @@ -480,5 +480,5 @@ def choice(choices=None, axes=None, replace=True, p=None, meta=None): choices = p.axes[0].labels p = p.data if choices is None: - raise ValueError("choices argument must be provided unless p is an LArray") - return LArray(np.random.choice(choices, axes.shape, replace, p), axes, meta=meta) + raise ValueError("choices argument must be provided unless p is an Array") + return Array(np.random.choice(choices, axes.shape, replace, p), axes, meta=meta) diff --git a/larray/tests/common.py b/larray/tests/common.py index e3126d5fe..59eeaa60b 100644 --- a/larray/tests/common.py +++ b/larray/tests/common.py @@ -12,7 +12,7 @@ except ImportError: xw = None -from larray import LArray, isnan, aslarray, Metadata +from larray import Array, 
isnan, asarray, Metadata TESTDATADIR = os.path.dirname(__file__) @@ -38,11 +38,11 @@ def inputpath(relpath): def assert_equal_factory(test_func): def assert_equal(a, b): - if isinstance(a, LArray) and isinstance(b, LArray) and a.axes != b.axes: + if isinstance(a, Array) and isinstance(b, Array) and a.axes != b.axes: raise AssertionError("axes differ:\n%s\n\nvs\n\n%s" % (a.axes.info, b.axes.info)) - if not isinstance(a, (np.ndarray, LArray)): + if not isinstance(a, (np.ndarray, Array)): a = np.asarray(a) - if not isinstance(b, (np.ndarray, LArray)): + if not isinstance(b, (np.ndarray, Array)): b = np.asarray(b) if a.shape != b.shape: raise AssertionError("shapes differ: %s != %s" % (a.shape, b.shape)) @@ -58,8 +58,8 @@ def assert_equal(a, b): def assert_larray_equal_factory(test_func, convert=True, check_axes=False): def assert_equal(a, b): if convert: - a = aslarray(a) - b = aslarray(b) + a = asarray(a) + b = asarray(b) if check_axes and a.axes != b.axes: raise AssertionError("axes differ:\n%s\n\nvs\n\n%s" % (a.axes.info, b.axes.info)) equal = test_func(a, b) diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 10871e498..3a5597922 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -18,7 +18,7 @@ from larray.tests.common import (inputpath, assert_array_equal, assert_array_nan_equal, assert_larray_equiv, tmp_path, meta, needs_xlwings, needs_python35, needs_python36, needs_python37, assert_larray_equal) -from larray import (LArray, Axis, LGroup, union, zeros, zeros_like, ndtest, empty, ones, eye, diag, stack, +from larray import (Array, LArray, Axis, LGroup, union, zeros, zeros_like, ndtest, empty, ones, eye, diag, stack, clip, exp, where, X, mean, isnan, round, read_hdf, read_csv, read_eurostat, read_excel, from_lists, from_string, open_excel, from_frame, sequence, nan, IGroup) from larray.inout.pandas import from_series @@ -132,16 +132,16 @@ def test_meta_arg_array_creation(array): meta = Metadata(meta_list) # meta as 
list - arr = LArray(array.data, array.axes, meta=meta_list) + arr = Array(array.data, array.axes, meta=meta_list) assert arr.meta == meta # meta as OrderedDict - arr = LArray(array.data, array.axes, meta=OrderedDict(meta_list)) + arr = Array(array.data, array.axes, meta=OrderedDict(meta_list)) assert arr.meta == meta -# ================= # -# Test LArray # -# ================= # +# ================ # +# Test Array # +# ================ # # AXES lipro = Axis(['P%02d' % i for i in range(1, 16)], 'lipro') @@ -161,13 +161,13 @@ def test_meta_arg_array_creation(array): @pytest.fixture() def array(): data = np.arange(116 * 44 * 2 * 15).reshape(116, 44, 2, 15).astype(float) - return LArray(data, axes=(age, geo, sex, lipro)) + return Array(data, axes=(age, geo, sex, lipro)) @pytest.fixture() def small_array(): small_data = np.arange(30).reshape(2, 15) - return LArray(small_data, axes=(sex, lipro)) + return Array(small_data, axes=(sex, lipro)) io_1d = ndtest(3) @@ -182,6 +182,14 @@ def small_array(): io_narrow_missing_values[2, 'b1', 'c1'] = nan +def test_larray_renamed_as_array(): + with pytest.warns(FutureWarning) as caught_warnings: + arr = LArray([0, 1, 2, 3], 'a=a0..a3') + assert len(caught_warnings) == 1 + assert caught_warnings[0].message.args[0] == "LArray has been renamed as Array." 
+ assert caught_warnings[0].filename == __file__ + + def test_ndtest(): arr = ndtest('a=a0..a2') assert arr.shape == (3,) @@ -234,10 +242,10 @@ def test_bool(): a = zeros([1]) assert not bool(a) - a = LArray(np.array(2), []) + a = Array(np.array(2), []) assert bool(a) - a = LArray(np.array(0), []) + a = Array(np.array(0), []) assert not bool(a) @@ -423,7 +431,7 @@ def test_str(small_array, array): assert str(small_array[lipro['P01'], sex['F']]) == "15" # empty / len 0 first dimension - assert str(small_array[sex[[]]]) == "LArray([])" + assert str(small_array[sex[[]]]) == "Array([])" # one dimension assert str(small_array[lipro3, sex['M']]) == """\ @@ -456,7 +464,7 @@ def test_str(small_array, array): age 0 1 2 ... 112 113 114 115 0.0 1320.0 2640.0 ... 147840.0 149160.0 150480.0 151800.0""" - arr = LArray([0, ''], Axis(['a0', ''], 'a')) + arr = Array([0, ''], Axis(['a0', ''], 'a')) assert str(arr) == "a a0 \n 0 " @@ -682,7 +690,7 @@ def test_getitem_str_positional_group(): arr = ndtest('a=l0..l2;b=l0..l2') a, b = arr.axes res = arr['b.i[1]'] - expected = LArray([1, 4, 7], 'a=l0..l2') + expected = Array([1, 4, 7], 'a=l0..l2') assert_array_equal(res, expected) @@ -729,14 +737,14 @@ def test_getitem_bool_larray_key_arr_whout_bool_axis(): # all dimensions res = arr[arr < 5] - assert isinstance(res, LArray) + assert isinstance(res, Array) assert res.ndim == 1 assert_array_equal(res, raw[raw < 5]) # missing dimension filter_ = arr['b1'] % 5 == 0 res = arr[filter_] - assert isinstance(res, LArray) + assert isinstance(res, Array) assert res.ndim == 2 assert res.shape == (3, 2) raw_key = raw[:, 1, :] % 5 == 0 @@ -756,10 +764,10 @@ def test_getitem_bool_larray_key_arr_whout_bool_axis(): def test_getitem_bool_larray_key_arr_wh_bool_axis(): gender = Axis([False, True], 'gender') - arr = LArray([0.1, 0.2], gender) + arr = Array([0.1, 0.2], gender) id_axis = Axis('id=0..3') - key = LArray([True, False, True, True], id_axis) - expected = LArray([0.2, 0.1, 0.2, 0.2], id_axis) + key 
= Array([True, False, True, True], id_axis) + expected = Array([0.2, 0.1, 0.2, 0.2], id_axis) # LGroup using the real axis assert_larray_equal(arr[gender[key]], expected) @@ -779,14 +787,14 @@ def test_getitem_bool_larray_and_group_key(): # using axis res = arr['a0,a2', arr.b < 3, 'c0:c3'] - assert isinstance(res, LArray) + assert isinstance(res, Array) assert res.ndim == 3 expected = arr['a0,a2', '0:2', 'c0:c3'] assert_array_equal(res, expected) # using axis reference res = arr['a0,a2', X.b < 3, 'c0:c3'] - assert isinstance(res, LArray) + assert isinstance(res, Array) assert res.ndim == 3 assert_array_equal(res, expected) @@ -794,14 +802,14 @@ def test_getitem_bool_larray_and_group_key(): def test_getitem_bool_ndarray_key_arr_whout_bool_axis(array): raw = array.data res = array[raw < 5] - assert isinstance(res, LArray) + assert isinstance(res, Array) assert res.ndim == 1 assert_array_equal(res, raw[raw < 5]) def test_getitem_bool_ndarray_key_arr_wh_bool_axis(): gender = Axis([False, True], 'gender') - arr = LArray([0.1, 0.2], gender) + arr = Array([0.1, 0.2], gender) key = np.array([True, False, True, True]) expected = arr.i[[1, 0, 1, 1]] @@ -912,7 +920,7 @@ def test_getitem_single_larray_key_guess(): # a1 3 4 5 # 1) key with extra axis - key = LArray(['a0', 'a1', 'a1', 'a0'], c) + key = Array(['a0', 'a1', 'a1', 'a0'], c) # replace the target axis by the extra axis expected = from_string(r""" c\b b0 b1 b2 @@ -923,7 +931,7 @@ def test_getitem_single_larray_key_guess(): assert_array_equal(arr[key], expected) # 2) key with the target axis (the one being replaced) - key = LArray(['b1', 'b0', 'b2'], b) + key = Array(['b1', 'b0', 'b2'], b) # axis stays the same but data should be flipped/shuffled expected = from_string(r""" a\b b0 b1 b2 @@ -932,7 +940,7 @@ def test_getitem_single_larray_key_guess(): assert_array_equal(arr[key], expected) # 2bis) key with part of the target axis (the one being replaced) - key = LArray(['b2', 'b1'], 'b=b0,b1') + key = Array(['b2', 'b1'], 
'b=b0,b1') expected = from_string(r""" a\b b0 b1 a0 2 1 @@ -940,7 +948,7 @@ def test_getitem_single_larray_key_guess(): assert_array_equal(arr[key], expected) # 3) key with another existing axis (not the target axis) - key = LArray(['a0', 'a1', 'a0'], b) + key = Array(['a0', 'a1', 'a0'], b) expected = from_string(""" b b0 b1 b2 \t 0 4 2""") @@ -948,7 +956,7 @@ def test_getitem_single_larray_key_guess(): # TODO: this does not work yet but should be much easier to implement with "align" in make_np_broadcastable # 3bis) key with *part* of another existing axis (not the target axis) - # key = LArray(['a1', 'a0'], 'b=b0,b1') + # key = Array(['a1', 'a0'], 'b=b0,b1') # expected = from_string(""" # b b0 b1 # \t 3 1""") @@ -1074,7 +1082,7 @@ def test_getitem_ndarray_key_guess(array): keys = ['P04', 'P01', 'P03', 'P02'] key = np.array(keys) res = array[key] - assert isinstance(res, LArray) + assert isinstance(res, Array) assert res.axes == array.axes.replace(X.lipro, Axis(keys, 'lipro')) assert_array_equal(res, raw[:, :, :, [3, 0, 2, 1]]) @@ -1087,7 +1095,7 @@ def test_getitem_int_larray_key_guess(): e = Axis([8, 9, 10, 11], 'e') arr = ndtest([c, d, e]) - key = LArray([[8, 9], [10, 11]], [a, b]) + key = Array([[8, 9], [10, 11]], [a, b]) assert arr[key].axes == [c, d, a, b] @@ -1125,7 +1133,7 @@ def test_getitem_empty_tuple(): assert_array_equal(res, arr) assert res is not arr - z = LArray(0) + z = Array(0) res = z[()] assert res == z assert res is not z @@ -1224,7 +1232,7 @@ def test_points_indexer_setitem(): def test_setitem_larray(array, small_array): """ - tests LArray.__setitem__(key, value) where value is an LArray + tests Array.__setitem__(key, value) where value is an Array """ age, geo, sex, lipro = array.axes @@ -1252,7 +1260,7 @@ def test_setitem_larray(array, small_array): raw_value = raw[[1, 5, 9], np.newaxis] + 26.0 fake_axis = Axis(['label'], 'fake') age_axis = arr[ages1_5_9].axes.age - value = LArray(raw_value, axes=(age_axis, fake_axis, geo, sex, lipro)) + 
value = Array(raw_value, axes=(age_axis, fake_axis, geo, sex, lipro)) arr[ages1_5_9] = value raw[[1, 5, 9]] = raw[[1, 5, 9]] + 26.0 assert_array_equal(arr, raw) @@ -1325,12 +1333,12 @@ def test_setitem_larray(array, small_array): # 7) incompatible labels sex2 = Axis('sex=F,M') - la2 = LArray(small_array.data, axes=(sex2, lipro)) + la2 = Array(small_array.data, axes=(sex2, lipro)) with pytest.raises(ValueError, match="incompatible axes:"): arr[:] = la2 - # key has multiple LArrays (this is used within .points indexing) - # =============================================================== + # key has multiple Arrays (this is used within .points indexing) + # ============================================================== # first some setup a = Axis(['a0', 'a1'], None) b = Axis(['b0', 'b1', 'b2'], None) @@ -1339,8 +1347,8 @@ def test_setitem_larray(array, small_array): # a) with anonymous axes combined_axis = value.axes[0] - a_key = LArray([0, 0, 0, 1, 1, 1], combined_axis) - b_key = LArray([0, 1, 2, 0, 1, 2], combined_axis) + a_key = Array([0, 0, 0, 1, 1, 1], combined_axis) + b_key = Array([0, 1, 2, 0, 1, 2], combined_axis) key = (a.i[a_key], b.i[b_key]) array = empty((a, b)) array[key] = value @@ -1348,8 +1356,8 @@ def test_setitem_larray(array, small_array): # b) with wildcard combined_axis wild_combined_axis = combined_axis.ignore_labels() - wild_a_key = LArray([0, 0, 0, 1, 1, 1], wild_combined_axis) - wild_b_key = LArray([0, 1, 2, 0, 1, 2], wild_combined_axis) + wild_a_key = Array([0, 0, 0, 1, 1, 1], wild_combined_axis) + wild_b_key = Array([0, 1, 2, 0, 1, 2], wild_combined_axis) wild_key = (a.i[wild_a_key], b.i[wild_b_key]) array = empty((a, b)) array[wild_key] = value @@ -1369,7 +1377,7 @@ def test_setitem_larray(array, small_array): def test_setitem_ndarray(array): """ - tests LArray.__setitem__(key, value) where value is a raw ndarray. + tests Array.__setitem__(key, value) where value is a raw ndarray. 
In that case, value.shape is more restricted as we rely on numpy broadcasting. """ # a) value has exactly the same shape as the target slice @@ -1391,7 +1399,7 @@ def test_setitem_ndarray(array): def test_setitem_scalar(array): """ - tests LArray.__setitem__(key, value) where value is a scalar + tests Array.__setitem__(key, value) where value is a scalar """ # a) list key (one dimension) arr = array.copy() @@ -1412,7 +1420,7 @@ def test_setitem_bool_array_key(array): # XXX: this test is awfully slow (more than 1s) age, geo, sex, lipro = array.axes - # LArray key + # Array key # a1) same shape, same order arr = array.copy() raw = array.data.copy() @@ -1437,7 +1445,7 @@ def test_setitem_bool_array_key(array): # raw[raw[:, :, [1]] < 5] = 0 # assert_array_equal(arr, raw) - # c) LArray-broadcastable shape (missing axis) + # c) Array-broadcastable shape (missing axis) arr = array.copy() raw = array.data.copy() key = arr[sex['M']] < 5 @@ -1456,7 +1464,7 @@ def test_setitem_bool_array_key(array): raw[raw < 5] = 0 assert_array_equal(arr, raw) - # d) LArray with extra axes + # d) Array with extra axes arr = array.copy() key = (arr < 5).expand([Axis(2, 'extra')]) assert key.ndim == 5 @@ -1486,7 +1494,7 @@ def test_set(array): raw_value = raw[[1, 5, 9], np.newaxis] + 26.0 fake_axis = Axis(['label'], 'fake') age_axis = arr[ages1_5_9].axes.age - value = LArray(raw_value, axes=(age_axis, fake_axis, geo, sex, lipro)) + value = Array(raw_value, axes=(age_axis, fake_axis, geo, sex, lipro)) arr.set(value, age=ages1_5_9) raw[[1, 5, 9]] = raw[[1, 5, 9]] + 26.0 assert_array_equal(arr, raw) @@ -2092,7 +2100,7 @@ def test_group_agg_anonymous_axis(): def test_group_agg_zero_padded_label(): arr = ndtest("a=01,02,03,10,11; b=b0..b2") - expected = LArray([36, 30, 39], "a=01_03,10,11") + expected = Array([36, 30, 39], "a=01_03,10,11") assert_array_equal(arr.sum("01,02,03 >> 01_03; 10; 11", "b"), expected) @@ -2619,7 +2627,7 @@ def test_binary_ops(small_array): assert_array_equal(30 / 
(small_array + 1), 30 / (raw + 1)) raw_int = raw.astype(int) - la_int = LArray(raw_int, axes=(sex, lipro)) + la_int = Array(raw_int, axes=(sex, lipro)) assert_array_equal(la_int / 2, raw_int / 2) assert_array_equal(la_int // 2, raw_int // 2) @@ -2677,7 +2685,7 @@ def test_binary_ops_no_name_axes(small_array): assert_array_equal(30 / (la + 1), 30 / (raw + 1)) raw_int = raw.astype(int) - la_int = LArray(raw_int) + la_int = Array(raw_int) assert_array_equal(la_int / 2, raw_int / 2) assert_array_equal(la_int // 2, raw_int // 2) @@ -2769,21 +2777,21 @@ def test_sequence(): def test_sort_values(): # 1D arrays - arr = LArray([0, 1, 6, 3, -1], "a=a0..a4") + arr = Array([0, 1, 6, 3, -1], "a=a0..a4") res = arr.sort_values() - expected = LArray([-1, 0, 1, 3, 6], "a=a4,a0,a1,a3,a2") + expected = Array([-1, 0, 1, 3, 6], "a=a4,a0,a1,a3,a2") assert_array_equal(res, expected) # ascending arg res = arr.sort_values(ascending=False) - expected = LArray([6, 3, 1, 0, -1], "a=a2,a3,a1,a0,a4") + expected = Array([6, 3, 1, 0, -1], "a=a2,a3,a1,a0,a4") assert_array_equal(res, expected) # 3D arrays - arr = LArray([[[10, 2, 4], [3, 7, 1]], [[5, 1, 6], [2, 8, 9]]], + arr = Array([[[10, 2, 4], [3, 7, 1]], [[5, 1, 6], [2, 8, 9]]], 'a=a0,a1; b=b0,b1; c=c0..c2') res = arr.sort_values(axis='c') - expected = LArray([[[2, 4, 10], [1, 3, 7]], [[1, 5, 6], [2, 8, 9]]], - [Axis('a=a0,a1'), Axis('b=b0,b1'), Axis(3, 'c')]) + expected = Array([[[2, 4, 10], [1, 3, 7]], [[1, 5, 6], [2, 8, 9]]], + [Axis('a=a0,a1'), Axis('b=b0,b1'), Axis(3, 'c')]) assert_array_equal(res, expected) @@ -2797,12 +2805,12 @@ def test_set_axes(small_array): lipro2 = Axis([l.replace('P', 'Q') for l in lipro.labels], 'lipro2') sex2 = Axis(['Man', 'Woman'], 'sex2') - la = LArray(small_array.data, axes=(sex, lipro2)) + la = Array(small_array.data, axes=(sex, lipro2)) # replace one axis la2 = small_array.set_axes(X.lipro, lipro2) assert_array_equal(la, la2) - la = LArray(small_array.data, axes=(sex2, lipro2)) + la = 
Array(small_array.data, axes=(sex2, lipro2)) # all at once la2 = small_array.set_axes([sex2, lipro2]) assert_array_equal(la, la2) @@ -2830,7 +2838,7 @@ def test_reindex(): a0 1 -1 0 a1 3 -1 2""")) - # LArray fill value + # Array fill value filler = ndtest(arr.a) res = arr.reindex(X.b, ['b1', 'b2', 'b0'], fill_value=filler) assert_array_equal(res, from_string("""a\\b b1 b2 b0 @@ -2981,7 +2989,7 @@ def test_insert(): def test_drop(): arr1 = ndtest(3) - expected = LArray([0, 2], 'a=a0,a2') + expected = Array([0, 2], 'a=a0,a2') # indices res = arr1.drop('a.i[1]') @@ -3047,8 +3055,8 @@ def test_shift_axis(small_array): # TODO: check how awful the syntax is with an axis that is not last # or first - l2 = LArray(small_array[:, :'P14'], axes=[sex, Axis(lipro.labels[1:], 'lipro')]) - l2 = LArray(small_array[:, :'P14'], axes=[sex, lipro.subaxis(slice(1, None))]) + l2 = Array(small_array[:, :'P14'], axes=[sex, Axis(lipro.labels[1:], 'lipro')]) + l2 = Array(small_array[:, :'P14'], axes=[sex, lipro.subaxis(slice(1, None))]) # We can also modify the axis in-place (dangerous!) 
# lipro.labels = np.append(lipro.labels[1:], lipro.labels[0]) @@ -3057,9 +3065,9 @@ def test_shift_axis(small_array): def test_unique(): - arr = LArray([[[0, 2, 0, 0], - [1, 1, 1, 0]], - [[0, 2, 0, 0], + arr = Array([[[0, 2, 0, 0], + [1, 1, 1, 0]], + [[0, 2, 0, 0], [2, 1, 2, 0]]], 'a=a0,a1;b=b0,b1;c=c0..c3') assert_array_equal(arr.unique('a'), arr) assert_array_equal(arr.unique('b'), arr) @@ -3457,7 +3465,7 @@ def test_from_series(): def test_from_frame(): # 1) data = scalar # ================ - # Dataframe becomes 1D LArray + # Dataframe becomes 1D Array data = np.array([10]) index = ['i0'] columns = ['c0'] @@ -3474,8 +3482,8 @@ def test_from_frame(): # ---------------- # c0 # i0 10 - # output LArray: - # -------------- + # output Array: + # ------------- # {0}\{1} c0 # i0 10 la = from_frame(df) @@ -3484,7 +3492,7 @@ def test_from_frame(): assert la.axes.names == [None, None] assert list(la.axes.labels[0]) == index assert list(la.axes.labels[1]) == columns - expected_la = LArray(data.reshape((1, 1)), [axis_index, axis_columns]) + expected_la = Array(data.reshape((1, 1)), [axis_index, axis_columns]) assert_array_equal(la, expected_la) # anonymous columns @@ -3493,8 +3501,8 @@ def test_from_frame(): # c0 # index # i0 10 - # output LArray: - # -------------- + # output Array: + # ------------- # index\{1} c0 # i0 10 df.index.name, df.columns.name = 'index', None @@ -3504,7 +3512,7 @@ def test_from_frame(): assert la.axes.names == ['index', None] assert list(la.axes.labels[0]) == index assert list(la.axes.labels[1]) == columns - expected_la = LArray(data.reshape((1, 1)), [axis_index.rename('index'), axis_columns]) + expected_la = Array(data.reshape((1, 1)), [axis_index.rename('index'), axis_columns]) assert_array_equal(la, expected_la) # anonymous columns/non string row axis name @@ -3513,8 +3521,8 @@ def test_from_frame(): # c0 # 0 # i0 10 - # output LArray: - # -------------- + # output Array: + # ------------- # 0\{1} c0 # i0 10 df = pd.DataFrame([10], 
index=pd.Index(['i0'], name=0), columns=['c0']) @@ -3524,15 +3532,15 @@ def test_from_frame(): assert res.axes.names == ['0', None] assert list(res.axes[0].labels) == ['i0'] assert list(res.axes[1].labels) == ['c0'] - assert_array_equal(res, LArray([[10]], "0=i0;c0,")) + assert_array_equal(res, Array([[10]], "0=i0;c0,")) # anonymous index # input dataframe: # ---------------- # columns c0 # i0 10 - # output LArray: - # -------------- + # output Array: + # ------------- # {0}\columns c0 # i0 10 df.index.name, df.columns.name = None, 'columns' @@ -3542,7 +3550,7 @@ def test_from_frame(): assert la.axes.names == [None, 'columns'] assert list(la.axes.labels[0]) == index assert list(la.axes.labels[1]) == columns - expected_la = LArray(data.reshape((1, 1)), [axis_index, axis_columns.rename('columns')]) + expected_la = Array(data.reshape((1, 1)), [axis_index, axis_columns.rename('columns')]) assert_array_equal(la, expected_la) # index and columns with name @@ -3551,8 +3559,8 @@ def test_from_frame(): # columns c0 # index # i0 10 - # output LArray: - # -------------- + # output Array: + # ------------- # index\columns c0 # i0 10 df.index.name, df.columns.name = 'index', 'columns' @@ -3562,7 +3570,7 @@ def test_from_frame(): assert la.axes.names == ['index', 'columns'] assert list(la.axes.labels[0]) == index assert list(la.axes.labels[1]) == columns - expected_la = LArray(data.reshape((1, 1)), [axis_index.rename('index'), axis_columns.rename('columns')]) + expected_la = Array(data.reshape((1, 1)), [axis_index.rename('index'), axis_columns.rename('columns')]) assert_array_equal(la, expected_la) # 2) data = vector @@ -3571,7 +3579,7 @@ def test_from_frame(): # 2A) data = horizontal vector (1 x N) # ==================================== - # Dataframe becomes 1D LArray + # Dataframe becomes 1D Array data = np.arange(size) indexes = ['i0'] columns = ['c{}'.format(i) for i in range(size)] @@ -3588,8 +3596,8 @@ def test_from_frame(): # ---------------- # c0 c1 c2 # i0 0 1 2 - # 
output LArray: - # -------------- + # output Array: + # ------------- # {0}\{1} c0 c1 c2 # i0 0 1 2 la = from_frame(df) @@ -3598,7 +3606,7 @@ def test_from_frame(): assert la.axes.names == [None, None] assert list(la.axes.labels[0]) == index assert list(la.axes.labels[1]) == columns - expected_la = LArray(data.reshape((1, size)), [axis_index, axis_columns]) + expected_la = Array(data.reshape((1, size)), [axis_index, axis_columns]) assert_array_equal(la, expected_la) # anonymous columns @@ -3607,8 +3615,8 @@ def test_from_frame(): # c0 c1 c2 # index # i0 0 1 2 - # output LArray: - # -------------- + # output Array: + # ------------- # index\{1} c0 c1 c2 # i0 0 1 2 df.index.name, df.columns.name = 'index', None @@ -3618,7 +3626,7 @@ def test_from_frame(): assert la.axes.names == ['index', None] assert list(la.axes.labels[0]) == index assert list(la.axes.labels[1]) == columns - expected_la = LArray(data.reshape((1, size)), [axis_index.rename('index'), axis_columns]) + expected_la = Array(data.reshape((1, size)), [axis_index.rename('index'), axis_columns]) assert_array_equal(la, expected_la) # anonymous index @@ -3626,8 +3634,8 @@ def test_from_frame(): # ---------------- # columns c0 c1 c2 # i0 0 1 2 - # output LArray: - # -------------- + # output Array: + # ------------- # {0}\columns c0 c1 c2 # i0 0 1 2 df.index.name, df.columns.name = None, 'columns' @@ -3637,7 +3645,7 @@ def test_from_frame(): assert la.axes.names == [None, 'columns'] assert list(la.axes.labels[0]) == index assert list(la.axes.labels[1]) == columns - expected_la = LArray(data.reshape((1, size)), [axis_index, axis_columns.rename('columns')]) + expected_la = Array(data.reshape((1, size)), [axis_index, axis_columns.rename('columns')]) assert_array_equal(la, expected_la) # index and columns with name @@ -3646,8 +3654,8 @@ def test_from_frame(): # columns c0 c1 c2 # index # i0 0 1 2 - # output LArray: - # -------------- + # output Array: + # ------------- # index\columns c0 c1 c2 # i0 0 1 2 
df.index.name, df.columns.name = 'index', 'columns' @@ -3657,12 +3665,12 @@ def test_from_frame(): assert la.axes.names == ['index', 'columns'] assert list(la.axes.labels[0]) == index assert list(la.axes.labels[1]) == columns - expected_la = LArray(data.reshape((1, size)), [axis_index.rename('index'), axis_columns.rename('columns')]) + expected_la = Array(data.reshape((1, size)), [axis_index.rename('index'), axis_columns.rename('columns')]) assert_array_equal(la, expected_la) # 2B) data = vertical vector (N x 1) # ================================== - # Dataframe becomes 2D LArray + # Dataframe becomes 2D Array data = data.reshape(size, 1) indexes = ['i{}'.format(i) for i in range(size)] columns = ['c0'] @@ -3681,8 +3689,8 @@ def test_from_frame(): # i0 0 # i1 1 # i2 2 - # output LArray: - # -------------- + # output Array: + # ------------- # {0}\{1} c0 # i0 0 # i1 1 @@ -3693,7 +3701,7 @@ def test_from_frame(): assert la.axes.names == [None, None] assert list(la.axes.labels[0]) == indexes assert list(la.axes.labels[1]) == columns - expected_la = LArray(data, [axis_index, axis_columns]) + expected_la = Array(data, [axis_index, axis_columns]) assert_array_equal(la, expected_la) # anonymous columns @@ -3704,8 +3712,8 @@ def test_from_frame(): # i0 0 # i1 1 # i2 2 - # output LArray: - # -------------- + # output Array: + # ------------- # index\{1} c0 # i0 0 # i1 1 @@ -3717,7 +3725,7 @@ def test_from_frame(): assert la.axes.names == ['index', None] assert list(la.axes.labels[0]) == indexes assert list(la.axes.labels[1]) == columns - expected_la = LArray(data, [axis_index.rename('index'), axis_columns]) + expected_la = Array(data, [axis_index.rename('index'), axis_columns]) assert_array_equal(la, expected_la) # anonymous index @@ -3727,8 +3735,8 @@ def test_from_frame(): # i0 0 # i1 1 # i2 2 - # output LArray: - # -------------- + # output Array: + # ------------- # {0}\columns c0 # i0 0 # i1 1 @@ -3740,7 +3748,7 @@ def test_from_frame(): assert la.axes.names == [None, 
'columns'] assert list(la.axes.labels[0]) == indexes assert list(la.axes.labels[1]) == columns - expected_la = LArray(data, [axis_index, axis_columns.rename('columns')]) + expected_la = Array(data, [axis_index, axis_columns.rename('columns')]) assert_array_equal(la, expected_la) # index and columns with name @@ -3751,8 +3759,8 @@ def test_from_frame(): # i0 0 # i1 1 # i2 2 - # output LArray: - # -------------- + # output Array: + # ------------- # {0}\columns c0 # i0 0 # i1 1 @@ -3763,7 +3771,7 @@ def test_from_frame(): assert la.axes.names == [None, 'columns'] assert list(la.axes.labels[0]) == indexes assert list(la.axes.labels[1]) == columns - expected_la = LArray(data, [axis_index, axis_columns.rename('columns')]) + expected_la = Array(data, [axis_index, axis_columns.rename('columns')]) assert_array_equal(la, expected_la) # 3) 3D array @@ -4237,8 +4245,8 @@ def test_open_excel(tmpdir): assert_array_equal(res, a1.data) # Sheet1/A1(transposed) - # FIXME: we need to .dump(header=False) explicitly because otherwise we go via LArrayConverter which - # includes labels. for consistency's sake we should either change LArrayConverter to not include + # FIXME: we need to .dump(header=False) explicitly because otherwise we go via ArrayConverter which + # includes labels. for consistency's sake we should either change ArrayConverter to not include # labels, or change wb[0] = a1 to include them (and use wb[0] = a1.data to avoid them?) but that # would be heavily backward incompatible and how would I load them back? 
# wb[0]['A1'].options(transpose=True).value = a1 @@ -4459,7 +4467,7 @@ def test_matmul(): # Note that we cannot use @ because that is an invalid syntax in Python 2 - # LArray value + # Array value assert_array_equal(a1.__matmul__(a2), ndtest([Axis(3), Axis(3)]) * 2) # ndarray value @@ -4476,11 +4484,11 @@ def test_matmul(): # 1D @ 2D assert_array_equal(arr1d.__matmul__(arr2d), - LArray([15, 18, 21], 'b=b0..b2')) + Array([15, 18, 21], 'b=b0..b2')) # 2D @ 1D assert_array_equal(arr2d.__matmul__(arr1d), - LArray([5, 14, 23], 'a=a0..a2')) + Array([5, 14, 23], 'a=a0..a2')) # 2D(a,b) @ 2D(a,b) -> 2D(a,b) res = from_lists([['a\\b', 'b0', 'b1', 'b2'], @@ -4498,7 +4506,7 @@ def test_matmul(): # ndarray value assert_array_equal(arr1d.__matmul__(arr2d.data), - LArray([15, 18, 21])) + Array([15, 18, 21])) assert_array_equal(arr2d.data.__matmul__(arr2d.T.data), res.data) @@ -4617,7 +4625,7 @@ def test_rmatmul(): # equivalent to a1.data @ a2 res = a2.__rmatmul__(a1.data) - assert isinstance(res, LArray) + assert isinstance(res, Array) assert_array_equal(res, ndtest([Axis(3), Axis(3)]) * 2) @@ -4664,7 +4672,7 @@ def test_plot(): # tick_v = np.random.randint(ord('a'), ord('z'), size=1000) # ticks = [chr(c) for c in tick_v] # large_axis = Axis('large', ticks) - # large = LArray(large_data, axes=[large_axis]) + # large = Array(large_data, axes=[large_axis]) # large.plot() # large.hist() @@ -4871,9 +4879,9 @@ def test_stack(): arr1 = ndtest(a, start=-1) res = stack((arr0, arr1), b) - expected = LArray([[0, -1], - [1, 0], - [2, 1]], [a, b]) + expected = Array([[0, -1], + [1, 0], + [2, 1]], [a, b]) assert_array_equal(res, expected) # same but using a group as the stacking axis @@ -4886,24 +4894,24 @@ def test_stack(): arr0 = ndtest(axis0) arr1 = ndtest(axis0, start=-1) res = stack((arr0, arr1), b) - expected = LArray([[0, -1], - [1, 0], - [2, 1]], [axis0, b]) + expected = Array([[0, -1], + [1, 0], + [2, 1]], [axis0, b]) assert_array_equal(res, expected) # using res_axes res = 
stack({'b0': 0, 'b1': 1}, axes=b, res_axes=(a, b)) - expected = LArray([[0, 1], - [0, 1], - [0, 1]], [a, b]) + expected = Array([[0, 1], + [0, 1], + [0, 1]], [a, b]) assert_array_equal(res, expected) - # giving elements as on LArray containing LArrays + # giving elements as on Array containing Arrays sex = Axis('sex=M,F') # not using the same length for nat and type, otherwise numpy gets confused :( arr1 = ones('nat=BE, FO') arr2 = zeros('type=1..3') - array_of_arrays = LArray([arr1, arr2], sex) + array_of_arrays = Array([arr1, arr2], sex) res = stack(array_of_arrays, sex) expected = from_string(r"""nat type\sex M F BE 1 1.0 0.0 @@ -4914,11 +4922,11 @@ def test_stack(): FO 3 1.0 0.0""") assert_array_equal(res, expected) - # non scalar/non LArray + # non scalar/non Array res = stack(([1, 2, 3], [4, 5, 6])) - expected = LArray([[1, 4], - [2, 5], - [3, 6]]) + expected = Array([[1, 4], + [2, 5], + [3, 6]]) assert_array_equal(res, expected) # stack along multiple axes @@ -4976,12 +4984,12 @@ def test_stack_kwargs_no_axis_labels(): # ---------------- # a) with an axis name res = stack(a0=0, a1=1, axes='a') - expected = LArray([0, 1], 'a=a0,a1') + expected = Array([0, 1], 'a=a0,a1') assert_array_equal(res, expected) # b) without an axis name res = stack(a0=0, a1=1) - expected = LArray([0, 1], 'a0,a1') + expected = Array([0, 1], 'a0,a1') assert_array_equal(res, expected) # 2) dict of arrays @@ -4992,16 +5000,16 @@ def test_stack_kwargs_no_axis_labels(): # a) with an axis name res = stack(b0=arr0, b1=arr1, axes='b') - expected = LArray([[0, -1], - [1, 0], - [2, 1]], [a, 'b=b0,b1']) + expected = Array([[0, -1], + [1, 0], + [2, 1]], [a, 'b=b0,b1']) assert_array_equal(res, expected) # b) without an axis name res = stack(b0=arr0, b1=arr1) - expected = LArray([[0, -1], - [1, 0], - [2, 1]], [a, 'b0,b1']) + expected = Array([[0, -1], + [1, 0], + [2, 1]], [a, 'b0,b1']) assert_array_equal(res, expected) @@ -5013,12 +5021,12 @@ def test_stack_dict_no_axis_labels(): # 
------------------ # a) with an axis name res = stack({'a0': 0, 'a1': 1}, 'a') - expected = LArray([0, 1], 'a=a0,a1') + expected = Array([0, 1], 'a=a0,a1') assert_array_equal(res, expected) # b) without an axis name res = stack({'a0': 0, 'a1': 1}) - expected = LArray([0, 1], 'a0,a1') + expected = Array([0, 1], 'a0,a1') assert_array_equal(res, expected) # 2) dict of arrays @@ -5029,26 +5037,26 @@ def test_stack_dict_no_axis_labels(): # a) with an axis name res = stack({'b0': arr0, 'b1': arr1}, 'b') - expected = LArray([[0, -1], - [1, 0], - [2, 1]], [a, 'b=b0,b1']) + expected = Array([[0, -1], + [1, 0], + [2, 1]], [a, 'b=b0,b1']) assert_array_equal(res, expected) # b) without an axis name res = stack({'b0': arr0, 'b1': arr1}) - expected = LArray([[0, -1], - [1, 0], - [2, 1]], [a, 'b0,b1']) + expected = Array([[0, -1], + [1, 0], + [2, 1]], [a, 'b0,b1']) assert_array_equal(res, expected) def test_0darray_convert(): - int_arr = LArray(1) + int_arr = Array(1) assert int(int_arr) == 1 assert float(int_arr) == 1.0 assert int_arr.__index__() == 1 - float_arr = LArray(1.0) + float_arr = Array(1.0) assert int(float_arr) == 1 assert float(float_arr) == 1.0 with pytest.raises(TypeError) as e_info: diff --git a/larray/tests/test_excel.py b/larray/tests/test_excel.py index c5ec86221..9f56cd30b 100644 --- a/larray/tests/test_excel.py +++ b/larray/tests/test_excel.py @@ -7,7 +7,7 @@ import numpy as np from larray.tests.common import needs_xlwings, TESTDATADIR -from larray import ndtest, open_excel, aslarray, Axis, nan, ExcelReport +from larray import ndtest, open_excel, asarray, Axis, nan, ExcelReport from larray.inout import xw_excel from larray.example import load_example_data, EXAMPLE_EXCEL_TEMPLATES_DIR @@ -221,18 +221,18 @@ def test_asarray(self): assert np.array_equal(res1, arr1.data) assert res1.dtype == arr1.dtype - def test_aslarray(self): + def test_asarray(self): with open_excel(visible=False) as wb: sheet = wb[0] arr1 = ndtest([Axis(2), Axis(3)]) # no header so that we 
have an uniform dtype for the whole sheet sheet['A1'] = arr1 - res1 = aslarray(sheet['A1:C2']) + res1 = asarray(sheet['A1:C2']) assert res1.equals(arr1) assert res1.dtype == arr1.dtype - # this tests Range.__getattr__ with an LArray attribute + # this tests Range.__getattr__ with an Array attribute def test_aggregate(self): with open_excel(visible=False) as wb: sheet = wb[0] diff --git a/larray/tests/test_ipfp.py b/larray/tests/test_ipfp.py index 97f91a51d..2eeb75990 100644 --- a/larray/tests/test_ipfp.py +++ b/larray/tests/test_ipfp.py @@ -2,13 +2,13 @@ import pytest from larray.tests.common import assert_array_equal -from larray import Axis, LArray, ndtest, ipfp, X +from larray import Axis, Array, ndtest, ipfp, X def test_ipfp(): a = Axis('a=a0,a1') b = Axis('b=b0,b1') - initial = LArray([[2, 1], [1, 2]], [a, b]) + initial = Array([[2, 1], [1, 2]], [a, b]) # array sums already match target sums # [3, 3], [3, 3] @@ -16,8 +16,8 @@ def test_ipfp(): assert_array_equal(r, initial) # array sums do not match target sums (ie the usual case) - along_a = LArray([2, 1], b) - along_b = LArray([1, 2], a) + along_a = Array([2, 1], b) + along_b = Array([1, 2], a) r = ipfp([along_a, along_b], initial) assert_array_equal(r, [[0.8, 0.2], [1.0, 1.0]]) @@ -33,31 +33,31 @@ def test_ipfp(): ipfp([along_b, along_a], initial) # different target sums totals - along_a = LArray([2, 1], b) - along_b = LArray([1, 3], a) + along_a = Array([2, 1], b) + along_b = Array([1, 3], a) with pytest.raises(ValueError, match=r"target sum along b \(axis 1\) is different than target sum along " r"a \(axis 0\): 4 vs 3"): ipfp([along_a, along_b], initial) # all zero values - initial = LArray([[0, 0], [1, 2]], [a, b]) - along_a = LArray([2, 1], b) - along_b = LArray([1, 2], a) + initial = Array([[0, 0], [1, 2]], [a, b]) + along_a = Array([2, 1], b) + along_b = Array([1, 2], a) with pytest.raises(ValueError, match="found all zero values sum along b \\(axis 1\\) but non zero target " "sum:\na0: 1"): 
ipfp([along_a, along_b], initial) # zero target sum - initial = LArray([[2, 1], [1, 2]], [a, b]) - along_a = LArray([0, 1], b) - along_b = LArray([1, 0], a) + initial = Array([[2, 1], [1, 2]], [a, b]) + along_a = Array([0, 1], b) + along_b = Array([1, 0], a) with pytest.raises(ValueError, match="found Non Zero Values but Zero target Sum \\(nzvzs\\) along a " "\\(axis 0\\), use nzvzs='warn' or 'fix' to set them to zero " "automatically:\nb0: 3"): ipfp([along_a, along_b], initial) # negative initial values - initial = LArray([[2, -1], [1, 2]], [a, b]) + initial = Array([[2, -1], [1, 2]], [a, b]) with pytest.raises(ValueError, match="negative value\\(s\\) found:\na0_b1: -1"): ipfp([along_a, along_b], initial) @@ -150,8 +150,8 @@ def test_ipfp_no_values(): [2.0, 4.0, 6.0], [3.0, 6.0, 9.0]]) - along_a = LArray([2, 1], Axis(2, 'b')) - along_b = LArray([1, 2], Axis(2, 'a')) + along_a = Array([2, 1], Axis(2, 'b')) + along_b = Array([1, 2], Axis(2, 'a')) r = ipfp([along_a, along_b]) assert_array_equal(r, [[2 / 3, 1 / 3], [4 / 3, 2 / 3]]) @@ -169,7 +169,7 @@ def test_ipfp_no_values_no_name(): def test_ipfp_no_name(): - initial = LArray([[2, 1], [1, 2]]) + initial = Array([[2, 1], [1, 2]]) # sums already correct # [3, 3], [3, 3] @@ -177,8 +177,8 @@ def test_ipfp_no_name(): assert_array_equal(r, [[2, 1], [1, 2]]) # different sums (ie the usual case) - along_a = LArray([2, 1]) - along_b = LArray([1, 2]) + along_a = Array([2, 1]) + along_b = Array([1, 2]) r = ipfp([along_a, along_b], initial) assert_array_equal(r, [[0.8, 0.2], [1.0, 1.0]]) diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index 8b73ae5c9..d4d70bda3 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -8,13 +8,13 @@ import pytest from larray.tests.common import assert_array_nan_equal, inputpath, tmp_path, meta, needs_xlwings -from larray import (Session, Axis, LArray, Group, isnan, zeros_like, ndtest, ones_like, ones, full, +from larray import (Session, Axis, 
Array, Group, isnan, zeros_like, ndtest, ones_like, ones, full, local_arrays, global_arrays, arrays) from larray.util.misc import pickle def equal(o1, o2): - if isinstance(o1, LArray) or isinstance(o2, LArray): + if isinstance(o1, Array) or isinstance(o2, Array): return o1.equals(o2) elif isinstance(o1, Axis) or isinstance(o2, Axis): return o1.equals(o2) @@ -87,7 +87,7 @@ def test_getitem_list(session): def test_getitem_larray(session): - s1 = session.filter(kind=LArray) + s1 = session.filter(kind=Array) s2 = Session({'e': e + 1, 'f': f}) res_eq = s1[s1.element_equals(s2)] res_neq = s1[~(s1.element_equals(s2))] @@ -140,7 +140,7 @@ def test_filter(session): assert list(session.filter(kind=Axis)) == [b, a] assert list(session.filter('a01', Group)) == [a01] assert list(session.filter(kind=Group)) == [b12, a01] - assertObjListEqual(session.filter(kind=LArray), [e, g, f]) + assertObjListEqual(session.filter(kind=Array), [e, g, f]) assert list(session.filter(kind=dict)) == [{}] assert list(session.filter(kind=(Axis, Group))) == [b, b12, a, a01] @@ -161,7 +161,7 @@ def test_h5_io(tmpdir, session, meta): s = Session() s.load(fpath) # HDF does *not* keep ordering (ie, keys are always sorted + - # read Axis objects, then Groups objects and finally LArray objects) + # read Axis objects, then Groups objects and finally Array objects) assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] assert s.meta == meta @@ -260,7 +260,7 @@ def test_csv_io(tmpdir, session, meta): s = Session() s.load(fpath, engine='pandas_csv') # CSV cannot keep ordering (so we always sort keys) - # Also, Axis objects are read first, then Groups objects and finally LArray objects + # Also, Axis objects are read first, then Groups objects and finally Array objects assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] assert s.meta == meta @@ -361,7 +361,7 @@ def test_to_globals(session): def test_element_equals(session): - sess = session.filter(kind=(Axis, Group, LArray)) + sess = 
session.filter(kind=(Axis, Group, Array)) expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('g', g), ('f', f)]) assert all(sess.element_equals(expected)) @@ -383,44 +383,44 @@ def test_element_equals(session): def test_eq(session): - sess = session.filter(kind=(Axis, Group, LArray)) + sess = session.filter(kind=(Axis, Group, Array)) expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('g', g), ('f', f)]) - assert all([item.all() if isinstance(item, LArray) else item + assert all([item.all() if isinstance(item, Array) else item for item in (sess == expected).values()]) other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('f', f)]) res = sess == other assert list(res.keys()) == ['b', 'b12', 'a', 'a01', 'e', 'g', 'f'] - assert [item.all() if isinstance(item, LArray) else item + assert [item.all() if isinstance(item, Array) else item for item in res.values()] == [True, True, True, True, True, False, True] e2 = e.copy() e2.i[1, 1] = 42 other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e2), ('f', f)]) res = sess == other - assert [item.all() if isinstance(item, LArray) else item + assert [item.all() if isinstance(item, Array) else item for item in res.values()] == [True, True, True, True, False, False, True] def test_ne(session): - sess = session.filter(kind=(Axis, Group, LArray)) + sess = session.filter(kind=(Axis, Group, Array)) expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('g', g), ('f', f)]) - assert ([(~item).all() if isinstance(item, LArray) else not item + assert ([(~item).all() if isinstance(item, Array) else not item for item in (sess != expected).values()]) other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('f', f)]) res = sess != other assert list(res.keys()) == ['b', 'b12', 'a', 'a01', 'e', 'g', 'f'] - assert [(~item).all() if isinstance(item, LArray) else not item + assert [(~item).all() if 
isinstance(item, Array) else not item for item in res.values()] == [True, True, True, True, True, False, True] e2 = e.copy() e2.i[1, 1] = 42 other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e2), ('f', f)]) res = sess != other - assert [(~item).all() if isinstance(item, LArray) else not item + assert [(~item).all() if isinstance(item, Array) else not item for item in res.values()] == [True, True, True, True, False, False, True] @@ -446,7 +446,7 @@ def test_sub(session): assert diff.a01 is a01 assert diff.c is c - # session - dict(LArray and scalar) + # session - dict(Array and scalar) other = {'e': ones_like(e), 'f': 1} diff = sess - other assert_array_nan_equal(diff['e'], e - ones_like(e)) @@ -481,7 +481,7 @@ def test_rsub(session): assert diff.a01 is a01 assert diff.c is c - # dict(LArray and scalar) - session + # dict(Array and scalar) - session other = {'e': ones_like(e), 'f': 1} diff = other - sess assert_array_nan_equal(diff['e'], ones_like(e) - e) @@ -523,7 +523,7 @@ def test_rdiv(session): assert res.a01 is a01 assert res.c is c - # dict(LArray and scalar) - session + # dict(Array and scalar) - session other = {'e': e, 'f': f} res = other / sess assert_array_nan_equal(res['e'], e / e) @@ -534,7 +534,7 @@ def test_rdiv(session): def test_pickle_roundtrip(session, meta): - original = session.filter(kind=LArray) + original = session.filter(kind=Array) original.meta = meta s = pickle.dumps(original) res = pickle.loads(s) diff --git a/larray/viewer/__init__.py b/larray/viewer/__init__.py index e9aec4229..f9b0f1f98 100644 --- a/larray/viewer/__init__.py +++ b/larray/viewer/__init__.py @@ -7,7 +7,7 @@ def view(obj=None, title='', depth=0): Parameters ---------- - obj : np.ndarray, LArray, Session, dict or str, optional + obj : np.ndarray, Array, Session, dict or str, optional Object to visualize. If string, array(s) will be loaded from the file given as argument. Defaults to the collection of all local variables where the function was called. 
title : str, optional @@ -40,7 +40,7 @@ def edit(obj=None, title='', minvalue=None, maxvalue=None, readonly=False, depth Parameters ---------- - obj : np.ndarray, LArray, Session, dict, str or REOPEN_LAST_FILE, optional + obj : np.ndarray, Array, Session, dict, str or REOPEN_LAST_FILE, optional Object to visualize. If string, array(s) will be loaded from the file given as argument. Passing the constant REOPEN_LAST_FILE loads the last opened file. Defaults to the collection of all local variables where the function was called. @@ -81,7 +81,7 @@ def compare(*args, **kwargs): Parameters ---------- - *args : LArrays or Sessions + *args : Arrays or Sessions Arrays or sessions to compare. title : str, optional Title for the window. Defaults to ''. diff --git a/setup.cfg b/setup.cfg index 3aa0093e2..40813fc4b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,12 +5,12 @@ test=pytest testpaths = larray # - exclude (doc)tests from ufuncs (because docstrings are copied from numpy # and many of those doctests are failing -# - deselect LArray.astype since doctests fails for Python 3.6 and numpy >= 1.17 +# - deselect Array.astype since doctests fails for Python 3.6 and numpy >= 1.17 addopts = -v --doctest-modules --ignore=larray/core/npufuncs.py --ignore=larray/ipfp --ignore=larray/inout/xw_reporting.py - --deselect larray/core/array.py::larray.core.array.LArray.astype + --deselect larray/core/array.py::larray.core.array.Array.astype --pep8 #--cov # E122: continuation line missing indentation or outdented