From 9ce56de06837c0481d65435e94e7824d607a9cc8 Mon Sep 17 00:00:00 2001 From: Ryan Sheftel Date: Tue, 15 Apr 2025 13:56:06 -0400 Subject: [PATCH 1/6] Add type hints everywhere --- docs/change_log.rst | 4 + pyproject.toml | 26 +- raccoon/dataframe.py | 363 +++++++++------- raccoon/series.py | 283 +++++++----- raccoon/sort_utils.py | 8 +- raccoon/utils.py | 6 +- tests/test_dataframe/test_dataframe.py | 2 +- tests/test_dataframe/test_delete.py | 64 +-- tests/test_dataframe/test_get.py | 499 +++++++++++++--------- tests/test_dataframe/test_getters.py | 43 +- tests/test_dataframe/test_initialize.py | 64 +-- tests/test_dataframe/test_iterators.py | 52 +-- tests/test_dataframe/test_set.py | 373 +++++++++------- tests/test_dataframe/test_sort.py | 65 +-- tests/test_dataframe/test_utils.py | 18 +- tests/test_dataframe/test_validate.py | 34 +- tests/test_dropin/test_dataframe_blist.py | 118 ++--- tests/test_dropin/test_series_blist.py | 36 +- tests/test_series/test_delete.py | 28 +- tests/test_series/test_get.py | 36 +- tests/test_series/test_getters.py | 44 +- tests/test_series/test_initialize.py | 40 +- tests/test_series/test_series.py | 74 ++-- tests/test_series/test_set.py | 10 +- tests/test_series/test_sort.py | 8 +- tests/test_series/test_utils.py | 4 +- tests/test_series/test_view_series.py | 154 +++---- 27 files changed, 1395 insertions(+), 1061 deletions(-) diff --git a/docs/change_log.rst b/docs/change_log.rst index b535aa0..0eb6265 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -183,3 +183,7 @@ an installation requirement. 
- Small fixes to pyproject.toml - Remove travis-CI configs as it is no longer used - Merge coveragerc file into pyproject.toml + +3.2.0 (04/14/25) +~~~~~~~~~~~~~~~~ +- Add type hints diff --git a/pyproject.toml b/pyproject.toml index 03d5b2b..0257e88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "raccoon" -version = "3.1.1" +version = "3.2.0" authors = [ { name="Ryan Sheftel", email="rsheftel@alumni.upenn.edu" }, ] @@ -25,6 +25,9 @@ classifiers = [ 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', ] dependencies = ['tabulate'] @@ -62,3 +65,24 @@ exclude_also = [ ] ignore_errors = true + +[tool.ruff] +line-length = 120 +target-version = "py312" + +[tool.ruff.lint] +extend-select = ['I'] # uncomment to use ruff import sorting +# ignore = ["E231", "W503", "E203", "W605", "E501"] + +#[tool.ruff.lint.isort] +#force-single-line = false + +[tool.black] +line-length = 120 + +[tool.isort] +profile = "black" +line_length = 88 +skip_gitignore = true +skip_glob = ["tests/data", "profiling"] +known_first_party = ["black", "blib2to3", "blackd", "_black_version"] diff --git a/raccoon/dataframe.py b/raccoon/dataframe.py index 5ceb846..d0b5d37 100644 --- a/raccoon/dataframe.py +++ b/raccoon/dataframe.py @@ -6,6 +6,7 @@ from bisect import bisect_left, bisect_right from collections import OrderedDict, namedtuple from itertools import compress +from typing import Any, Callable, Generator, Literal, Self from tabulate import tabulate @@ -22,17 +23,27 @@ class DataFrame(object): sort by index on construction, and then any addition of a new row will insert it into the DataFrame so that the index remains sort. 
""" - # Define slots to make object faster - __slots__ = ['_data', '_index', '_index_name', '_columns', '_sort', '_dropin'] - def __init__(self, data=None, columns=None, index=None, index_name='index', sort=None, dropin=None): + # Define slots to make object faster + __slots__ = ["_data", "_index", "_index_name", "_columns", "_sort", "_dropin"] + + def __init__( + self, + data: dict[Any, list] | None = None, + columns: list | None = None, + index: list | None = None, + index_name: str | tuple | None = "index", + sort: bool | None = None, + dropin: Callable = None, + ): """ :param data: (optional) dictionary of lists. The keys of the dictionary will be used for the column names and\ the lists will be used for the column data. :param columns: (optional) list of column names that will define the order :param index: (optional) list of index values. If None then the index will be integers starting with zero :param index_name: (optional) name for the index. Default is "index" - :param sort: if True then DataFrame will keep the index sort. If True all index values must be of same type + :param sort: if True then DataFrame will keep the index sort. If True all index values must be of same type. + If None then will default to True if no index is provided. :param dropin: if supplied the drop-in replacement for list that will be used """ # standard variable setup @@ -43,23 +54,24 @@ def __init__(self, data=None, columns=None, index=None, index_name='index', sort # quality checks if (index is not None) and not (self._check_list(index) or isinstance(index, list)): - raise TypeError('index must be a list. if dropin provided, must be of that type') + raise TypeError("index must be a list. if dropin provided, must be of that type") if (columns is not None) and not (self._check_list(columns) or isinstance(columns, list)): - raise TypeError('columns must be a list. if dropin provided, must be of that type') + raise TypeError("columns must be a list. 
if dropin provided, must be of that type") # define from dictionary if data is None: self._data = dropin() if dropin else list() if columns: # expand to the number of columns - self._data = dropin([dropin() for _ in range(len(columns))]) if dropin \ - else [[] for _ in range(len(columns))] + self._data = ( + dropin([dropin() for _ in range(len(columns))]) if dropin else [[] for _ in range(len(columns))] + ) self.columns = columns else: self.columns = list() if index: if not columns: - raise ValueError('cannot initialize with index but no columns') + raise ValueError("cannot initialize with index but no columns") # pad out to the number of rows self._pad_data(max_len=len(index)) self.index = index @@ -67,9 +79,13 @@ def __init__(self, data=None, columns=None, index=None, index_name='index', sort self.index = list() elif isinstance(data, dict): # set data from dict values. If dict value is not a list, wrap it to make a single element list - self._data = dropin([dropin(x) if ((type(x) == dropin) or (type(x) == list)) - else dropin([x]) for x in data.values()]) if dropin \ + self._data = ( + dropin( + [dropin(x) if ((type(x) == dropin) or (type(x) == list)) else dropin([x]) for x in data.values()] + ) + if dropin else [x if type(x) == list else [x] for x in data.values()] + ) # setup columns from directory keys self.columns = data.keys() # pad the data @@ -80,7 +96,7 @@ def __init__(self, data=None, columns=None, index=None, index_name='index', sort else: self.index = range(len(self._data[0])) else: - raise TypeError('Not valid data type.') + raise TypeError("Not valid data type.") # sort by columns if provided if columns: @@ -97,20 +113,24 @@ def __init__(self, data=None, columns=None, index=None, index_name='index', sort self.sort = True def __repr__(self): - return 'object id: %s\ncolumns:\n%s\ndata:\n%s\nindex:\n%s\n' % (id(self), self._columns, - self._data, self._index) - - def __str__(self): + return "object id: %s\ncolumns:\n%s\ndata:\n%s\nindex:\n%s\n" % ( + 
id(self), + self._columns, + self._data, + self._index, + ) + + def __str__(self) -> str: return self._make_table() - def _check_list(self, x): + def _check_list(self, x: list) -> bool: return type(x) == (self._dropin if self._dropin else list) - def _make_table(self, index=True, **kwargs): - kwargs['headers'] = 'keys' if 'headers' not in kwargs.keys() else kwargs['headers'] + def _make_table(self, index: bool = True, **kwargs) -> str: + kwargs["headers"] = "keys" if "headers" not in kwargs.keys() else kwargs["headers"] return tabulate(self.to_dict(ordered=True, index=index), **kwargs) - def print(self, index=True, **kwargs): + def print(self, index: bool = True, **kwargs) -> None: """ Print the contents of the DataFrame. This method uses the tabulate function from the tabulate package. Use the kwargs to pass along any arguments to the tabulate function. @@ -121,7 +141,7 @@ def print(self, index=True, **kwargs): """ print(self._make_table(index=index, **kwargs)) - def _sort_columns(self, columns_list): + def _sort_columns(self, columns_list: list) -> None: """ Given a list of column names will sort the DataFrame columns to match the given order @@ -130,16 +150,19 @@ def _sort_columns(self, columns_list): """ if not (all([x in columns_list for x in self._columns]) and all([x in self._columns for x in columns_list])): raise ValueError( - 'columns_list must be all in current columns, and all current columns must be in columns_list') + "columns_list must be all in current columns, and all current columns must be in columns_list" + ) new_sort = [self._columns.index(x) for x in columns_list] - self._data = self._dropin([self._data[x] for x in new_sort]) if self._dropin \ - else [self._data[x] for x in new_sort] - self._columns = self._dropin([self._columns[x] for x in new_sort]) if self._dropin \ - else [self._columns[x] for x in new_sort] + self._data = ( + self._dropin([self._data[x] for x in new_sort]) if self._dropin else [self._data[x] for x in new_sort] + ) + 
self._columns = ( + self._dropin([self._columns[x] for x in new_sort]) if self._dropin else [self._columns[x] for x in new_sort] + ) - def _pad_data(self, max_len=None): + def _pad_data(self, max_len: int | None = None) -> None: """ - Pad the data in DataFrame with [None} to ensure that all columns have the same length. + Pad the data in DataFrame with [None] to ensure that all columns have the same length. :param max_len: If provided will extend all columns to this length, if not then will use the longest column :return: nothing @@ -149,24 +172,24 @@ def _pad_data(self, max_len=None): for _, col in enumerate(self._data): col.extend([None] * (max_len - len(col))) - def __len__(self): + def __len__(self) -> int: return len(self._index) @property - def data(self): + def data(self) -> list[list]: return self._data.copy() @property - def columns(self): + def columns(self) -> list: return self._columns.copy() @columns.setter - def columns(self, columns_list): + def columns(self, columns_list: list) -> None: self._validate_columns(columns_list) self._columns = self._dropin(columns_list) if self._dropin else list(columns_list) @property - def index(self): + def index(self) -> list: """ Return a view of the index as a list. Because this is a view any change to the return list from this method will corrupt the DataFrame. 
@@ -176,36 +199,36 @@ def index(self): return self._index @index.setter - def index(self, index_list): + def index(self, index_list: list) -> None: self._validate_index(index_list) self._index = self._dropin(index_list) if self._dropin else list(index_list) @property - def index_name(self): + def index_name(self) -> str | tuple | None: return self._index_name @index_name.setter - def index_name(self, name): + def index_name(self, name: str | tuple | None) -> None: self._index_name = name @property - def dropin(self): + def dropin(self) -> Callable: return self._dropin @property - def sort(self): + def sort(self) -> bool: return self._sort @sort.setter - def sort(self, boolean): + def sort(self, boolean: bool) -> None: self._sort = boolean if self._sort: self.sort_index() - def select_index(self, compare, result='boolean'): + def select_index(self, compare: Any | tuple, result: Literal["boolean", "value"] = "boolean") -> list[bool | Any]: """ Finds the elements in the index that match the compare parameter and returns either a list of the values that - match, of a boolean list the length of the index with True to each index that matches. If the indexes are + match, or a boolean list the length of the index with True to each index that matches. If the indexes are tuples then the compare is a tuple where None in any field of the tuple will be treated as "*" and match all values. 
@@ -215,22 +238,30 @@ def select_index(self, compare, result='boolean'): """ if isinstance(compare, tuple): # this crazy list comprehension will match all the tuples in the list with None being an * wildcard - booleans = [all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)]) - for x, v in enumerate(self._index)] + booleans = [ + all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)]) + for x, v in enumerate(self._index) + ] else: booleans = [False] * len(self._index) if self._sort: booleans[sorted_index(self._index, compare)] = True else: booleans[self._index.index(compare)] = True - if result == 'boolean': + if result == "boolean": return booleans - elif result == 'value': + elif result == "value": return list(compress(self._index, booleans)) else: - raise ValueError('only valid values for result parameter are: boolean or value.') + raise ValueError("only valid values for result parameter are: boolean or value.") - def get(self, indexes=None, columns=None, as_list=False, as_dict=False): + def get( + self, + indexes: Any | list[Any | bool] = None, + columns: Any | list = None, + as_list: bool = False, + as_dict: bool = False, + ) -> Self | list | dict | Any: """ Given indexes and columns will return a sub-set of the DataFrame. This method will direct to the below methods based on what types are passed in for the indexes and columns. 
The type of the return is determined by the @@ -261,7 +292,7 @@ def get(self, indexes=None, columns=None, as_list=False, as_dict=False): else: return self.get_cell(indexes, columns) - def get_cell(self, index, column): + def get_cell(self, index: Any, column: Any) -> Any: """ For a single index and column value return the value of the cell @@ -273,7 +304,7 @@ def get_cell(self, index, column): c = self._columns.index(column) return self._data[c][i] - def get_rows(self, indexes, column, as_list=False): + def get_rows(self, indexes: list[bool | Any], column: Any, as_list: bool = False) -> Self | list: """ For a list of indexes and a single column name return the values of the indexes in that column. @@ -285,7 +316,7 @@ def get_rows(self, indexes, column, as_list=False): c = self._columns.index(column) if all([isinstance(i, bool) for i in indexes]): # boolean list if len(indexes) != len(self._index): - raise ValueError('boolean index list must be same size of existing index') + raise ValueError("boolean index list must be same size of existing index") if all(indexes): # the entire column data = self._data[c] index = self._index @@ -293,14 +324,20 @@ def get_rows(self, indexes, column, as_list=False): data = list(compress(self._data[c], indexes)) index = list(compress(self._index, indexes)) else: # index values list - locations = [sorted_index(self._index, x) for x in indexes] if self._sort \ + locations = ( + [sorted_index(self._index, x) for x in indexes] + if self._sort else [self._index.index(x) for x in indexes] + ) data = [self._data[c][i] for i in locations] index = [self._index[i] for i in locations] - return data if as_list else DataFrame(data={column: data}, index=index, index_name=self._index_name, - sort=self._sort) + return ( + data + if as_list + else DataFrame(data={column: data}, index=index, index_name=self._index_name, sort=self._sort) + ) - def get_columns(self, index, columns=None, as_dict=False): + def get_columns(self, index: Any, columns: 
list[Any] = None, as_dict: bool = False) -> Self | dict: """ For a single index and list of column names return a DataFrame of the values in that index as either a dict or a DataFrame @@ -313,7 +350,7 @@ def get_columns(self, index, columns=None, as_dict=False): i = sorted_index(self._index, index) if self._sort else self._index.index(index) return self.get_location(i, columns, as_dict) - def get_entire_column(self, column, as_list=False): + def get_entire_column(self, column: Any, as_list: bool = False) -> Self | list: """ Shortcut method to retrieve a single column all rows. Since this is a common use case this method will be faster than the more general method. @@ -324,10 +361,13 @@ def get_entire_column(self, column, as_list=False): """ c = self._columns.index(column) data = self._data[c] - return data if as_list else DataFrame(data={column: data}, index=self._index, index_name=self._index_name, - sort=self._sort) + return ( + data + if as_list + else DataFrame(data={column: data}, index=self._index, index_name=self._index_name, sort=self._sort) + ) - def get_matrix(self, indexes, columns): + def get_matrix(self, indexes: list[Any | bool], columns: list[Any]) -> Self: """ For a list of indexes and list of columns return a DataFrame of the values. 
@@ -340,34 +380,41 @@ def get_matrix(self, indexes, columns): if all([isinstance(i, bool) for i in indexes]): # boolean list is_bool_indexes = True if len(indexes) != len(self._index): - raise ValueError('boolean index list must be same size of existing index') + raise ValueError("boolean index list must be same size of existing index") bool_indexes = indexes indexes = list(compress(self._index, indexes)) else: is_bool_indexes = False - locations = [sorted_index(self._index, x) for x in indexes] if self._sort \ + locations = ( + [sorted_index(self._index, x) for x in indexes] + if self._sort else [self._index.index(x) for x in indexes] + ) if all([isinstance(i, bool) for i in columns]): # boolean list if len(columns) != len(self._columns): - raise ValueError('boolean column list must be same size of existing columns') + raise ValueError("boolean column list must be same size of existing columns") columns = list(compress(self._columns, columns)) col_locations = [self._columns.index(x) for x in columns] data_dict = dict() for c in col_locations: - data_dict[self._columns[c]] = list(compress(self._data[c], bool_indexes)) if is_bool_indexes \ + data_dict[self._columns[c]] = ( + list(compress(self._data[c], bool_indexes)) + if is_bool_indexes else [self._data[c][i] for i in locations] + ) - return DataFrame(data=data_dict, index=indexes, columns=columns, index_name=self._index_name, - sort=self._sort) + return DataFrame(data=data_dict, index=indexes, columns=columns, index_name=self._index_name, sort=self._sort) - def get_location(self, location, columns=None, as_dict=False, index=True): + def get_location( + self, location: int, columns: Any | list | None = None, as_dict: bool = False, index: bool = True + ) -> Self | dict | Any: """ For an index location and either (1) list of columns return a DataFrame or dictionary of the values or (2) single column name and return the value of that cell. 
This is optimized for speed because it does not need - to lookup the index location with a search. Also can accept relative indexing from the end of the DataFrame + to look up the index location with a search. Also, can accept relative indexing from the end of the DataFrame in standard python notation [-3, -2, -1] :param location: index location in standard python form of positive or negative number @@ -383,7 +430,7 @@ def get_location(self, location, columns=None, as_dict=False, index=True): return self._data[c][location] elif all([isinstance(i, bool) for i in columns]): if len(columns) != len(self._columns): - raise ValueError('boolean column list must be same size of existing columns') + raise ValueError("boolean column list must be same size of existing columns") columns = list(compress(self._columns, columns)) data = dict() for column in columns: @@ -396,10 +443,11 @@ def get_location(self, location, columns=None, as_dict=False, index=True): return data else: data = {k: [data[k]] for k in data} # this makes the dict items lists - return DataFrame(data=data, index=[index_value], columns=columns, index_name=self._index_name, - sort=self._sort) + return DataFrame( + data=data, index=[index_value], columns=columns, index_name=self._index_name, sort=self._sort + ) - def get_locations(self, locations, columns=None, **kwargs): + def get_locations(self, locations: list, columns: Any | list | None = None, **kwargs) -> Self: """ For list of locations and list of columns return a DataFrame of the values. 
@@ -412,9 +460,11 @@ def get_locations(self, locations, columns=None, **kwargs): indexes = [self._index[x] for x in locations] return self.get(indexes, columns, **kwargs) - def get_slice(self, start_index=None, stop_index=None, columns=None, as_dict=False): + def get_slice( + self, start_index: Any = None, stop_index: Any = None, columns: list | None = None, as_dict: bool = False + ) -> Self | tuple: """ - For sorted DataFrames will return either a DataFrame or dict of all of the rows where the index is greater than + For sorted DataFrames will return either a DataFrame or dict of all the rows where the index is greater than or equal to the start_index if provided and less than or equal to the stop_index if provided. If either the start or stop index is None then will include from the first or last element, similar to standard python slide of [:5] or [:5]. Both end points are considered inclusive. @@ -426,13 +476,13 @@ def get_slice(self, start_index=None, stop_index=None, columns=None, as_dict=Fal :return: DataFrame or tuple """ if not self._sort: - raise RuntimeError('Can only use get_slice on sorted DataFrames') + raise RuntimeError("Can only use get_slice on sorted DataFrames") if columns is None: columns = self._columns elif all([isinstance(i, bool) for i in columns]): if len(columns) != len(self._columns): - raise ValueError('boolean column list must be same size of existing columns') + raise ValueError("boolean column list must be same size of existing columns") columns = list(compress(self._columns, columns)) start_location = bisect_left(self._index, start_index) if start_index is not None else None @@ -448,10 +498,16 @@ def get_slice(self, start_index=None, stop_index=None, columns=None, as_dict=Fal return index, data else: data = data if data else None # if the dict is empty, convert to None - return DataFrame(data=data, index=index, columns=columns, index_name=self._index_name, sort=self._sort, - dropin=self._dropin) + return DataFrame( + data=data, + 
index=index, + columns=columns, + index_name=self._index_name, + sort=self._sort, + dropin=self._dropin, + ) - def _insert_row(self, i, index): + def _insert_row(self, i: int, index: Any) -> None: """ Insert a new row in the DataFrame. @@ -466,7 +522,7 @@ def _insert_row(self, i, index): for c in range(len(self._columns)): self._data[c].insert(i, None) - def _insert_missing_rows(self, indexes): + def _insert_missing_rows(self, indexes: list[Any]) -> None: """ Given a list of indexes, find all the indexes that are not currently in the DataFrame and make a new row for that index, inserting into the index. This requires the DataFrame to be sort=True @@ -478,7 +534,7 @@ def _insert_missing_rows(self, indexes): for x in new_indexes: self._insert_row(bisect_left(self._index, x), x) - def _add_row(self, index): + def _add_row(self, index: Any) -> None: """ Add a new row to the DataFrame @@ -489,7 +545,7 @@ def _add_row(self, index): for c, _ in enumerate(self._columns): self._data[c].append(None) - def _add_missing_rows(self, indexes): + def _add_missing_rows(self, indexes: list[Any]) -> None: """ Given a list of indexes, find all the indexes that are not currently in the DataFrame and make a new row for that index by appending to the DataFrame. This does not maintain sort order for the index. @@ -501,7 +557,7 @@ def _add_missing_rows(self, indexes): for x in new_indexes: self._add_row(x) - def _add_column(self, column): + def _add_column(self, column: Any) -> None: """ Add a new column to the DataFrame @@ -514,18 +570,20 @@ def _add_column(self, column): else: self._data.append([None] * len(self._index)) - def set(self, indexes=None, columns=None, values=None): + def set( + self, indexes: Any | list | list[bool] = None, columns: Any | None = None, values: Any | list = None + ) -> None: """ Given indexes and columns will set a sub-set of the DataFrame to the values provided. 
This method will direct to the below methods based on what types are passed in for the indexes and columns. If the indexes or columns contains values not in the DataFrame then new rows or columns will be added. :param indexes: indexes value, list of indexes values, or a list of booleans. If None then all indexes are used - :param columns: columns name, if None then all columns are used. Currently can only handle a single column or\ - all columns - :param values: value or list of values to set (index, column) to. If setting just a single row, then must be a\ - dict where the keys are the column names. If a list then must be the same length as the indexes parameter, if\ - indexes=None, then must be the same and length of DataFrame + :param columns: columns name, if None then all columns are used. Currently, can only handle a single column or + all columns + :param values: value or list of values to set (index, column) to. If setting just a single row, then must be a + dict where the keys are the column names. 
If a list then must be the same length as the indexes parameter, if + indexes=None, then must be the same and length of DataFrame :return: nothing """ if (indexes is not None) and (columns is not None): @@ -538,7 +596,7 @@ def set(self, indexes=None, columns=None, values=None): elif (indexes is None) and (columns is not None): self.set_column(indexes, columns, values) else: - raise ValueError('either or both of indexes or columns must be provided') + raise ValueError("either or both of indexes or columns must be provided") def set_cell(self, index, column, value): """ @@ -587,11 +645,11 @@ def set_row(self, index, values): self._add_row(index) if isinstance(values, dict): if not (set(values.keys()).issubset(self._columns)): - raise ValueError('keys of values are not all in existing columns') + raise ValueError("keys of values are not all in existing columns") for c, column in enumerate(self._columns): self._data[c][i] = values.get(column, self._data[c][i]) else: - raise TypeError('cannot handle values of this type.') + raise TypeError("cannot handle values of this type.") def set_column(self, index=None, column=None, values=None): """ @@ -615,9 +673,9 @@ def set_column(self, index=None, column=None, values=None): if not self._check_list(values): # single value provided, not a list, so turn values into list values = [values for x in index if x] if len(index) != len(self._index): - raise ValueError('boolean index list must be same size of existing index') + raise ValueError("boolean index list must be same size of existing index") if len(values) != index.count(True): - raise ValueError('length of values list must equal number of True entries in index list') + raise ValueError("length of values list must equal number of True entries in index list") indexes = [i for i, x in enumerate(index) if x] for x, i in enumerate(indexes): self._data[c][i] = values[x] @@ -625,7 +683,7 @@ def set_column(self, index=None, column=None, values=None): if not self._check_list(values): # 
single value provided, not a list, so turn values into list values = [values for _ in index] if len(values) != len(index): - raise ValueError('length of values and index must be the same.') + raise ValueError("length of values and index must be the same.") # insert or append indexes as needed if self._sort: exists_tuples = list(zip(*[sorted_exists(self._index, x) for x in index])) @@ -646,7 +704,7 @@ def set_column(self, index=None, column=None, values=None): if not self._check_list(values): # values not a list, turn into one of length same as index values = [values for _ in self._index] if len(values) != len(self._index): - raise ValueError('values list must be at same length as current index length.') + raise ValueError("values list must be at same length as current index length.") else: self._data[c] = self._dropin(values) if self._dropin else values @@ -696,7 +754,7 @@ def append_row(self, index, values, new_cols=True): """ if index in self._index: - raise IndexError('index already in DataFrame') + raise IndexError("index already in DataFrame") if new_cols: for col in values: @@ -725,12 +783,12 @@ def append_rows(self, indexes, values, new_cols=True): # check that the values data is less than or equal to the length of the indexes for column in values: if len(values[column]) > len(indexes): - raise ValueError('length of %s column in values is longer than indexes' % column) + raise ValueError("length of %s column in values is longer than indexes" % column) # check the indexes are not duplicates combined_index = self._index + indexes if len(set(combined_index)) != len(combined_index): - raise IndexError('duplicate indexes in DataFrames') + raise IndexError("duplicate indexes in DataFrames") if new_cols: for col in values: @@ -749,13 +807,13 @@ def _slice_index(self, slicer): try: start_index = sorted_index(self._index, slicer.start) if self._sort else self._index.index(slicer.start) except ValueError: - raise IndexError('start of slice not in the index') + raise 
IndexError("start of slice not in the index") try: end_index = sorted_index(self._index, slicer.stop) if self._sort else self._index.index(slicer.stop) except ValueError: - raise IndexError('end of slice not in the index') + raise IndexError("end of slice not in the index") if end_index < start_index: - raise IndexError('end of slice is before start of slice') + raise IndexError("end of slice is before start of slice") pre_list = [False] * start_index mid_list = [True] * (end_index - start_index + 1) @@ -817,12 +875,12 @@ def __setitem__(self, index, value): def to_list(self): """ - For a single column DataFrame returns a list of the values. Raises error if more then one column. + For a single column DataFrame returns a list of the values. Raises error if more than one column. :return: list """ if len(self._columns) > 1: - raise TypeError('tolist() only works with a single column DataFrame') + raise TypeError("tolist() only works with a single column DataFrame") return self._data[0] def to_dict(self, index=True, ordered=False): @@ -850,24 +908,24 @@ def to_json(self) -> str: the DataFrame will have a string representation in place of the object and will not reconstruct exactly. If there is a dropin supplied then the output will have a string representation of the droping func class - in the meta data as the dropin function cannot be stored with the JSON. + in the metadata as the dropin function cannot be stored with the JSON. 
:return: json string """ - input_dict = {'data': self.to_dict(index=False), 'index': list(self._index)} + input_dict = {"data": self.to_dict(index=False), "index": list(self._index)} # if self._dropin, turn into lists if self._dropin: - input_dict['index'] = list(input_dict['index']) - for key in input_dict['data']: - input_dict['data'][key] = list(input_dict['data'][key]) + input_dict["index"] = list(input_dict["index"]) + for key in input_dict["data"]: + input_dict["data"][key] = list(input_dict["data"][key]) meta_data = dict() for key in self.__slots__: - if key not in ['_data', '_index']: + if key not in ["_data", "_index"]: value = self.__getattribute__(key) - meta_data[key.lstrip('_')] = value if not type(value) == self._dropin else list(value) - input_dict['meta_data'] = meta_data + meta_data[key.lstrip("_")] = value if not type(value) == self._dropin else list(value) + input_dict["meta_data"] = meta_data return json.dumps(input_dict, default=repr) def rename_columns(self, rename_dict): @@ -878,7 +936,7 @@ def rename_columns(self, rename_dict): :return: nothing """ if not all([x in self._columns for x in rename_dict.keys()]): - raise ValueError('all dictionary keys must be in current columns') + raise ValueError("all dictionary keys must be in current columns") for current in rename_dict.keys(): self._columns[self._columns.index(current)] = rename_dict[current] @@ -914,11 +972,14 @@ def delete_rows(self, indexes): indexes = [indexes] if not self._check_list(indexes) else indexes if all([isinstance(i, bool) for i in indexes]): # boolean list if len(indexes) != len(self._index): - raise ValueError('boolean indexes list must be same size of existing indexes') + raise ValueError("boolean indexes list must be same size of existing indexes") indexes = [i for i, x in enumerate(indexes) if x] else: - indexes = [sorted_index(self._index, x) for x in indexes] if self._sort \ + indexes = ( + [sorted_index(self._index, x) for x in indexes] + if self._sort else 
[self._index.index(x) for x in indexes] + ) indexes = sorted(indexes, reverse=True) # need to sort and reverse list so deleting works for c, _ in enumerate(self._columns): for i in indexes: @@ -931,7 +992,7 @@ def delete_all_rows(self): """ Deletes the contents of all rows in the DataFrame. This function is faster than delete_rows() to remove all information, and at the same time it keeps the container lists for the columns and index so if there is another - object that references this DataFrame, like a ViewSeries, the reference remains in tact. + object that references this DataFrame, like a ViewSeries, the reference remains intact. :return: nothing """ @@ -948,7 +1009,7 @@ def delete_columns(self, columns): """ columns = [columns] if not self._check_list(columns) else columns if not all([x in self._columns for x in columns]): - raise ValueError('all columns must be in current columns') + raise ValueError("all columns must be in current columns") for column in columns: c = self._columns.index(column) del self._data[c] @@ -967,8 +1028,9 @@ def sort_index(self): self._index = self._dropin([self._index[x] for x in sort]) if self._dropin else [self._index[x] for x in sort] # each column for c in range(len(self._data)): - self._data[c] = self._dropin([self._data[c][i] for i in sort]) if self._dropin \ - else [self._data[c][i] for i in sort] + self._data[c] = ( + self._dropin([self._data[c][i] for i in sort]) if self._dropin else [self._data[c][i] for i in sort] + ) def sort_columns(self, column, key=None, reverse=False): """ @@ -982,35 +1044,36 @@ def sort_columns(self, column, key=None, reverse=False): :return: nothing """ if self._check_list(column): - raise TypeError('Can only sort by a single column ') + raise TypeError("Can only sort by a single column ") sort = sorted_list_indexes(self._data[self._columns.index(column)], key, reverse) # sort index self._index = self._dropin([self._index[x] for x in sort]) if self._dropin else [self._index[x] for x in sort] # each 
column for c in range(len(self._data)): - self._data[c] = self._dropin([self._data[c][i] for i in sort]) if self._dropin \ - else [self._data[c][i] for i in sort] + self._data[c] = ( + self._dropin([self._data[c][i] for i in sort]) if self._dropin else [self._data[c][i] for i in sort] + ) def _validate_index(self, indexes): if len(indexes) != len(set(indexes)): - raise ValueError('index contains duplicates') + raise ValueError("index contains duplicates") if self._data: if len(indexes) != len(self._data[0]): - raise ValueError('index length does not match data length') + raise ValueError("index length does not match data length") def _validate_columns(self, columns): if len(columns) != len(set(columns)): - raise ValueError('columns contains duplicates') + raise ValueError("columns contains duplicates") if self._data: if len(columns) != len(self._data): - raise ValueError('number of column names does not match number of data columns') + raise ValueError("number of column names does not match number of data columns") def _validate_data(self): if self._data: max_rows = max([len(x) for x in self._data]) same_lens = all([len(x) == max_rows for x in self._data]) if not same_lens: - raise ValueError('data is corrupted, each column not all same length') + raise ValueError("data is corrupted, each column not all same length") def validate_integrity(self): """ @@ -1037,7 +1100,7 @@ def append(self, data_frame): data_frame_index = data_frame.index combined_index = self._index + data_frame_index if len(set(combined_index)) != len(combined_index): - raise ValueError('duplicate indexes in DataFrames') + raise ValueError("duplicate indexes in DataFrames") for c, column in enumerate(data_frame.columns): self.set(indexes=data_frame_index, columns=column, values=data_frame.data[c].copy()) @@ -1119,9 +1182,9 @@ def divide(self, left_column, right_column, indexes=None): left_list, right_list = self._get_lists(left_column, right_column, indexes) return [l / r for l, r in zip(left_list, 
right_list)] - def isin(self, column, compare_list): + def isin(self, column: Any, compare_list: list) -> list[bool]: """ - Returns a boolean list where each elements is whether that element in the column is in the compare_list. + Returns a boolean list where each element is whether that element in the column is in the compare_list. :param column: single column name, does not work for multiple columns :param compare_list: list of items to compare to @@ -1129,7 +1192,7 @@ def isin(self, column, compare_list): """ return [x in compare_list for x in self._data[self._columns.index(column)]] - def iterrows(self, index=True): + def iterrows(self, index: bool = True) -> Generator[dict]: """ Iterates over DataFrame rows as dictionary of the values. The index will be included. @@ -1142,7 +1205,7 @@ def iterrows(self, index=True): row[col] = self._data[c][i] yield row - def itertuples(self, index=True, name='Raccoon'): + def itertuples(self, index: bool = True, name: str = "Raccoon") -> Generator[namedtuple]: """ Iterates over DataFrame rows as tuple of the values. @@ -1159,7 +1222,7 @@ def itertuples(self, index=True, name='Raccoon'): row[col] = self._data[c][i] yield row_tuple(**row) - def reset_index(self, drop=False): + def reset_index(self, drop: bool = False) -> None: """ Resets the index of the DataFrame to simple integer list and the index name to 'index'. 
If drop is True then the existing index is dropped, if drop is False then the current index is made a column in the DataFrame with @@ -1176,18 +1239,18 @@ def reset_index(self, drop=False): for i in range(len(self.index_name)): self.set_column(column=self.index_name[i], values=index_data[i]) else: - col_name = self.index_name if self.index_name != 'index' else 'index_0' + col_name = self.index_name if self.index_name != "index" else "index_0" self.set_column(column=col_name, values=self._index) self.index = list(range(self.__len__())) - self.index_name = 'index' + self.index_name = "index" # DataFrame creation functions @classmethod - def from_json(cls, json_string: str, dropin_func=None): + def from_json(cls, json_string: str, dropin_func: Callable | None = None) -> Self: """ Creates and return a DataFrame from a JSON of the type created by to_json. - If a dropin is in the meta data from the JSON, then the same dropin class must be provided here to + If a dropin is in the metadata from the JSON, then the same dropin class must be provided here to allow construction as the dropin function cannot be stored with the JSON. If required use a pickle object for that. 
@@ -1197,20 +1260,24 @@ def from_json(cls, json_string: str, dropin_func=None): """ input_dict = json.loads(json_string) # convert index to tuple if required - if input_dict['index'] and isinstance(input_dict['index'][0], list): - input_dict['index'] = [tuple(x) for x in input_dict['index']] + if input_dict["index"] and isinstance(input_dict["index"][0], list): + input_dict["index"] = [tuple(x) for x in input_dict["index"]] # convert index_name to tuple if required - if isinstance(input_dict['meta_data']['index_name'], list): - input_dict['meta_data']['index_name'] = tuple(input_dict['meta_data']['index_name']) - data = input_dict['data'] if input_dict['data'] else None + if isinstance(input_dict["meta_data"]["index_name"], list): + input_dict["meta_data"]["index_name"] = tuple(input_dict["meta_data"]["index_name"]) + data = input_dict["data"] if input_dict["data"] else None # confirm the dropin and replace with the actual class - if input_dict['meta_data']['dropin']: + if input_dict["meta_data"]["dropin"]: if not dropin_func: - raise AttributeError('the JSON has a dropin : %s : but the dropin parameter was not supplied' - % input_dict['meta_data']['dropin']) - elif input_dict['meta_data']['dropin'] == dropin_func.__str__(dropin_func): - input_dict['meta_data']['dropin'] = dropin_func + raise AttributeError( + "the JSON has a dropin : %s : but the dropin parameter was not supplied" + % input_dict["meta_data"]["dropin"] + ) + elif input_dict["meta_data"]["dropin"] == dropin_func.__str__(dropin_func): + input_dict["meta_data"]["dropin"] = dropin_func else: - raise AttributeError('the supplied dropin parameter: %s : does not match the value in ' - 'the JSON: %s' % (dropin_func, input_dict['meta_data']['dropin'])) - return cls(data=data, index=input_dict['index'], **input_dict['meta_data']) + raise AttributeError( + "the supplied dropin parameter: %s : does not match the value in " + "the JSON: %s" % (dropin_func, input_dict["meta_data"]["dropin"]) + ) + return 
cls(data=data, index=input_dict["index"], **input_dict["meta_data"]) diff --git a/raccoon/series.py b/raccoon/series.py index 76a4fdc..ee7de9e 100644 --- a/raccoon/series.py +++ b/raccoon/series.py @@ -1,13 +1,16 @@ """ Series class """ + from abc import ABC, abstractmethod from bisect import bisect_left, bisect_right from collections import OrderedDict from itertools import compress +from typing import Any, Callable, Literal, Self from tabulate import tabulate +from raccoon import DataFrame from raccoon.sort_utils import sorted_exists, sorted_index, sorted_list_indexes @@ -16,34 +19,35 @@ class SeriesBase(ABC): Base Series abstract base class that concrete implementations inherit from. Note that the .data and .index property methods in Series are views to the underlying data and not copies. """ + # Define slots to make object faster - __slots__ = ['_data', '_data_name', '_index', '_index_name', '_sort', '_dropin'] + __slots__ = ["_data", "_data_name", "_index", "_index_name", "_sort", "_dropin"] def __init__(self): """ No specific parameters, those are defined in the child classed """ - self._index = None - self._index_name = None - self._data = None - self._data_name = None - self._sort = None - self._dropin = None + self._index: list | None = None + self._index_name: str | tuple | None = None + self._data: list | None = None + self._data_name: str | tuple | None = None + self._sort: bool | None = None + self._dropin: Callable = None - def __len__(self): + def __len__(self) -> int: return len(self._index) - def __repr__(self): - return 'object id: %s\ndata:\n%s\nindex:\n%s\n' % (id(self), self._data, self._index) + def __repr__(self) -> str: + return "object id: %s\ndata:\n%s\nindex:\n%s\n" % (id(self), self._data, self._index) - def __str__(self): + def __str__(self) -> str: return self._make_table() - def _make_table(self, index=True, **kwargs): - kwargs['headers'] = 'keys' if 'headers' not in kwargs.keys() else kwargs['headers'] + def _make_table(self, index: 
bool = True, **kwargs) -> str: + kwargs["headers"] = "keys" if "headers" not in kwargs.keys() else kwargs["headers"] return tabulate(self.to_dict(ordered=True, index=index), **kwargs) - def print(self, index=True, **kwargs): + def print(self, index: bool = True, **kwargs) -> None: """ Print the contents of the Series. This method uses the tabulate function from the tabulate package. Use the kwargs to pass along any arguments to the tabulate function. @@ -70,19 +74,19 @@ def index(self, index_list): return @property - def data_name(self): + def data_name(self) -> str | tuple | None: return self._data_name @data_name.setter - def data_name(self, name): + def data_name(self, name: str | tuple | None) -> None: self._data_name = name @property - def index_name(self): + def index_name(self) -> str | tuple | None: return self._index_name @index_name.setter - def index_name(self, name): + def index_name(self, name: str | tuple | None) -> None: self._index_name = name @property @@ -90,10 +94,10 @@ def index_name(self, name): def sort(self): return - def _check_list(self, x): + def _check_list(self, x: Any) -> bool: return type(x) == (self._dropin if self._dropin else list) - def get(self, indexes, as_list=False): + def get(self, indexes: Any | list | list[bool], as_list: bool = False) -> Self | list | Any: """ Given indexes will return a sub-set of the Series. This method will direct to the specific methods based on what types are passed in for the indexes. 
The type of the return is determined by the @@ -108,7 +112,7 @@ def get(self, indexes, as_list=False): else: return self.get_cell(indexes) - def get_cell(self, index): + def get_cell(self, index: Any) -> Any: """ For a single index and return the value @@ -118,7 +122,7 @@ def get_cell(self, index): i = sorted_index(self._index, index) if self._sort else self._index.index(index) return self._data[i] - def get_rows(self, indexes, as_list=False): + def get_rows(self, indexes: Any | list | list[bool], as_list: bool = False) -> Self | list: """ For a list of indexes return the values of the indexes in that column. @@ -128,7 +132,7 @@ def get_rows(self, indexes, as_list=False): """ if all([isinstance(i, bool) for i in indexes]): # boolean list if len(indexes) != len(self._index): - raise ValueError('boolean index list must be same size of existing index') + raise ValueError("boolean index list must be same size of existing index") if all(indexes): # the entire column data = self._data index = self._index @@ -136,17 +140,30 @@ def get_rows(self, indexes, as_list=False): data = list(compress(self._data, indexes)) index = list(compress(self._index, indexes)) else: # index values list - locations = [sorted_index(self._index, x) for x in indexes] if self._sort \ + locations = ( + [sorted_index(self._index, x) for x in indexes] + if self._sort else [self._index.index(x) for x in indexes] + ) data = [self._data[i] for i in locations] index = [self._index[i] for i in locations] - return data if as_list else Series(data=data, index=index, data_name=self._data_name, - index_name=self._index_name, sort=self._sort, dropin=self._dropin) - - def get_location(self, location): + return ( + data + if as_list + else Series( + data=data, + index=index, + data_name=self._data_name, + index_name=self._index_name, + sort=self._sort, + dropin=self._dropin, + ) + ) + + def get_location(self, location: int) -> dict: """ For an index location return a dict of the index and value. 
This is optimized for speed because - it does not need to lookup the index location with a search. Also can accept relative indexing from the end of + it does not need to look up the index location with a search. Also, can accept relative indexing from the end of the SEries in standard python notation [-3, -2, -1] :param location: index location in standard python form of positive or negative number @@ -154,7 +171,7 @@ def get_location(self, location): """ return {self.index_name: self._index[location], self.data_name: self._data[location]} - def get_locations(self, locations, as_list=False): + def get_locations(self, locations: list[int], as_list: bool = False) -> Self | list: """ For list of locations return a Series or list of the values. @@ -166,9 +183,11 @@ def get_locations(self, locations, as_list=False): indexes = [self._index[x] for x in locations] return self.get(indexes, as_list) - def get_slice(self, start_index=None, stop_index=None, as_list=False): + def get_slice( + self, start_index: Any = None, stop_index: Any = None, as_list: bool = False + ) -> Self | tuple[list, list]: """ - For sorted Series will return either a Series or list of all of the rows where the index is greater than + For sorted Series will return either a Series or list of all the rows where the index is greater than or equal to the start_index if provided and less than or equal to the stop_index if provided. If either the start or stop index is None then will include from the first or last element, similar to standard python slide of [:5] or [:5]. Both end points are considered inclusive. 
@@ -179,7 +198,7 @@ def get_slice(self, start_index=None, stop_index=None, as_list=False): :return: Series or tuple of (index list, values list) """ if not self._sort: - raise RuntimeError('Can only use get_slice on sorted Series') + raise RuntimeError("Can only use get_slice on sorted Series") start_location = bisect_left(self._index, start_index) if start_index is not None else None stop_location = bisect_right(self._index, stop_index) if stop_index is not None else None @@ -190,20 +209,26 @@ def get_slice(self, start_index=None, stop_index=None, as_list=False): if as_list: return index, data else: - return Series(data=data, index=index, data_name=self._data_name, index_name=self._index_name, - sort=self._sort, dropin=self._dropin) - - def _slice_index(self, slicer): + return Series( + data=data, + index=index, + data_name=self._data_name, + index_name=self._index_name, + sort=self._sort, + dropin=self._dropin, + ) + + def _slice_index(self, slicer: slice) -> list: try: start_index = sorted_index(self._index, slicer.start) if self._sort else self._index.index(slicer.start) except ValueError: - raise IndexError('start of slice not in the index') + raise IndexError("start of slice not in the index") try: end_index = sorted_index(self._index, slicer.stop) if self._sort else self._index.index(slicer.stop) except ValueError: - raise IndexError('end of slice not in the index') + raise IndexError("end of slice not in the index") if end_index < start_index: - raise IndexError('end of slice is before start of slice') + raise IndexError("end of slice is before start of slice") pre_list = [False] * start_index mid_list = [True] * (end_index - start_index + 1) @@ -213,7 +238,7 @@ def _slice_index(self, slicer): pre_list.extend(post_list) return pre_list - def _validate_index(self, indexes): + def _validate_index(self, indexes: list) -> None: """ Raises an error if the indexes are not valid @@ -221,14 +246,14 @@ def _validate_index(self, indexes): :return: nothing """ if not 
(self._check_list(indexes) or type(indexes) == list or indexes is None): - raise TypeError('indexes must be list, %s or None' % self._dropin) + raise TypeError("indexes must be list, %s or None" % self._dropin) if len(indexes) != len(set(indexes)): # noqa - raise ValueError('index contains duplicates') + raise ValueError("index contains duplicates") if self._data: if len(indexes) != len(self._data): # noqa - raise ValueError('index length does not match data length') + raise ValueError("index length does not match data length") - def validate_integrity(self): + def validate_integrity(self) -> None: """ Validate the integrity of the Series. This checks that the indexes, column names and internal data are not corrupted. Will raise an error if there is a problem. @@ -237,7 +262,7 @@ def validate_integrity(self): """ self._validate_index(self._index) - def to_dict(self, index=True, ordered=False): + def to_dict(self, index: bool = True, ordered: bool = False) -> dict: """ Returns a dict where the keys are the data and index names and the values are list of the data and index. 
@@ -255,7 +280,7 @@ def to_dict(self, index=True, ordered=False): result.update(data_dict) return result - def head(self, rows): + def head(self, rows: int) -> Self: """ Return a Series of the first N rows @@ -266,7 +291,7 @@ def head(self, rows): rows_bool.extend([False] * max(0, len(self._index) - rows)) return self.get(indexes=rows_bool) - def tail(self, rows): + def tail(self, rows: int) -> Self: """ Return a Series of the last N rows @@ -277,7 +302,7 @@ def tail(self, rows): rows_bool.extend([True] * min(rows, len(self._index))) return self.get(indexes=rows_bool) - def select_index(self, compare, result='boolean'): + def select_index(self, compare: Any | tuple, result: Literal["boolean", "value"] = "boolean") -> list[bool | Any]: """ Finds the elements in the index that match the compare parameter and returns either a list of the values that match, of a boolean list the length of the index with True to each index that matches. If the indexes are @@ -290,31 +315,33 @@ def select_index(self, compare, result='boolean'): """ if isinstance(compare, tuple): # this crazy list comprehension will match all the tuples in the list with None being an * wildcard - booleans = [all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)]) - for x, v in enumerate(self._index)] + booleans = [ + all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)]) + for x, v in enumerate(self._index) + ] else: booleans = [False] * len(self._index) if self._sort: booleans[sorted_index(self._index, compare)] = True else: booleans[self._index.index(compare)] = True - if result == 'boolean': + if result == "boolean": return booleans - elif result == 'value': + elif result == "value": return list(compress(self._index, booleans)) else: - raise ValueError('only valid values for result parameter are: boolean or value.') + raise ValueError("only valid values for result parameter are: boolean or value.") - def isin(self, compare_list): + def 
isin(self, compare_list: list) -> list[bool]: """ - Returns a boolean list where each elements is whether that element in the column is in the compare_list. + Returns a boolean list where each element is whether that element in the column is in the compare_list. :param compare_list: list of items to compare to :return: list of booleans """ return [x in compare_list for x in self._data] - def equality(self, indexes=None, value=None): + def equality(self, indexes: list | list[bool] = None, value: Any = None) -> list[bool]: """ Math helper method. Given a column and optional indexes will return a list of booleans on the equality of the value for that index in the DataFrame to the value parameter. @@ -340,13 +367,22 @@ class Series(SeriesBase): index remains sort. """ - def __init__(self, data=None, index=None, data_name='value', index_name='index', sort=None, dropin=None): + def __init__( + self, + data: dict | list | None = None, + index: list | None = None, + data_name: str | tuple | None = "value", + index_name: str | tuple | None = "index", + sort: bool = None, + dropin: Callable = None, + ): """ :param data: (optional) list of values. :param index: (optional) list of index values. If None then the index will be integers starting with zero :param data_name: (optional) name of the data column, or will default to 'value' :param index_name: (optional) name for the index. Default is "index" - :param sort: if True then Series will keep the index sort. If True all index values must be of same type + :param sort: if True then Series will keep the index sort. If True all index values must be of same type. If + None then will default to True if no index is provided. 
:param dropin: if supplied the drop-in replacement for list that will be used """ super(SeriesBase, self).__init__() @@ -375,7 +411,7 @@ def __init__(self, data=None, index=None, data_name='value', index_name='index', else: self.index = list(range(len(self._data))) else: - raise TypeError('Not valid data type.') + raise TypeError("Not valid data type.") # setup sort self._sort = None @@ -387,7 +423,7 @@ def __init__(self, data=None, index=None, data_name='value', index_name='index', else: self.sort = True - def _pad_data(self, index_len): + def _pad_data(self, index_len: int) -> None: """ Pad the data in Series with [None] to ensure that data is the same length as index @@ -397,33 +433,33 @@ def _pad_data(self, index_len): self._data.extend([None] * (index_len - len(self._data))) @property - def data(self): + def data(self) -> list: return self._data @property - def index(self): + def index(self) -> list: return self._index @index.setter - def index(self, index_list): + def index(self, index_list: list) -> None: self._validate_index(index_list) self._index = self._dropin(index_list) if self._dropin else list(index_list) @property - def dropin(self): + def dropin(self) -> Callable: return self._dropin @property - def sort(self): + def sort(self) -> bool: return self._sort @sort.setter - def sort(self, boolean): + def sort(self, boolean: bool) -> None: self._sort = boolean if self._sort: self.sort_index() - def sort_index(self): + def sort_index(self) -> None: """ Sort the Series by the index. The sort modifies the Series inplace @@ -435,14 +471,14 @@ def sort_index(self): # sort data self._data = self._dropin([self._data[x] for x in sort]) if self._dropin else [self._data[x] for x in sort] - def set(self, indexes, values=None): + def set(self, indexes: Any | list, values: Any | list = None) -> None: """ Given indexes will set a sub-set of the Series to the values provided. 
This method will direct to the below - methods based on what types are passed in for the indexes. If the indexes contains values not in the Series + methods based on what types are passed in for the indexes. If the indexes contain values not in the Series then new rows or columns will be added. :param indexes: indexes value, list of indexes values, or a list of booleans. - :param values: value or list of values to set. If a list then must be the same length as the indexes parameter. + :param values: value or list of values to set. If a list then must be the same length as the indexes parameter. :return: nothing """ if self._check_list(indexes): @@ -450,7 +486,7 @@ def set(self, indexes, values=None): else: self.set_cell(indexes, values) - def _add_row(self, index): + def _add_row(self, index: Any) -> None: """ Add a new row to the Series @@ -460,7 +496,7 @@ def _add_row(self, index): self._index.append(index) self._data.append(None) - def _insert_row(self, i, index): + def _insert_row(self, i: int, index: Any) -> None: """ Insert a new row in the Series. @@ -474,7 +510,7 @@ def _insert_row(self, i, index): self._index.insert(i, index) self._data.insert(i, None) - def _add_missing_rows(self, indexes): + def _add_missing_rows(self, indexes: list) -> None: """ Given a list of indexes, find all the indexes that are not currently in the Series and make a new row for that index by appending to the Series. This does not maintain sorted order for the index. @@ -486,7 +522,7 @@ def _add_missing_rows(self, indexes): for x in new_indexes: self._add_row(x) - def _insert_missing_rows(self, indexes): + def _insert_missing_rows(self, indexes: list) -> None: """ Given a list of indexes, find all the indexes that are not currently in the Series and make a new row for that index, inserting into the index.
This requires the Series to be sorted=True @@ -498,7 +534,7 @@ def _insert_missing_rows(self, indexes): for x in new_indexes: self._insert_row(bisect_left(self._index, x), x) - def set_cell(self, index, value): + def set_cell(self, index: Any, value: Any) -> None: """ Sets the value of a single cell. If the index is not in the current index then a new index will be created. @@ -518,7 +554,7 @@ def set_cell(self, index, value): self._add_row(index) self._data[i] = value - def set_rows(self, index, values=None): + def set_rows(self, index: list | list[bool], values: Any | list = None) -> None: """ Set rows to a single value or list of values. If any of the index values are not in the current indexes then a new row will be created. @@ -533,9 +569,9 @@ def set_rows(self, index, values=None): if not self._check_list(values): # single value provided, not a list, so turn values into list values = [values for x in index if x] if len(index) != len(self._index): - raise ValueError('boolean index list must be same size of existing index') + raise ValueError("boolean index list must be same size of existing index") if len(values) != index.count(True): - raise ValueError('length of values list must equal number of True entries in index list') + raise ValueError("length of values list must equal number of True entries in index list") indexes = [i for i, x in enumerate(index) if x] for x, i in enumerate(indexes): self._data[i] = values[x] @@ -543,7 +579,7 @@ def set_rows(self, index, values=None): if not self._check_list(values): # single value provided, not a list, so turn values into list values = [values for _ in index] if len(values) != len(index): - raise ValueError('length of values and index must be the same.') + raise ValueError("length of values and index must be the same.") # insert or append indexes as needed if self._sort: exists_tuples = list(zip(*[sorted_exists(self._index, x) for x in index])) @@ -561,7 +597,7 @@ def set_rows(self, index, values=None): for x, i in 
enumerate(indexes): self._data[i] = values[x] - def set_location(self, location, value): + def set_location(self, location: int, value: Any) -> None: """ For a location set the value @@ -571,7 +607,7 @@ def set_location(self, location, value): """ self._data[location] = value - def set_locations(self, locations, values): + def set_locations(self, locations: list[int], values: list | Any) -> None: """ For a list of locations set the values. @@ -583,7 +619,7 @@ def set_locations(self, locations, values): indexes = [self._index[x] for x in locations] self.set(indexes, values) - def __setitem__(self, index, value): + def __setitem__(self, index: Any | list | slice, value: Any | list) -> None: """ Convenience wrapper around the set() method for using srs[] = X Usage... @@ -599,7 +635,7 @@ def __setitem__(self, index, value): indexes = self._slice_index(index) if isinstance(index, slice) else index return self.set(indexes=indexes, values=value) - def __getitem__(self, index): + def __getitem__(self, index: Any | list | slice) -> Any: """ Convenience wrapper around the get() method for using srs[] Usage... @@ -619,7 +655,7 @@ def __getitem__(self, index): else: # just a single cell or list of cells return self.get(index) - def append_row(self, index, value): + def append_row(self, index: Any, value: Any) -> None: """ Appends a row of value to the end of the data. Be very careful with this function as for sorted Series it will not enforce sort order. Use this only for speed when needed, be careful. @@ -629,12 +665,12 @@ def append_row(self, index, value): :return: nothing """ if index in self._index: - raise IndexError('index already in Series') + raise IndexError("index already in Series") self._index.append(index) self._data.append(value) - def append_rows(self, indexes, values): + def append_rows(self, indexes: list, values: list) -> None: """ Appends values to the end of the data. Be very careful with this function as for sort DataFrames it will not enforce sort order. 
Use this only for speed when needed, be careful. @@ -646,18 +682,18 @@ def append_rows(self, indexes, values): # check that the values data is less than or equal to the length of the indexes if len(values) != len(indexes): - raise ValueError('length of values is not equal to length of indexes') + raise ValueError("length of values is not equal to length of indexes") # check the indexes are not duplicates combined_index = self._index + indexes if len(set(combined_index)) != len(combined_index): - raise IndexError('duplicate indexes in Series') + raise IndexError("duplicate indexes in Series") # append index value self._index.extend(indexes) self._data.extend(values) - def delete(self, indexes): + def delete(self, indexes: Any | list | list[bool]) -> None: """ Delete rows from the DataFrame @@ -667,11 +703,14 @@ def delete(self, indexes): indexes = [indexes] if not self._check_list(indexes) else indexes if all([isinstance(i, bool) for i in indexes]): # boolean list if len(indexes) != len(self._index): - raise ValueError('boolean indexes list must be same size of existing indexes') + raise ValueError("boolean indexes list must be same size of existing indexes") indexes = [i for i, x in enumerate(indexes) if x] else: - indexes = [sorted_index(self._index, x) for x in indexes] if self._sort \ + indexes = ( + [sorted_index(self._index, x) for x in indexes] + if self._sort else [self._index.index(x) for x in indexes] + ) indexes = sorted(indexes, reverse=True) # need to sort and reverse list so deleting works for i in indexes: del self._data[i] @@ -679,14 +718,14 @@ def delete(self, indexes): for i in indexes: del self._index[i] - def reset_index(self): + def reset_index(self) -> None: """ Resets the index of the Series to simple integer list and the index name to 'index'. 
:return: nothing """ self.index = list(range(self.__len__())) - self.index_name = 'index' + self.index_name = "index" class ViewSeries(SeriesBase): @@ -697,7 +736,15 @@ class ViewSeries(SeriesBase): checking of the data, so it is assumed the data type is list-style. """ - def __init__(self, data=None, index=None, data_name='value', index_name='index', sort=False, offset=0): + def __init__( + self, + data: list | tuple | None = None, + index: list | None = None, + data_name: str | tuple | None = "value", + index_name: str | tuple | None = "index", + sort: bool = False, + offset: int = 0, + ): """ :param data: (optional) list of values. :param index: (optional) list of index values. If None then the index will be integers starting with zero @@ -713,9 +760,9 @@ def __init__(self, data=None, index=None, data_name='value', index_name='index', # check inputs if index is None: - raise ValueError('Index cannot be None.') + raise ValueError("Index cannot be None.") if data is None: - raise ValueError('Data cannot be None.') + raise ValueError("Data cannot be None.") # standard variable setup self._data = data # direct view, no copy @@ -726,27 +773,27 @@ def __init__(self, data=None, index=None, data_name='value', index_name='index', self._offset = offset @property - def data(self): + def data(self) -> list: return self._data @property - def index(self): + def index(self) -> list: return self._index @index.setter - def index(self, index_list): + def index(self, index_list: list) -> None: self._validate_index(index_list) self._index = index_list @property - def sort(self): + def sort(self) -> bool: return self._sort @property - def offset(self): + def offset(self) -> int: return self._offset - def value(self, indexes, int_as_index=False): + def value(self, indexes: int | Any | list | list[bool], int_as_index: bool = False) -> Any | list: """ Wrapper function for get. It will return a list, no index. 
If the indexes are integers it will be assumed that they are locations unless int_as_index = True. If the indexes are locations then they will be rotated to @@ -771,9 +818,9 @@ def value(self, indexes, int_as_index=False): stop = indexes.stop - self._offset + 1 # to capture the last value # check locations are valid and will not return empty if start > stop: - raise IndexError('end of slice is before start of slice') + raise IndexError("end of slice is before start of slice") if (start > 0 > stop) or (start < 0 < stop): - raise IndexError('slide indexes invalid with given offset:%f' % self._offset) + raise IndexError("slide indexes invalid with given offset:%f" % self._offset) # where end is the last element if (start < 0) and stop == 0: return self._data[start:] @@ -798,7 +845,7 @@ def value(self, indexes, int_as_index=False): else: return self.get(indexes) - def __getitem__(self, index): + def __getitem__(self, index: Any | list | slice) -> Self: """ Convenience wrapper around the value() method for using srs[]. 
This will treat all integers as locations @@ -815,7 +862,7 @@ def __getitem__(self, index): # Series creation functions @classmethod - def from_dataframe(cls, dataframe, column, offset=0): + def from_dataframe(cls, dataframe: DataFrame, column: str | tuple | None, offset: int = 0) -> Self: """ Creates and return a Series from a DataFrame and specific column @@ -824,11 +871,17 @@ def from_dataframe(cls, dataframe, column, offset=0): :param offset: offset value must be provided as there is no equivalent for a DataFrame :return: Series """ - return cls(data=dataframe.get_entire_column(column, as_list=True), index=dataframe.index, - data_name=column, index_name=dataframe.index_name, sort=dataframe.sort, offset=offset) + return cls( + data=dataframe.get_entire_column(column, as_list=True), + index=dataframe.index, + data_name=column, + index_name=dataframe.index_name, + sort=dataframe.sort, + offset=offset, + ) @classmethod - def from_series(cls, series, offset=0): + def from_series(cls, series: Series, offset: int = 0) -> Self: """ Creates and return a Series from a Series @@ -836,5 +889,11 @@ def from_series(cls, series, offset=0): :param offset: offset value must be provided as there is no equivalent for a DataFrame :return: Series """ - return cls(data=series.data, index=series.index, data_name=series.data_name, index_name=series.index_name, - sort=series.sort, offset=offset) + return cls( + data=series.data, + index=series.index, + data_name=series.data_name, + index_name=series.index_name, + sort=series.sort, + offset=offset, + ) diff --git a/raccoon/sort_utils.py b/raccoon/sort_utils.py index cf688d4..a5dee6e 100644 --- a/raccoon/sort_utils.py +++ b/raccoon/sort_utils.py @@ -3,9 +3,10 @@ """ from bisect import bisect_left, bisect_right +from typing import Any, Callable -def sorted_exists(values, x): +def sorted_exists(values: list, x: Any) -> tuple[bool, int]: """ For list, values, returns the insert position for item x and whether the item already exists in the 
list. This allows one function call to return either the index to overwrite an existing value in the list, or the index to @@ -21,7 +22,7 @@ def sorted_exists(values, x): return exists, i -def sorted_index(values, x): +def sorted_index(values: list, x: Any) -> int: """ For list, values, returns the index location of element x. If x does not exist will raise an error. @@ -34,7 +35,7 @@ def sorted_index(values, x): return values[i:j].index(x) + i -def sorted_list_indexes(list_to_sort, key=None, reverse=False): +def sorted_list_indexes(list_to_sort: list, key: Callable | Any = None, reverse: bool = False) -> list[int]: """ Sorts a list but returns the order of the index values of the list for the sort and not the values themselves. For example is the list provided is ['b', 'a', 'c'] then the result will be [2, 1, 3] @@ -46,6 +47,7 @@ def sorted_list_indexes(list_to_sort, key=None, reverse=False): :return: list of sorted index values """ if key is not None: + def key_func(i): return key(list_to_sort.__getitem__(i)) else: diff --git a/raccoon/utils.py b/raccoon/utils.py index f03deea..01918b6 100644 --- a/raccoon/utils.py +++ b/raccoon/utils.py @@ -5,7 +5,7 @@ import raccoon as rc -def assert_frame_equal(left, right, data_function=None, data_args=None): +def assert_frame_equal(left: rc.DataFrame, right: rc.DataFrame, data_function=None, data_args=None) -> None: """ For unit testing equality of two DataFrames. @@ -27,7 +27,9 @@ def assert_frame_equal(left, right, data_function=None, data_args=None): assert left.dropin == right.dropin -def assert_series_equal(left, right, data_function=None, data_args=None): +def assert_series_equal( + left: rc.Series | rc.ViewSeries, right: rc.Series | rc.ViewSeries, data_function=None, data_args=None +) -> None: """ For unit testing equality of two Series. 
diff --git a/tests/test_dataframe/test_dataframe.py b/tests/test_dataframe/test_dataframe.py index e325f21..1eb00ee 100644 --- a/tests/test_dataframe/test_dataframe.py +++ b/tests/test_dataframe/test_dataframe.py @@ -205,7 +205,7 @@ def test_input_data_mutability(): assert df.data != orig_data assert df.get(0, 'a') == [1, 11] - # using set to change the DataFrame data does not effect the input data + # using set to change the DataFrame data does not affect the input data df[1, 'a'] = [2, 22] assert input_data['a'] == [[1, 11], [2], [3]] diff --git a/tests/test_dataframe/test_delete.py b/tests/test_dataframe/test_delete.py index d664be4..813dabd 100644 --- a/tests/test_dataframe/test_delete.py +++ b/tests/test_dataframe/test_delete.py @@ -5,102 +5,102 @@ def test_delete_row(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a']) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"]) - df.delete_rows(['a', 'c']) - assert_frame_equal(df, rc.DataFrame({'a': [2], 'b': [5]}, columns=['b', 'a'], index=['b'])) + df.delete_rows(["a", "c"]) + assert_frame_equal(df, rc.DataFrame({"a": [2], "b": [5]}, columns=["b", "a"], index=["b"])) - df.delete_rows('b') - assert_frame_equal(df, rc.DataFrame(columns=['b', 'a'], sort=False)) + df.delete_rows("b") + assert_frame_equal(df, rc.DataFrame(columns=["b", "a"], sort=False)) # insert back in data - df[1, 'a'] = 9 + df[1, "a"] = 9 assert df.data == [[None], [9]] - df[2, 'b'] = 8 + df[2, "b"] = 8 assert df.data == [[None, 8], [9, None]] - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a']) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"]) # cannot delete values not in index with pytest.raises(ValueError): - df.delete_rows(['bad']) + df.delete_rows(["bad"]) # length of boolean must be len of index with pytest.raises(ValueError): df.delete_rows([True, False]) 
df.delete_rows([True, False, True]) - assert_frame_equal(df, rc.DataFrame({'a': [2], 'b': [5]}, columns=['b', 'a'], index=['b'])) + assert_frame_equal(df, rc.DataFrame({"a": [2], "b": [5]}, columns=["b", "a"], index=["b"])) df.delete_rows([True]) - assert_frame_equal(df, rc.DataFrame(columns=['b', 'a'], sort=False)) + assert_frame_equal(df, rc.DataFrame(columns=["b", "a"], sort=False)) def test_delete_row_sorted(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a'], sort=True) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"], sort=True) - df.delete_rows(['a', 'c']) - assert_frame_equal(df, rc.DataFrame({'a': [2], 'b': [5]}, columns=['b', 'a'], index=['b'], sort=True)) + df.delete_rows(["a", "c"]) + assert_frame_equal(df, rc.DataFrame({"a": [2], "b": [5]}, columns=["b", "a"], index=["b"], sort=True)) - df.delete_rows('b') - assert_frame_equal(df, rc.DataFrame(columns=['b', 'a'], sort=True)) + df.delete_rows("b") + assert_frame_equal(df, rc.DataFrame(columns=["b", "a"], sort=True)) # insert back in data - df[1, 'a'] = 9 + df[1, "a"] = 9 assert df.data == [[None], [9]] - df[2, 'b'] = 8 + df[2, "b"] = 8 assert df.data == [[None, 8], [9, None]] - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a']) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"]) # cannot delete values not in index with pytest.raises(ValueError): - df.delete_rows(['bad']) + df.delete_rows(["bad"]) # length of boolean must be len of index with pytest.raises(ValueError): df.delete_rows([True, False]) df.delete_rows([True, False, True]) - assert_frame_equal(df, rc.DataFrame({'a': [2], 'b': [5]}, columns=['b', 'a'], index=['b'])) + assert_frame_equal(df, rc.DataFrame({"a": [2], "b": [5]}, columns=["b", "a"], index=["b"])) df.delete_rows([True]) - assert_frame_equal(df, rc.DataFrame(columns=['b', 'a'], sort=False)) + 
assert_frame_equal(df, rc.DataFrame(columns=["b", "a"], sort=False)) def test_delete_all_rows(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a']) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"]) - link_col = df.get_entire_column('b', as_list=True) + link_col = df.get_entire_column("b", as_list=True) link_index = df.index df.delete_all_rows() - assert_frame_equal(df, rc.DataFrame(columns=['b', 'a'], sort=False)) + assert_frame_equal(df, rc.DataFrame(columns=["b", "a"], sort=False)) assert link_col == [] assert link_index == [] def test_delete_columns(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, columns=['a', 'b', 'c']) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, columns=["a", "b", "c"]) # cannot delete bad column with pytest.raises(ValueError): - df.delete_columns(['bad', 'a']) + df.delete_columns(["bad", "a"]) - df.delete_columns(['a', 'c']) - assert_frame_equal(df, rc.DataFrame({'b': [4, 5, 6]})) + df.delete_columns(["a", "c"]) + assert_frame_equal(df, rc.DataFrame({"b": [4, 5, 6]})) assert df.index == [0, 1, 2] # insert some data back in - df[1, 'a'] = 77 + df[1, "a"] = 77 assert df.data == [[4, 5, 6], [None, 77, None]] - df.delete_columns(['b', 'a']) + df.delete_columns(["b", "a"]) assert_frame_equal(df, rc.DataFrame()) assert df.columns == [] assert df.index == [] # insert some data back in, fresh columns and index - df[1, 'e'] = 77 + df[1, "e"] = 77 assert df.data == [[77]] diff --git a/tests/test_dataframe/test_get.py b/tests/test_dataframe/test_get.py index 982deb4..6b05cb8 100644 --- a/tests/test_dataframe/test_get.py +++ b/tests/test_dataframe/test_get.py @@ -5,127 +5,144 @@ def test_get_cell(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 13], columns=['a', 'b', 'c'], - sort=False) + actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, 
index=[10, 11, 13], columns=["a", "b", "c"], sort=False + ) - assert actual.get(10, 'a') == 1 - assert actual.get(11, 'a') == 2 - assert actual.get(13, 'c') == 9 + assert actual.get(10, "a") == 1 + assert actual.get(11, "a") == 2 + assert actual.get(13, "c") == 9 # test items not in index raise errors with pytest.raises(ValueError): - actual.get(1, 'a') + actual.get(1, "a") with pytest.raises(ValueError): - actual.get(100, 'a') + actual.get(100, "a") with pytest.raises(ValueError): - actual.get(12, 'a') + actual.get(12, "a") def test_get_cell_sorted(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 13], columns=['a', 'b', 'c'], - sort=True) + actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=[10, 11, 13], columns=["a", "b", "c"], sort=True + ) - assert actual.get(10, 'a') == 1 - assert actual.get(11, 'a') == 2 - assert actual.get(13, 'c') == 9 + assert actual.get(10, "a") == 1 + assert actual.get(11, "a") == 2 + assert actual.get(13, "c") == 9 # test items not in index raise errors with pytest.raises(ValueError): - actual.get(1, 'a') + actual.get(1, "a") with pytest.raises(ValueError): - actual.get(100, 'a') + actual.get(100, "a") with pytest.raises(ValueError): - actual.get(12, 'a') + actual.get(12, "a") def test_get_rows(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [4, 5, 6, 7], 'c': [7, 8, 9, None]}, index=[10, 11, 12, 99], - columns=['a', 'b', 'c'], index_name='start_10', sort=False) - - expected = rc.DataFrame({'c': [8, 9]}, index=[11, 12], index_name='start_10', sort=False) - actual = df.get([11, 12], 'c') + df = rc.DataFrame( + {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [7, 8, 9, None]}, + index=[10, 11, 12, 99], + columns=["a", "b", "c"], + index_name="start_10", + sort=False, + ) + + expected = rc.DataFrame({"c": [8, 9]}, index=[11, 12], index_name="start_10", sort=False) + actual = df.get([11, 12], "c") assert_frame_equal(actual, expected) # test with boolean list - actual = 
df.get([False, True, True, False], 'c') + actual = df.get([False, True, True, False], "c") assert_frame_equal(actual, expected) # index out of order - expected = rc.DataFrame({'c': [None, 7]}, index=[99, 10], index_name='start_10', sort=False) - actual = df.get([99, 10], 'c') + expected = rc.DataFrame({"c": [None, 7]}, index=[99, 10], index_name="start_10", sort=False) + actual = df.get([99, 10], "c") assert_frame_equal(actual, expected) # get as a list - assert df.get([11, 12], 'c', as_list=True) == [8, 9] + assert df.get([11, 12], "c", as_list=True) == [8, 9] # get as a list - assert df.get([False, True, True, False], 'c', as_list=True) == [8, 9] + assert df.get([False, True, True, False], "c", as_list=True) == [8, 9] # get entire column - assert df.get(columns='b', as_list=True) == [4, 5, 6, 7] + assert df.get(columns="b", as_list=True) == [4, 5, 6, 7] # items not in index raise errors with pytest.raises(ValueError): - df.get([11, 88], 'c', as_list=True) + df.get([11, 88], "c", as_list=True) # not enough items in boolean list with pytest.raises(ValueError): - df.get([True, True], 'c') + df.get([True, True], "c") def test_get_rows_sorted(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [4, 5, 6, 7], 'c': [7, 8, 9, None]}, index=[10, 11, 12, 99], - columns=['a', 'b', 'c'], index_name='start_10', sort=True) - - expected = rc.DataFrame({'c': [8, 9]}, index=[11, 12], index_name='start_10', sort=True) - actual = df.get([11, 12], 'c') + df = rc.DataFrame( + {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [7, 8, 9, None]}, + index=[10, 11, 12, 99], + columns=["a", "b", "c"], + index_name="start_10", + sort=True, + ) + + expected = rc.DataFrame({"c": [8, 9]}, index=[11, 12], index_name="start_10", sort=True) + actual = df.get([11, 12], "c") assert_frame_equal(actual, expected) # get as a list - assert df.get([11, 12], 'c', as_list=True) == [8, 9] + assert df.get([11, 12], "c", as_list=True) == [8, 9] # test with boolean list - actual = df.get([False, True, True, False], 'c') 
+ actual = df.get([False, True, True, False], "c") assert_frame_equal(actual, expected) # index out of order - expected = rc.DataFrame({'c': [7, None]}, index=[10, 99], index_name='start_10', sort=True) - actual = df.get([99, 10], 'c') + expected = rc.DataFrame({"c": [7, None]}, index=[10, 99], index_name="start_10", sort=True) + actual = df.get([99, 10], "c") assert_frame_equal(actual, expected) # get as a list - assert df.get([False, True, True, False], 'c', as_list=True) == [8, 9] + assert df.get([False, True, True, False], "c", as_list=True) == [8, 9] # get entire column - assert df.get(columns='b', as_list=True) == [4, 5, 6, 7] + assert df.get(columns="b", as_list=True) == [4, 5, 6, 7] # items not in index raise errors with pytest.raises(ValueError): - df.get([11, 88], 'c', as_list=True) + df.get([11, 88], "c", as_list=True) # not enough items in boolean list with pytest.raises(ValueError): - df.get([True, True], 'c') + df.get([True, True], "c") def test_get_columns(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [4, 5, 6, 7], 'c': [7, 8, 9, None]}, index=[10, 11, 12, 99], - columns=['a', 'b', 'c'], index_name='start_10', sort=False) + df = rc.DataFrame( + {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [7, 8, 9, None]}, + index=[10, 11, 12, 99], + columns=["a", "b", "c"], + index_name="start_10", + sort=False, + ) # no columns given - expected = rc.DataFrame({'a': [4], 'b': [7], 'c': [None]}, index=[99], columns=['a', 'b', 'c'], - index_name='start_10', sort=False) + expected = rc.DataFrame( + {"a": [4], "b": [7], "c": [None]}, index=[99], columns=["a", "b", "c"], index_name="start_10", sort=False + ) actual = df.get_columns(99) assert_frame_equal(actual, expected) # specific columns - expected = rc.DataFrame({'a': [4], 'c': [None]}, index=[99], columns=['a', 'c'], index_name='start_10', - sort=False) - actual = df.get(99, ['a', 'c']) + expected = rc.DataFrame({"a": [4], "c": [None]}, index=[99], columns=["a", "c"], index_name="start_10", sort=False) + actual = 
df.get(99, ["a", "c"]) assert_frame_equal(actual, expected) # test with boolean list @@ -133,14 +150,13 @@ def test_get_columns(): assert_frame_equal(actual, expected) # columns out of order - expected = rc.DataFrame({'c': [8], 'b': [5]}, index=[11], columns=['c', 'b'], index_name='start_10', - sort=False) - actual = df.get(11, ['c', 'b']) + expected = rc.DataFrame({"c": [8], "b": [5]}, index=[11], columns=["c", "b"], index_name="start_10", sort=False) + actual = df.get(11, ["c", "b"]) assert_frame_equal(actual, expected) # as_dict - assert df.get(11, ['b', 'c'], as_dict=True) == {'start_10': 11, 'b': 5, 'c': 8} - assert df.get_columns(11, ['b', 'c'], as_dict=True) == {'start_10': 11, 'b': 5, 'c': 8} + assert df.get(11, ["b", "c"], as_dict=True) == {"start_10": 11, "b": 5, "c": 8} + assert df.get_columns(11, ["b", "c"], as_dict=True) == {"start_10": 11, "b": 5, "c": 8} # test boolean list not same length as columns with pytest.raises(ValueError): @@ -148,16 +164,20 @@ def test_get_columns(): # test index out of bounds with pytest.raises(ValueError): - df.get(88, ['a', 'c']) + df.get(88, ["a", "c"]) def test_get_columns_sorted(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [4, 5, 6, 7], 'c': [7, 8, 9, None]}, index=[10, 11, 12, 99], - columns=['a', 'b', 'c'], index_name='start_10', sort=True) - - expected = rc.DataFrame({'a': [4], 'c': [None]}, index=[99], columns=['a', 'c'], index_name='start_10', - sort=True) - actual = df.get(99, ['a', 'c']) + df = rc.DataFrame( + {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [7, 8, 9, None]}, + index=[10, 11, 12, 99], + columns=["a", "b", "c"], + index_name="start_10", + sort=True, + ) + + expected = rc.DataFrame({"a": [4], "c": [None]}, index=[99], columns=["a", "c"], index_name="start_10", sort=True) + actual = df.get(99, ["a", "c"]) assert_frame_equal(actual, expected) # test with boolean list @@ -165,9 +185,8 @@ def test_get_columns_sorted(): assert_frame_equal(actual, expected) # columns out of order - expected = 
rc.DataFrame({'c': [8], 'b': [5]}, index=[11], columns=['c', 'b'], index_name='start_10', - sort=True) - actual = df.get(11, ['c', 'b']) + expected = rc.DataFrame({"c": [8], "b": [5]}, index=[11], columns=["c", "b"], index_name="start_10", sort=True) + actual = df.get(11, ["c", "b"]) assert_frame_equal(actual, expected) # test boolean list not same length as columns @@ -176,16 +195,22 @@ def test_get_columns_sorted(): # test index out of bounds with pytest.raises(ValueError): - df.get(88, ['a', 'c']) + df.get(88, ["a", "c"]) def test_get_matrix(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9], 'd': [10, 11, 12]}, index=['x', 'y', 'z'], - columns=['a', 'b', 'c', 'd'], index_name='letters', sort=False) - - expected = rc.DataFrame({'b': [4, 6], 'd': [10, 12]}, index=['x', 'z'], columns=['b', 'd'], index_name='letters', - sort=False) - actual = df.get(['x', 'z'], ['b', 'd']) + df = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, + index=["x", "y", "z"], + columns=["a", "b", "c", "d"], + index_name="letters", + sort=False, + ) + + expected = rc.DataFrame( + {"b": [4, 6], "d": [10, 12]}, index=["x", "z"], columns=["b", "d"], index_name="letters", sort=False + ) + actual = df.get(["x", "z"], ["b", "d"]) assert_frame_equal(actual, expected) # test with booleans @@ -193,9 +218,10 @@ def test_get_matrix(): assert_frame_equal(actual, expected) # columns out of order - expected = rc.DataFrame({'d': [10, 12], 'c': [7, 9]}, index=['x', 'z'], columns=['d', 'c'], index_name='letters', - sort=False) - actual = df.get(['x', 'z'], ['d', 'c']) + expected = rc.DataFrame( + {"d": [10, 12], "c": [7, 9]}, index=["x", "z"], columns=["d", "c"], index_name="letters", sort=False + ) + actual = df.get(["x", "z"], ["d", "c"]) assert_frame_equal(actual, expected) # get everything @@ -212,20 +238,26 @@ def test_get_matrix(): # missing index with pytest.raises(ValueError): - df.get_matrix(['BAD', 'x'], ['a', 'b']) + df.get_matrix(["BAD", 
"x"], ["a", "b"]) # missing column with pytest.raises(ValueError): - df.get_matrix(['x', 'y'], ['a', 'b', 'BAD']) + df.get_matrix(["x", "y"], ["a", "b", "BAD"]) def test_get_matrix_sorted(): - df = rc.DataFrame({'a': [2, 1, 3], 'b': [5, 4, 6], 'c': [8, 7, 9], 'd': [11, 10, 12]}, index=['y', 'x', 'z'], - columns=['a', 'b', 'c', 'd'], index_name='letters', sort=True) - - expected = rc.DataFrame({'b': [4, 6], 'd': [10, 12]}, index=['x', 'z'], columns=['b', 'd'], index_name='letters', - sort=True) - actual = df.get(['x', 'z'], ['b', 'd']) + df = rc.DataFrame( + {"a": [2, 1, 3], "b": [5, 4, 6], "c": [8, 7, 9], "d": [11, 10, 12]}, + index=["y", "x", "z"], + columns=["a", "b", "c", "d"], + index_name="letters", + sort=True, + ) + + expected = rc.DataFrame( + {"b": [4, 6], "d": [10, 12]}, index=["x", "z"], columns=["b", "d"], index_name="letters", sort=True + ) + actual = df.get(["x", "z"], ["b", "d"]) assert_frame_equal(actual, expected) # test with booleans @@ -233,9 +265,10 @@ def test_get_matrix_sorted(): assert_frame_equal(actual, expected) # columns out of order - expected = rc.DataFrame({'d': [10, 12], 'c': [7, 9]}, index=['x', 'z'], columns=['d', 'c'], index_name='letters', - sort=True) - actual = df.get(['x', 'z'], ['d', 'c']) + expected = rc.DataFrame( + {"d": [10, 12], "c": [7, 9]}, index=["x", "z"], columns=["d", "c"], index_name="letters", sort=True + ) + actual = df.get(["x", "z"], ["d", "c"]) assert_frame_equal(actual, expected) # get everything @@ -252,57 +285,57 @@ def test_get_matrix_sorted(): # missing index with pytest.raises(ValueError): - df.get_matrix(['BAD', 'x'], ['a', 'b']) + df.get_matrix(["BAD", "x"], ["a", "b"]) # missing column with pytest.raises(ValueError): - df.get_matrix(['x', 'y'], ['a', 'b', 'BAD']) + df.get_matrix(["x", "y"], ["a", "b", "BAD"]) def test_get_location(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8]) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8]) # forward 
indexing, all columns - assert_frame_equal(df.get_location(2), rc.DataFrame({'a': [3], 'b': [7]}, index=[6])) - assert df.get_location(2, as_dict=True) == {'index': 6, 'a': 3, 'b': 7} - assert df.get_location(2, as_dict=True, index=False) == {'a': 3, 'b': 7} + assert_frame_equal(df.get_location(2), rc.DataFrame({"a": [3], "b": [7]}, index=[6])) + assert df.get_location(2, as_dict=True) == {"index": 6, "a": 3, "b": 7} + assert df.get_location(2, as_dict=True, index=False) == {"a": 3, "b": 7} # reverse indexing, all columns - assert_frame_equal(df.get_location(-1), rc.DataFrame({'a': [4], 'b': [8]}, index=[8])) - assert df.get_location(-1, as_dict=True) == {'index': 8, 'a': 4, 'b': 8} - assert df.get_location(-1, as_dict=True, index=False) == {'a': 4, 'b': 8} + assert_frame_equal(df.get_location(-1), rc.DataFrame({"a": [4], "b": [8]}, index=[8])) + assert df.get_location(-1, as_dict=True) == {"index": 8, "a": 4, "b": 8} + assert df.get_location(-1, as_dict=True, index=False) == {"a": 4, "b": 8} # forward indexing, one column - assert_frame_equal(df.get_location(0, ['a']), rc.DataFrame({'a': [1]}, index=[2])) - assert df.get_location(0, ['a'], as_dict=True) == {'index': 2, 'a': 1} - assert df.get_location(0, ['a'], as_dict=True, index=False) == {'a': 1} + assert_frame_equal(df.get_location(0, ["a"]), rc.DataFrame({"a": [1]}, index=[2])) + assert df.get_location(0, ["a"], as_dict=True) == {"index": 2, "a": 1} + assert df.get_location(0, ["a"], as_dict=True, index=False) == {"a": 1} # reverse indexing, all columns - assert_frame_equal(df.get_location(-2, ['b']), rc.DataFrame({'b': [7]}, index=[6])) - assert df.get_location(-2, ['b'], as_dict=True) == {'index': 6, 'b': 7} - assert df.get_location(-2, ['b'], as_dict=True, index=False) == {'b': 7} + assert_frame_equal(df.get_location(-2, ["b"]), rc.DataFrame({"b": [7]}, index=[6])) + assert df.get_location(-2, ["b"], as_dict=True) == {"index": 6, "b": 7} + assert df.get_location(-2, ["b"], as_dict=True, index=False) == 
{"b": 7} # single value for column and not list returns just the value - assert df.get_location(1, 'b') == 6 + assert df.get_location(1, "b") == 6 def test_get_locations(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8]) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8]) # multi row, multi columns - assert_frame_equal(df.get_locations([0, 2]), rc.DataFrame({'a': [1, 3], 'b': [5, 7]}, index=[2, 6])) + assert_frame_equal(df.get_locations([0, 2]), rc.DataFrame({"a": [1, 3], "b": [5, 7]}, index=[2, 6])) # multiple rows, single columns - assert_frame_equal(df.get_locations([1, 3], 'a'), rc.DataFrame({'a': [2, 4]}, index=[4, 8])) - assert df.get_locations([0, 2], 'b', as_list=True) == [5, 7] + assert_frame_equal(df.get_locations([1, 3], "a"), rc.DataFrame({"a": [2, 4]}, index=[4, 8])) + assert df.get_locations([0, 2], "b", as_list=True) == [5, 7] # single row, multiple columns - assert_frame_equal(df.get_locations([2]), rc.DataFrame({'a': [3], 'b': [7]}, index=[6])) + assert_frame_equal(df.get_locations([2]), rc.DataFrame({"a": [3], "b": [7]}, index=[6])) def test_get_slice(): # fails for non-sort DataFrame - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8]) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8]) with pytest.raises(RuntimeError): df.get_slice(2, 4) @@ -310,198 +343,250 @@ def test_get_slice(): df = rc.DataFrame(sort=True) assert_frame_equal(df.get_slice(3, 3), rc.DataFrame(sort=True)) - df = rc.DataFrame(sort=True, columns=['a', 'b']) - assert_frame_equal(df.get_slice(3, 3), rc.DataFrame(sort=True, columns=['a', 'b'])) + df = rc.DataFrame(sort=True, columns=["a", "b"]) + assert_frame_equal(df.get_slice(3, 3), rc.DataFrame(sort=True, columns=["a", "b"])) # full DataFrame - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, columns=['a', 'b'], index=[2, 4, 6, 8], sort=True) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, 
columns=["a", "b"], index=[2, 4, 6, 8], sort=True) assert_frame_equal(df.get_slice(2, 8), df) assert_frame_equal(df.get_slice(1, 8), df) assert_frame_equal(df.get_slice(2, 10), df) assert_frame_equal(df.get_slice(1, 10), df) - assert_frame_equal(df.get_slice(4, 4, ['b']), rc.DataFrame({'b': [6]}, index=[4], sort=True)) - assert_frame_equal(df.get_slice(3, 4, ['b']), rc.DataFrame({'b': [6]}, index=[4], sort=True)) - assert_frame_equal(df.get_slice(4, 5, ['b']), rc.DataFrame({'b': [6]}, index=[4], sort=True)) - assert_frame_equal(df.get_slice(3, 5, ['b']), rc.DataFrame({'b': [6]}, index=[4], sort=True)) + assert_frame_equal(df.get_slice(4, 4, ["b"]), rc.DataFrame({"b": [6]}, index=[4], sort=True)) + assert_frame_equal(df.get_slice(3, 4, ["b"]), rc.DataFrame({"b": [6]}, index=[4], sort=True)) + assert_frame_equal(df.get_slice(4, 5, ["b"]), rc.DataFrame({"b": [6]}, index=[4], sort=True)) + assert_frame_equal(df.get_slice(3, 5, ["b"]), rc.DataFrame({"b": [6]}, index=[4], sort=True)) - assert_frame_equal(df.get_slice(4, 6, ['a']), rc.DataFrame({'a': [2, 3]}, index=[4, 6], sort=True)) - assert_frame_equal(df.get_slice(3, 6, ['a']), rc.DataFrame({'a': [2, 3]}, index=[4, 6], sort=True)) - assert_frame_equal(df.get_slice(4, 7, ['a']), rc.DataFrame({'a': [2, 3]}, index=[4, 6], sort=True)) - assert_frame_equal(df.get_slice(3, 7, ['a']), rc.DataFrame({'a': [2, 3]}, index=[4, 6], sort=True)) + assert_frame_equal(df.get_slice(4, 6, ["a"]), rc.DataFrame({"a": [2, 3]}, index=[4, 6], sort=True)) + assert_frame_equal(df.get_slice(3, 6, ["a"]), rc.DataFrame({"a": [2, 3]}, index=[4, 6], sort=True)) + assert_frame_equal(df.get_slice(4, 7, ["a"]), rc.DataFrame({"a": [2, 3]}, index=[4, 6], sort=True)) + assert_frame_equal(df.get_slice(3, 7, ["a"]), rc.DataFrame({"a": [2, 3]}, index=[4, 6], sort=True)) - assert_frame_equal(df.get_slice(None, 5, ['a']), rc.DataFrame({'a': [1, 2]}, index=[2, 4], sort=True)) - assert_frame_equal(df.get_slice(5, None, [True, False]), rc.DataFrame({'a': [3, 
4]}, index=[6, 8], sort=True)) + assert_frame_equal(df.get_slice(None, 5, ["a"]), rc.DataFrame({"a": [1, 2]}, index=[2, 4], sort=True)) + assert_frame_equal(df.get_slice(5, None, [True, False]), rc.DataFrame({"a": [3, 4]}, index=[6, 8], sort=True)) # boolean column list not the right size with pytest.raises(ValueError): df.get_slice(5, None, [True]) - assert_frame_equal(df.get_slice(3, 3), rc.DataFrame({'a': [], 'b': []}, columns=['a', 'b'], sort=True)) - assert_frame_equal(df.get_slice(0, 0), rc.DataFrame({'a': [], 'b': []}, columns=['a', 'b'], sort=True)) - assert_frame_equal(df.get_slice(10, 10), rc.DataFrame({'a': [], 'b': []}, columns=['a', 'b'], sort=True)) + assert_frame_equal(df.get_slice(3, 3), rc.DataFrame({"a": [], "b": []}, columns=["a", "b"], sort=True)) + assert_frame_equal(df.get_slice(0, 0), rc.DataFrame({"a": [], "b": []}, columns=["a", "b"], sort=True)) + assert_frame_equal(df.get_slice(10, 10), rc.DataFrame({"a": [], "b": []}, columns=["a", "b"], sort=True)) def test_get_slice_as_dict(): # fails for non-sort DataFrame - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8]) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8]) with pytest.raises(RuntimeError): df.get_slice(2, 4) - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8], sort=True) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8], sort=True) - assert df.get_slice(2, 8, as_dict=True) == ([2, 4, 6, 8], {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}) - assert df.get_slice(1, 8, as_dict=True) == ([2, 4, 6, 8], {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}) - assert df.get_slice(2, 10, as_dict=True) == ([2, 4, 6, 8], {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}) - assert df.get_slice(1, 10, as_dict=True) == ([2, 4, 6, 8], {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}) + assert df.get_slice(2, 8, as_dict=True) == ([2, 4, 6, 8], {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + assert df.get_slice(1, 8, as_dict=True) == ([2, 
4, 6, 8], {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + assert df.get_slice(2, 10, as_dict=True) == ([2, 4, 6, 8], {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + assert df.get_slice(1, 10, as_dict=True) == ([2, 4, 6, 8], {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - assert df.get_slice(4, 4, ['b'], as_dict=True) == ([4], {'b': [6]}) - assert df.get_slice(3, 4, ['b'], as_dict=True) == ([4], {'b': [6]}) - assert df.get_slice(4, 5, ['b'], as_dict=True) == ([4], {'b': [6]}) - assert df.get_slice(3, 5, ['b'], as_dict=True) == ([4], {'b': [6]}) + assert df.get_slice(4, 4, ["b"], as_dict=True) == ([4], {"b": [6]}) + assert df.get_slice(3, 4, ["b"], as_dict=True) == ([4], {"b": [6]}) + assert df.get_slice(4, 5, ["b"], as_dict=True) == ([4], {"b": [6]}) + assert df.get_slice(3, 5, ["b"], as_dict=True) == ([4], {"b": [6]}) - assert df.get_slice(4, 6, ['a'], as_dict=True) == ([4, 6], {'a': [2, 3]}) - assert df.get_slice(3, 6, ['a'], as_dict=True) == ([4, 6], {'a': [2, 3]}) - assert df.get_slice(4, 7, ['a'], as_dict=True) == ([4, 6], {'a': [2, 3]}) - assert df.get_slice(3, 7, ['a'], as_dict=True) == ([4, 6], {'a': [2, 3]}) + assert df.get_slice(4, 6, ["a"], as_dict=True) == ([4, 6], {"a": [2, 3]}) + assert df.get_slice(3, 6, ["a"], as_dict=True) == ([4, 6], {"a": [2, 3]}) + assert df.get_slice(4, 7, ["a"], as_dict=True) == ([4, 6], {"a": [2, 3]}) + assert df.get_slice(3, 7, ["a"], as_dict=True) == ([4, 6], {"a": [2, 3]}) - assert df.get_slice(None, 5, ['a'], as_dict=True) == ([2, 4], {'a': [1, 2]}) - assert df.get_slice(5, None, ['a'], as_dict=True) == ([6, 8], {'a': [3, 4]}) + assert df.get_slice(None, 5, ["a"], as_dict=True) == ([2, 4], {"a": [1, 2]}) + assert df.get_slice(5, None, ["a"], as_dict=True) == ([6, 8], {"a": [3, 4]}) - assert df.get_slice(3, 3, as_dict=True) == ([], {'a': [], 'b': []}) - assert df.get_slice(0, 0, as_dict=True) == ([], {'a': [], 'b': []}) - assert df.get_slice(10, 10, as_dict=True) == ([], {'a': [], 'b': []}) + assert df.get_slice(3, 3, as_dict=True) == ([], 
{"a": [], "b": []}) + assert df.get_slice(0, 0, as_dict=True) == ([], {"a": [], "b": []}) + assert df.get_slice(10, 10, as_dict=True) == ([], {"a": [], "b": []}) def test_get_square_brackets(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9], 'd': [10, 11, 12]}, columns=['a', 'b', 'c', 'd'], - sort=False) + df = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, columns=["a", "b", "c", "d"], sort=False + ) # df['b'] -- get column - assert_frame_equal(df['b'], rc.DataFrame({'b': [4, 5, 6]}, sort=False)) + assert_frame_equal(df["b"], rc.DataFrame({"b": [4, 5, 6]}, sort=False)) # df[['a', 'b', c']] -- get columns - assert_frame_equal(df[['a', 'b', 'c']], rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, - columns=['a', 'b', 'c'], sort=False)) + assert_frame_equal( + df[["a", "b", "c"]], + rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, columns=["a", "b", "c"], sort=False), + ) - assert_frame_equal(df[['c', 'a']], rc.DataFrame({'c': [7, 8, 9], 'a': [1, 2, 3]}, columns=['c', 'a'], sort=False)) + assert_frame_equal(df[["c", "a"]], rc.DataFrame({"c": [7, 8, 9], "a": [1, 2, 3]}, columns=["c", "a"], sort=False)) # df[1, 'd'] -- get cell at index = 5, column = 'b' - assert df[1, 'd'] == 11 + assert df[1, "d"] == 11 # df[[0, 2]] -- get indexes = [0, 2] all columns - assert_frame_equal(df[[0, 2], df.columns], - rc.DataFrame({'a': [1, 3], 'b': [4, 6], 'c': [7, 9], 'd': [10, 12]}, - columns=['a', 'b', 'c', 'd'], index=[0, 2], sort=False)) - - assert_frame_equal(df[[2, 1], df.columns], - rc.DataFrame({'a': [3, 2], 'b': [6, 5], 'c': [9, 8], 'd': [12, 11]}, - columns=['a', 'b', 'c', 'd'], index=[2, 1], sort=False)) + assert_frame_equal( + df[[0, 2], df.columns], + rc.DataFrame( + {"a": [1, 3], "b": [4, 6], "c": [7, 9], "d": [10, 12]}, + columns=["a", "b", "c", "d"], + index=[0, 2], + sort=False, + ), + ) + + assert_frame_equal( + df[[2, 1], df.columns], + rc.DataFrame( + {"a": [3, 2], "b": [6, 5], 
"c": [9, 8], "d": [12, 11]}, + columns=["a", "b", "c", "d"], + index=[2, 1], + sort=False, + ), + ) # df[[0, 2], 'c'] -- get indexes = [4, 5], column = 'b' - assert_frame_equal(df[[0, 2], 'c'], rc.DataFrame({'c': [7, 9]}, index=[0, 2], sort=False)) + assert_frame_equal(df[[0, 2], "c"], rc.DataFrame({"c": [7, 9]}, index=[0, 2], sort=False)) - assert_frame_equal(df[[2, 0], 'c'], rc.DataFrame({'c': [9, 7]}, index=[2, 0], sort=False)) + assert_frame_equal(df[[2, 0], "c"], rc.DataFrame({"c": [9, 7]}, index=[2, 0], sort=False)) # df[[1, 2], ['a', 'd']] -- get indexes = [4, 5], columns = ['a', 'b'] - assert_frame_equal(df[[1, 2], ['a', 'd']], rc.DataFrame({'a': [2, 3], 'd': [11, 12]}, columns=['a', 'd'], - index=[1, 2], sort=False)) + assert_frame_equal( + df[[1, 2], ["a", "d"]], rc.DataFrame({"a": [2, 3], "d": [11, 12]}, columns=["a", "d"], index=[1, 2], sort=False) + ) - assert_frame_equal(df[[2, 0], ['d', 'a']], rc.DataFrame({'d': [12, 10], 'a': [3, 1]}, columns=['d', 'a'], - index=[2, 0], sort=False)) + assert_frame_equal( + df[[2, 0], ["d", "a"]], rc.DataFrame({"d": [12, 10], "a": [3, 1]}, columns=["d", "a"], index=[2, 0], sort=False) + ) def test_get_square_brackets_sorted(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9], 'd': [10, 11, 12]}, columns=['a', 'b', 'c', 'd'], - sort=True) + df = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, columns=["a", "b", "c", "d"], sort=True + ) # df['b'] -- get column - assert_frame_equal(df['b'], rc.DataFrame({'b': [4, 5, 6]}, sort=True)) + assert_frame_equal(df["b"], rc.DataFrame({"b": [4, 5, 6]}, sort=True)) # df[['a', 'b', c']] -- get columns - assert_frame_equal(df[['a', 'b', 'c']], rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, - columns=['a', 'b', 'c'], sort=True)) + assert_frame_equal( + df[["a", "b", "c"]], + rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, columns=["a", "b", "c"], sort=True), + ) - assert_frame_equal(df[['c', 'a']], 
rc.DataFrame({'c': [7, 8, 9], 'a': [1, 2, 3]}, columns=['c', 'a'], sort=True)) + assert_frame_equal(df[["c", "a"]], rc.DataFrame({"c": [7, 8, 9], "a": [1, 2, 3]}, columns=["c", "a"], sort=True)) # df[1, 'd'] -- get cell at index = 5, column = 'b' - assert df[1, 'd'] == 11 + assert df[1, "d"] == 11 # df[[0, 2]] -- get indexes = [0, 2] all columns - assert_frame_equal(df[[0, 2], df.columns], - rc.DataFrame({'a': [1, 3], 'b': [4, 6], 'c': [7, 9], 'd': [10, 12]}, - columns=['a', 'b', 'c', 'd'], index=[0, 2], sort=True)) - - assert_frame_equal(df[[2, 1], df.columns], - rc.DataFrame({'a': [2, 3], 'b': [5, 6], 'c': [8, 9], 'd': [11, 12]}, - columns=['a', 'b', 'c', 'd'], index=[1, 2], sort=True)) + assert_frame_equal( + df[[0, 2], df.columns], + rc.DataFrame( + {"a": [1, 3], "b": [4, 6], "c": [7, 9], "d": [10, 12]}, + columns=["a", "b", "c", "d"], + index=[0, 2], + sort=True, + ), + ) + + assert_frame_equal( + df[[2, 1], df.columns], + rc.DataFrame( + {"a": [2, 3], "b": [5, 6], "c": [8, 9], "d": [11, 12]}, + columns=["a", "b", "c", "d"], + index=[1, 2], + sort=True, + ), + ) # df[[0, 2], 'c'] -- get indexes = [4, 5], column = 'b' - assert_frame_equal(df[[0, 2], 'c'], rc.DataFrame({'c': [7, 9]}, index=[0, 2], sort=True)) + assert_frame_equal(df[[0, 2], "c"], rc.DataFrame({"c": [7, 9]}, index=[0, 2], sort=True)) - assert_frame_equal(df[[2, 0], 'c'], rc.DataFrame({'c': [9, 7]}, index=[2, 0], sort=True)) + assert_frame_equal(df[[2, 0], "c"], rc.DataFrame({"c": [9, 7]}, index=[2, 0], sort=True)) # df[[1, 2], ['a', 'd']] -- get indexes = [4, 5], columns = ['a', 'b'] - assert_frame_equal(df[[1, 2], ['a', 'd']], rc.DataFrame({'a': [2, 3], 'd': [11, 12]}, columns=['a', 'd'], - index=[1, 2], sort=True)) + assert_frame_equal( + df[[1, 2], ["a", "d"]], rc.DataFrame({"a": [2, 3], "d": [11, 12]}, columns=["a", "d"], index=[1, 2], sort=True) + ) - assert_frame_equal(df[[2, 0], ['d', 'a']], rc.DataFrame({'d': [10, 12], 'a': [1, 3]}, columns=['d', 'a'], - index=[0, 2], sort=True)) + 
assert_frame_equal( + df[[2, 0], ["d", "a"]], rc.DataFrame({"d": [10, 12], "a": [1, 3]}, columns=["d", "a"], index=[0, 2], sort=True) + ) def test_get_slicer(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9], 'd': [10, 11, 12]}, columns=['a', 'b', 'c', 'd'], - sort=False) + df = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, columns=["a", "b", "c", "d"], sort=False + ) # df[1:2] -- get slice from index 1 to 2, all columns - assert_frame_equal(df[1:2], - rc.DataFrame({'a': [2, 3], 'b': [5, 6], 'c': [8, 9], 'd': [11, 12]}, - columns=['a', 'b', 'c', 'd'], index=[1, 2], sort=False)) + assert_frame_equal( + df[1:2], + rc.DataFrame( + {"a": [2, 3], "b": [5, 6], "c": [8, 9], "d": [11, 12]}, + columns=["a", "b", "c", "d"], + index=[1, 2], + sort=False, + ), + ) # df[0:1, ['c', 'd']] -- get slice from index 0 to 1, columns ['c', 'd'] - assert_frame_equal(df[0:1, ['c', 'd']], rc.DataFrame({'c': [7, 8], 'd': [10, 11]}, - columns=['c', 'd'], index=[0, 1], sort=False)) + assert_frame_equal( + df[0:1, ["c", "d"]], rc.DataFrame({"c": [7, 8], "d": [10, 11]}, columns=["c", "d"], index=[0, 1], sort=False) + ) - assert_frame_equal(df[0:1, ['d', 'c']], rc.DataFrame({'d': [10, 11], 'c': [7, 8]}, - columns=['d', 'c'], index=[0, 1], sort=False)) + assert_frame_equal( + df[0:1, ["d", "c"]], rc.DataFrame({"d": [10, 11], "c": [7, 8]}, columns=["d", "c"], index=[0, 1], sort=False) + ) # df[1:1, 'c'] -- get slice 1 to 1 and column 'c' - assert_frame_equal(df[1:1, 'c'], rc.DataFrame({'c': [8]}, index=[1], sort=False)) + assert_frame_equal(df[1:1, "c"], rc.DataFrame({"c": [8]}, index=[1], sort=False)) # test indexes not in the range with pytest.raises(IndexError): - _ = df[4:5, 'c'] + _ = df[4:5, "c"] with pytest.raises(IndexError): - _ = df[0:8, 'c'] + _ = df[0:8, "c"] with pytest.raises(IndexError): - _ = df[2:1, 'c'] + _ = df[2:1, "c"] def test_get_slicer_sorted(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9], 
'd': [10, 11, 12]}, columns=['a', 'b', 'c', 'd'], - sort=True) + df = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, columns=["a", "b", "c", "d"], sort=True + ) # df[1:2] -- get slice from index 1 to 2, all columns - assert_frame_equal(df[1:2], - rc.DataFrame({'a': [2, 3], 'b': [5, 6], 'c': [8, 9], 'd': [11, 12]}, - columns=['a', 'b', 'c', 'd'], index=[1, 2], sort=True)) + assert_frame_equal( + df[1:2], + rc.DataFrame( + {"a": [2, 3], "b": [5, 6], "c": [8, 9], "d": [11, 12]}, + columns=["a", "b", "c", "d"], + index=[1, 2], + sort=True, + ), + ) # df[0:1, ['c', 'd']] -- get slice from index 0 to 1, columns ['c', 'd'] - assert_frame_equal(df[0:1, ['c', 'd']], rc.DataFrame({'c': [7, 8], 'd': [10, 11]}, - columns=['c', 'd'], index=[0, 1], sort=True)) + assert_frame_equal( + df[0:1, ["c", "d"]], rc.DataFrame({"c": [7, 8], "d": [10, 11]}, columns=["c", "d"], index=[0, 1], sort=True) + ) - assert_frame_equal(df[0:1, ['d', 'c']], rc.DataFrame({'d': [10, 11], 'c': [7, 8]}, - columns=['d', 'c'], index=[0, 1], sort=True)) + assert_frame_equal( + df[0:1, ["d", "c"]], rc.DataFrame({"d": [10, 11], "c": [7, 8]}, columns=["d", "c"], index=[0, 1], sort=True) + ) # df[1:1, 'c'] -- get slice 1 to 1 and column 'c' - assert_frame_equal(df[1:1, 'c'], rc.DataFrame({'c': [8]}, index=[1], sort=True)) + assert_frame_equal(df[1:1, "c"], rc.DataFrame({"c": [8]}, index=[1], sort=True)) # test indexes not in the range - assert_frame_equal(df[4:5], rc.DataFrame(columns=['a', 'b', 'c', 'd'], sort=True)) - assert_frame_equal(df[2:1], rc.DataFrame(columns=['a', 'b', 'c', 'd'], sort=True)) + assert_frame_equal(df[4:5], rc.DataFrame(columns=["a", "b", "c", "d"], sort=True)) + assert_frame_equal(df[2:1], rc.DataFrame(columns=["a", "b", "c", "d"], sort=True)) assert_frame_equal(df[0:8], df) assert_frame_equal(df[1.5:3.5], df.get_slice(1.5, 3.5)) diff --git a/tests/test_dataframe/test_getters.py b/tests/test_dataframe/test_getters.py index 6e2f6c8..8795d8d 100644 
--- a/tests/test_dataframe/test_getters.py +++ b/tests/test_dataframe/test_getters.py @@ -4,32 +4,32 @@ def test_columns(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a']) + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"]) names = actual.columns - assert names == ['b', 'a'] + assert names == ["b", "a"] assert isinstance(names, list) # test that a copy is returned - names.append('bad') - assert actual.columns == ['b', 'a'] + names.append("bad") + assert actual.columns == ["b", "a"] - actual.columns = ['new1', 'new2'] - assert actual.columns == ['new1', 'new2'] + actual.columns = ["new1", "new2"] + assert actual.columns == ["new1", "new2"] assert isinstance(actual.columns, list) with pytest.raises(ValueError): - actual.columns = ['list', 'too', 'long'] + actual.columns = ["list", "too", "long"] def test_index(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a']) + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"]) result = actual.index - assert result == ['a', 'b', 'c'] + assert result == ["a", "b", "c"] assert isinstance(result, list) # test that a view is returned - result.append('bad') - assert actual.index == ['a', 'b', 'c', 'bad'] + result.append("bad") + assert actual.index == ["a", "b", "c", "bad"] actual.index = [9, 10, 11] assert actual.index == [9, 10, 11] @@ -39,17 +39,22 @@ def test_index(): with pytest.raises(ValueError): actual.index = [1, 3, 4, 5, 6] - assert actual.index_name == 'index' - actual.index_name = 'new name' - assert actual.index_name == 'new name' + assert actual.index_name == "index" + actual.index_name = "new name" + assert actual.index_name == "new name" - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], index_name='letters') - assert actual.index_name == 'letters' + actual = rc.DataFrame({"a": [1, 2, 3], "b": 
[4, 5, 6]}, index=["a", "b", "c"], index_name="letters") + assert actual.index_name == "letters" def test_get_index(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [4, 5, 6, 7], 'c': [7, 8, 9, None]}, index=[10, 11, 12, 99], - columns=['a', 'b', 'c'], index_name='start_10', sort=False) + df = rc.DataFrame( + {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [7, 8, 9, None]}, + index=[10, 11, 12, 99], + columns=["a", "b", "c"], + index_name="start_10", + sort=False, + ) # test that then using .index returns a view res = df.index @@ -59,7 +64,7 @@ def test_get_index(): def test_data(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a']) + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"]) assert actual.data == [[4, 5, 6], [1, 2, 3]] # test shallow copy diff --git a/tests/test_dataframe/test_initialize.py b/tests/test_dataframe/test_initialize.py index ebf6577..39750ad 100644 --- a/tests/test_dataframe/test_initialize.py +++ b/tests/test_dataframe/test_initialize.py @@ -22,9 +22,9 @@ def test_default_empty_init(): assert isinstance(actual.data, list) assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) - actual = rc.DataFrame(columns=['a', 'b', 'c']) + actual = rc.DataFrame(columns=["a", "b", "c"]) assert actual.data == [[], [], []] - assert actual.columns == ['a', 'b', 'c'] + assert actual.columns == ["a", "b", "c"] assert actual.index == [] assert actual.sort is True assert isinstance(actual.index, list) @@ -32,9 +32,9 @@ def test_default_empty_init(): assert isinstance(actual.data, list) assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) - actual = rc.DataFrame(index=[1, 2, 3], columns=['a', 'b']) + actual = rc.DataFrame(index=[1, 2, 3], columns=["a", "b"]) assert actual.data == [[None, None, None], [None, None, None]] - assert actual.columns == ['a', 'b'] + assert actual.columns == ["a", "b"] assert actual.index 
== [1, 2, 3] assert actual.sort is False assert isinstance(actual.index, list) @@ -42,7 +42,7 @@ def test_default_empty_init(): assert isinstance(actual.data, list) assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) - actual = rc.DataFrame(index=[1, 2, 3], columns=['a', 'b'], sort=True) + actual = rc.DataFrame(index=[1, 2, 3], columns=["a", "b"], sort=True) assert actual.sort is True assert isinstance(actual.index, list) assert isinstance(actual.columns, list) @@ -52,9 +52,9 @@ def test_default_empty_init(): def test_default_init(): # solid matrix, no columns, no index - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) assert set(tuple(x) for x in actual.data) == {(1, 2, 3), (4, 5, 6)} - assert set(actual.columns) == {'a', 'b'} + assert set(actual.columns) == {"a", "b"} assert actual.index == [0, 1, 2] assert actual.sort is True assert isinstance(actual.index, list) @@ -63,11 +63,11 @@ def test_default_init(): assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # solid matrix, no columns, with index - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], index_name='letters') + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], index_name="letters") assert set(tuple(x) for x in actual.data) == {(1, 2, 3), (4, 5, 6)} - assert set(actual.columns) == {'a', 'b'} - assert actual.index == ['a', 'b', 'c'] - assert actual.index_name == 'letters' + assert set(actual.columns) == {"a", "b"} + assert actual.index == ["a", "b", "c"] + assert actual.index_name == "letters" assert actual.sort is False assert isinstance(actual.index, list) assert isinstance(actual.columns, list) @@ -75,10 +75,10 @@ def test_default_init(): assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # solid matrix, columns, index - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 
'b', 'c'], columns=['b', 'a']) + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"]) assert actual.data == [[4, 5, 6], [1, 2, 3]] - assert actual.columns == ['b', 'a'] - assert actual.index == ['a', 'b', 'c'] + assert actual.columns == ["b", "a"] + assert actual.index == ["a", "b", "c"] assert actual.sort is False assert isinstance(actual.index, list) assert isinstance(actual.columns, list) @@ -86,8 +86,8 @@ def test_default_init(): assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # dict values are not lists - actual = rc.DataFrame({'a': 1, 'b': 2, 'c': [1, 2, 3]}, columns=['b', 'c', 'a']) - assert actual.columns == ['b', 'c', 'a'] + actual = rc.DataFrame({"a": 1, "b": 2, "c": [1, 2, 3]}, columns=["b", "c", "a"]) + assert actual.columns == ["b", "c", "a"] assert actual.index == [0, 1, 2] assert actual.data == [[2, None, None], [1, 2, 3], [1, None, None]] assert actual.sort is True @@ -99,28 +99,28 @@ def test_default_init(): def test_sorted_init(): # initialized with index defaults to False - df = rc.DataFrame({'a': [2, 1, 3], 'b': [5, 4, 6]}, columns=['a', 'b'], index=[12, 11, 13]) + df = rc.DataFrame({"a": [2, 1, 3], "b": [5, 4, 6]}, columns=["a", "b"], index=[12, 11, 13]) assert df.sort is False - df = rc.DataFrame({'a': [2, 1, 3], 'b': [5, 4, 6]}, columns=['a', 'b'], index=[12, 11, 13], sort=True) + df = rc.DataFrame({"a": [2, 1, 3], "b": [5, 4, 6]}, columns=["a", "b"], index=[12, 11, 13], sort=True) assert df.sort is True assert df.index == [11, 12, 13] assert df.data == [[1, 2, 3], [4, 5, 6]] # initialized with no index defaults to True - df = rc.DataFrame({'a': [2, 1, 3], 'b': [5, 4, 6]}, columns=['a', 'b']) + df = rc.DataFrame({"a": [2, 1, 3], "b": [5, 4, 6]}, columns=["a", "b"]) assert df.sort is True - df = rc.DataFrame({'a': [2, 1, 3], 'b': [5, 4, 6]}, columns=['a', 'b'], sort=False) + df = rc.DataFrame({"a": [2, 1, 3], "b": [5, 4, 6]}, columns=["a", "b"], sort=False) assert 
df.sort is False # if sort is true, but no index provided it will assume already in sort order - df = rc.DataFrame({'a': [2, 1, 3], 'b': [5, 4, 6]}, columns=['a', 'b'], sort=True) + df = rc.DataFrame({"a": [2, 1, 3], "b": [5, 4, 6]}, columns=["a", "b"], sort=True) assert df.sort is True assert df.index == [0, 1, 2] assert df.data == [[2, 1, 3], [5, 4, 6]] # start un-sort, then set to sort - df = rc.DataFrame({'a': [2, 1, 3], 'b': [5, 4, 6]}, columns=['a', 'b'], index=[12, 11, 13], sort=False) + df = rc.DataFrame({"a": [2, 1, 3], "b": [5, 4, 6]}, columns=["a", "b"], index=[12, 11, 13], sort=False) assert df.sort is False assert df.index == [12, 11, 13] assert df.data == [[2, 1, 3], [5, 4, 6]] @@ -131,13 +131,13 @@ def test_sorted_init(): # mixed type index will bork on sort=True with pytest.raises(TypeError): - rc.DataFrame({'a': [2, 1, 3], 'b': [5, 4, 6]}, index=[1, 'b', 3], sort=True) + rc.DataFrame({"a": [2, 1, 3], "b": [5, 4, 6]}, index=[1, "b", 3], sort=True) def test_jagged_data(): - actual = rc.DataFrame({'a': [], 'b': [1], 'c': [1, 2], 'd': [1, 2, 3]}, columns=['a', 'b', 'c', 'd']) + actual = rc.DataFrame({"a": [], "b": [1], "c": [1, 2], "d": [1, 2, 3]}, columns=["a", "b", "c", "d"]) assert actual.data == [[None, None, None], [1, None, None], [1, 2, None], [1, 2, 3]] - assert actual.columns == ['a', 'b', 'c', 'd'] + assert actual.columns == ["a", "b", "c", "d"] assert actual.index == [0, 1, 2] assert actual.sort is True assert isinstance(actual.index, list) @@ -151,26 +151,26 @@ def test_bad_initialization(): # wrong number in index with pytest.raises(ValueError): - rc.DataFrame({'a': [1, 2, 3]}, index=[1]) + rc.DataFrame({"a": [1, 2, 3]}, index=[1]) # wrong number of columns with pytest.raises(ValueError): - rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a']) + rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a"]) with pytest.raises(ValueError): - rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b', 'c', 'TOO', 'MANY']) + 
rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b", "c", "TOO", "MANY"]) # columns does not match dict keys with pytest.raises(ValueError): - rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['BAD', 'VALUE']) + rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["BAD", "VALUE"]) # index is not a list with pytest.raises(TypeError): - rc.DataFrame({'a': [1]}, index=1) + rc.DataFrame({"a": [1]}, index=1) - # columns is not a list + # columns are not a list with pytest.raises(TypeError): - rc.DataFrame({'a': [1]}, columns='a') + rc.DataFrame({"a": [1]}, columns="a") # bad data type with pytest.raises(TypeError): diff --git a/tests/test_dataframe/test_iterators.py b/tests/test_dataframe/test_iterators.py index 31cff20..9f46376 100644 --- a/tests/test_dataframe/test_iterators.py +++ b/tests/test_dataframe/test_iterators.py @@ -4,13 +4,15 @@ def test_iterrows(): - df = rc.DataFrame({'first': [1, 2, 3, 4, 5], 'second': ['a', 2, 'b', None, 5]}) - - expected = [{'index': 0, 'first': 1, 'second': 'a'}, - {'index': 1, 'first': 2, 'second': 2}, - {'index': 2, 'first': 3, 'second': 'b'}, - {'index': 3, 'first': 4, 'second': None}, - {'index': 4, 'first': 5, 'second': 5}] + df = rc.DataFrame({"first": [1, 2, 3, 4, 5], "second": ["a", 2, "b", None, 5]}) + + expected = [ + {"index": 0, "first": 1, "second": "a"}, + {"index": 1, "first": 2, "second": 2}, + {"index": 2, "first": 3, "second": "b"}, + {"index": 3, "first": 4, "second": None}, + {"index": 4, "first": 5, "second": 5}, + ] actual = list() for x in df.iterrows(): actual.append(x) @@ -18,13 +20,15 @@ def test_iterrows(): assert actual == expected # index = False - df = rc.DataFrame({'first': [1, 2, 3, 4, 5], 'second': ['a', 2, 'b', None, 5]}) - - expected = [{'first': 1, 'second': 'a'}, - {'first': 2, 'second': 2}, - {'first': 3, 'second': 'b'}, - {'first': 4, 'second': None}, - {'first': 5, 'second': 5}] + df = rc.DataFrame({"first": [1, 2, 3, 4, 5], "second": ["a", 2, "b", None, 5]}) + + expected = 
[ + {"first": 1, "second": "a"}, + {"first": 2, "second": 2}, + {"first": 3, "second": "b"}, + {"first": 4, "second": None}, + {"first": 5, "second": 5}, + ] actual = list() for x in df.iterrows(index=False): actual.append(x) @@ -33,12 +37,12 @@ def test_iterrows(): def test_itertuples(): - df = rc.DataFrame({'first': [1, 2], 'second': ['a', 2]}, index=['hi', 'bye'], index_name='greet', - columns=['first', 'second']) + df = rc.DataFrame( + {"first": [1, 2], "second": ["a", 2]}, index=["hi", "bye"], index_name="greet", columns=["first", "second"] + ) - name_tup = namedtuple('Raccoon', ['greet', 'first', 'second']) - expected = [name_tup(greet='hi', first=1, second='a'), - name_tup(greet='bye', first=2, second=2)] + name_tup = namedtuple("Raccoon", ["greet", "first", "second"]) + expected = [name_tup(greet="hi", first=1, second="a"), name_tup(greet="bye", first=2, second=2)] actual = list() for x in df.itertuples(): actual.append(x) @@ -46,12 +50,12 @@ def test_itertuples(): assert actual == expected # index == False - df = rc.DataFrame({'first': [1, 2], 'second': ['a', 2]}, index=['hi', 'bye'], index_name='greet', - columns=['first', 'second']) + df = rc.DataFrame( + {"first": [1, 2], "second": ["a", 2]}, index=["hi", "bye"], index_name="greet", columns=["first", "second"] + ) - name_tup = namedtuple('Raccoon', ['first', 'second']) - expected = [name_tup(first=1, second='a'), - name_tup(first=2, second=2)] + name_tup = namedtuple("Raccoon", ["first", "second"]) + expected = [name_tup(first=1, second="a"), name_tup(first=2, second=2)] actual = list() for x in df.itertuples(index=False): actual.append(x) diff --git a/tests/test_dataframe/test_set.py b/tests/test_dataframe/test_set.py index 4edac56..5a7cec6 100644 --- a/tests/test_dataframe/test_set.py +++ b/tests/test_dataframe/test_set.py @@ -5,114 +5,134 @@ def test_set_cell(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 12], columns=['a', 'b', 'c'], - sort=False) + 
actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=[10, 11, 12], columns=["a", "b", "c"], sort=False + ) # change existing value - actual.set(11, 'b', 55) - assert actual.get(11, 'b') == 55 - actual.set(10, 'a', 11) - assert actual.get(10, 'a') == 11 - actual.set(10, 'c', 13) - assert actual.get(10, 'c') == 13 + actual.set(11, "b", 55) + assert actual.get(11, "b") == 55 + actual.set(10, "a", 11) + assert actual.get(10, "a") == 11 + actual.set(10, "c", 13) + assert actual.get(10, "c") == 13 assert actual.data == [[11, 2, 3], [4, 55, 6], [13, 8, 9]] # add a new row - actual.set(13, 'b', 14) + actual.set(13, "b", 14) assert actual.data == [[11, 2, 3, None], [4, 55, 6, 14], [13, 8, 9, None]] # add a new column - actual.set(13, 'd', 88) + actual.set(13, "d", 88) assert actual.data == [[11, 2, 3, None], [4, 55, 6, 14], [13, 8, 9, None], [None, None, None, 88]] # add a new row and column - actual.set(14, 'e', 999) - assert actual.data == [[11, 2, 3, None, None], [4, 55, 6, 14, None], [13, 8, 9, None, None], - [None, None, None, 88, None], [None, None, None, None, 999]] + actual.set(14, "e", 999) + assert actual.data == [ + [11, 2, 3, None, None], + [4, 55, 6, 14, None], + [13, 8, 9, None, None], + [None, None, None, 88, None], + [None, None, None, None, 999], + ] # add a new row note that index does not sort - actual.set(1, 'a', -100) - assert actual.data == [[11, 2, 3, None, None, -100], [4, 55, 6, 14, None, None], [13, 8, 9, None, None, None], - [None, None, None, 88, None, None], [None, None, None, None, 999, None]] + actual.set(1, "a", -100) + assert actual.data == [ + [11, 2, 3, None, None, -100], + [4, 55, 6, 14, None, None], + [13, 8, 9, None, None, None], + [None, None, None, 88, None, None], + [None, None, None, None, 999, None], + ] assert actual.index == [10, 11, 12, 13, 14, 1] assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) def test_set_cell_sorted(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': 
[4, 5, 6], 'c': [7, 8, 9]}, index=[10, 12, 13], columns=['a', 'b', 'c'], - sort=True) + actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=[10, 12, 13], columns=["a", "b", "c"], sort=True + ) # change existing value - actual.set(12, 'b', 55) - assert actual.get(12, 'b') == 55 - actual.set(10, 'a', 11) - assert actual.get(10, 'a') == 11 - actual.set(10, 'c', 13) - assert actual.get(10, 'c') == 13 + actual.set(12, "b", 55) + assert actual.get(12, "b") == 55 + actual.set(10, "a", 11) + assert actual.get(10, "a") == 11 + actual.set(10, "c", 13) + assert actual.get(10, "c") == 13 assert actual.data == [[11, 2, 3], [4, 55, 6], [13, 8, 9]] # add a new row - actual.set(14, 'b', 14) + actual.set(14, "b", 14) assert actual.index == [10, 12, 13, 14] assert actual.data == [[11, 2, 3, None], [4, 55, 6, 14], [13, 8, 9, None]] - actual.set(11, 'a', -1) + actual.set(11, "a", -1) assert actual.index == [10, 11, 12, 13, 14] assert actual.data == [[11, -1, 2, 3, None], [4, None, 55, 6, 14], [13, None, 8, 9, None]] # add a new column - actual.set(13, 'd', 88) - assert actual.data == [[11, -1, 2, 3, None], [4, None, 55, 6, 14], [13, None, 8, 9, None], - [None, None, None, 88, None]] + actual.set(13, "d", 88) + assert actual.data == [ + [11, -1, 2, 3, None], + [4, None, 55, 6, 14], + [13, None, 8, 9, None], + [None, None, None, 88, None], + ] # add a new row and column - actual.set(15, 'e', 999) + actual.set(15, "e", 999) assert actual.index == [10, 11, 12, 13, 14, 15] - assert actual.data == [[11, -1, 2, 3, None, None], [4, None, 55, 6, 14, None], [13, None, 8, 9, None, None], - [None, None, None, 88, None, None], [None, None, None, None, None, 999]] + assert actual.data == [ + [11, -1, 2, 3, None, None], + [4, None, 55, 6, 14, None], + [13, None, 8, 9, None, None], + [None, None, None, 88, None, None], + [None, None, None, None, None, 999], + ] assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # fails for mixed index type 
with pytest.raises(TypeError): - actual.set('Z', 'e', 60) + actual.set("Z", "e", 60) def test_set_row(): - actual = rc.DataFrame({'a': [1, 3], 'b': [4, 6], 'c': [7, 9]}, index=[10, 12], columns=['a', 'b', 'c'], - sort=True) + actual = rc.DataFrame({"a": [1, 3], "b": [4, 6], "c": [7, 9]}, index=[10, 12], columns=["a", "b", "c"], sort=True) # change existing row - actual.set(indexes=10, values={'a': 11, 'b': 44, 'c': 77}) + actual.set(indexes=10, values={"a": 11, "b": 44, "c": 77}) assert actual.data == [[11, 3], [44, 6], [77, 9]] - actual.set(indexes=12, values={'a': 33, 'b': 66, 'c': 99}) + actual.set(indexes=12, values={"a": 33, "b": 66, "c": 99}) assert actual.data == [[11, 33], [44, 66], [77, 99]] # insert new row in the middle - actual.set(indexes=11, values={'a': 22, 'b': 5, 'c': 88}) + actual.set(indexes=11, values={"a": 22, "b": 5, "c": 88}) assert actual.data == [[11, 22, 33], [44, 5, 66], [77, 88, 99]] # add a new row to end - actual.set(indexes=13, values={'a': 4, 'b': 7, 'c': 10}) + actual.set(indexes=13, values={"a": 4, "b": 7, "c": 10}) assert actual.data == [[11, 22, 33, 4], [44, 5, 66, 7], [77, 88, 99, 10]] - actual.set(indexes=14, values={'b': 8, 'c': 11}) + actual.set(indexes=14, values={"b": 8, "c": 11}) assert actual.data == [[11, 22, 33, 4, None], [44, 5, 66, 7, 8], [77, 88, 99, 10, 11]] assert actual.index == [10, 11, 12, 13, 14] # add a new row to beginning - actual.set(indexes=9, values={'a': -1, 'b': -2, 'c': -3}) + actual.set(indexes=9, values={"a": -1, "b": -2, "c": -3}) assert actual.data == [[-1, 11, 22, 33, 4, None], [-2, 44, 5, 66, 7, 8], [-3, 77, 88, 99, 10, 11]] assert actual.index == [9, 10, 11, 12, 13, 14] - actual.set(indexes=8, values={'b': -3, 'c': -4}) + actual.set(indexes=8, values={"b": -3, "c": -4}) assert actual.data == [[None, -1, 11, 22, 33, 4, None], [-3, -2, 44, 5, 66, 7, 8], [-4, -3, 77, 88, 99, 10, 11]] assert actual.index == [8, 9, 10, 11, 12, 13, 14] # bad column names with pytest.raises(ValueError): - 
actual.set(indexes=14, values={'a': 0, 'bad': 1}) + actual.set(indexes=14, values={"a": 0, "bad": 1}) # bad values type with pytest.raises(TypeError): @@ -120,31 +140,32 @@ def test_set_row(): def test_set_row_sorted(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 12], columns=['a', 'b', 'c'], - sort=False) + actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=[10, 11, 12], columns=["a", "b", "c"], sort=False + ) # change existing row - actual.set(indexes=10, values={'a': 11, 'b': 44, 'c': 77}) + actual.set(indexes=10, values={"a": 11, "b": 44, "c": 77}) assert actual.data == [[11, 2, 3], [44, 5, 6], [77, 8, 9]] - actual.set(indexes=12, values={'a': 33, 'b': 66, 'c': 99}) + actual.set(indexes=12, values={"a": 33, "b": 66, "c": 99}) assert actual.data == [[11, 2, 33], [44, 5, 66], [77, 8, 99]] # change subset of existing row - actual.set(indexes=11, values={'a': 22, 'c': 88}) + actual.set(indexes=11, values={"a": 22, "c": 88}) assert actual.data == [[11, 22, 33], [44, 5, 66], [77, 88, 99]] # add a new row - actual.set(indexes=13, values={'a': 4, 'b': 7, 'c': 10}) + actual.set(indexes=13, values={"a": 4, "b": 7, "c": 10}) assert actual.data == [[11, 22, 33, 4], [44, 5, 66, 7], [77, 88, 99, 10]] - actual.set(indexes=14, values={'b': 8, 'c': 11}) + actual.set(indexes=14, values={"b": 8, "c": 11}) assert actual.data == [[11, 22, 33, 4, None], [44, 5, 66, 7, 8], [77, 88, 99, 10, 11]] assert actual.index == [10, 11, 12, 13, 14] # bad column names with pytest.raises(ValueError): - actual.set(indexes=14, values={'a': 0, 'bad': 1}) + actual.set(indexes=14, values={"a": 0, "bad": 1}) # bad values type with pytest.raises(TypeError): @@ -152,218 +173,235 @@ def test_set_row_sorted(): def test_set_column(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 12], columns=['a', 'b', 'c'], - sort=False) + actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 
8, 9]}, index=[10, 11, 12], columns=["a", "b", "c"], sort=False + ) # change existing column - actual.set(columns='b', values=[44, 55, 66]) + actual.set(columns="b", values=[44, 55, 66]) assert actual.data == [[1, 2, 3], [44, 55, 66], [7, 8, 9]] # add a new column - actual.set(columns='e', values=[10, 11, 12]) + actual.set(columns="e", values=[10, 11, 12]) assert actual.data == [[1, 2, 3], [44, 55, 66], [7, 8, 9], [10, 11, 12]] assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # not enough values with pytest.raises(ValueError): - actual.set(columns='e', values=[1, 2]) + actual.set(columns="e", values=[1, 2]) # number of values must equal number of True indexes with pytest.raises(ValueError): - actual.set(indexes=[True, False, True], columns='e', values=[1, 2, 3]) + actual.set(indexes=[True, False, True], columns="e", values=[1, 2, 3]) # too many values with pytest.raises(ValueError): - actual.set(columns='e', values=[1, 2, 3, 4]) + actual.set(columns="e", values=[1, 2, 3, 4]) def test_set_column_sorted(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 12], columns=['a', 'b', 'c'], - sort=True) + actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=[10, 11, 12], columns=["a", "b", "c"], sort=True + ) # change existing column - actual.set(columns='b', values=[44, 55, 66]) + actual.set(columns="b", values=[44, 55, 66]) assert actual.data == [[1, 2, 3], [44, 55, 66], [7, 8, 9]] # add a new column - actual.set(columns='e', values=[10, 11, 12]) + actual.set(columns="e", values=[10, 11, 12]) assert actual.data == [[1, 2, 3], [44, 55, 66], [7, 8, 9], [10, 11, 12]] assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # not enough values with pytest.raises(ValueError): - actual.set(columns='e', values=[1, 2]) + actual.set(columns="e", values=[1, 2]) # too many values with pytest.raises(ValueError): - actual.set(columns='e', values=[1, 2, 3, 4]) 
+ actual.set(columns="e", values=[1, 2, 3, 4]) def test_set_col_index_subset(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 12], columns=['a', 'b', 'c'], - sort=False) + actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=[10, 11, 12], columns=["a", "b", "c"], sort=False + ) # by index value - actual.set(columns='b', indexes=[12, 11, 10], values=[66, 55, 44]) + actual.set(columns="b", indexes=[12, 11, 10], values=[66, 55, 44]) assert actual.data == [[1, 2, 3], [44, 55, 66], [7, 8, 9]] - actual.set(columns='a', indexes=[12, 10], values=[33, 11]) + actual.set(columns="a", indexes=[12, 10], values=[33, 11]) assert actual.data == [[11, 2, 33], [44, 55, 66], [7, 8, 9]] # new rows - actual.set(columns='c', indexes=[12, 13, 14], values=[120, 130, 140]) + actual.set(columns="c", indexes=[12, 13, 14], values=[120, 130, 140]) assert actual.data == [[11, 2, 33, None, None], [44, 55, 66, None, None], [7, 8, 120, 130, 140]] assert actual.index == [10, 11, 12, 13, 14] # new row new columns - actual.set(columns='z', indexes=[14, 15, 16], values=['zoo', 'boo', 'hoo']) - assert actual.data == [[11, 2, 33, None, None, None, None], [44, 55, 66, None, None, None, None], - [7, 8, 120, 130, 140, None, None], [None, None, None, None, 'zoo', 'boo', 'hoo']] + actual.set(columns="z", indexes=[14, 15, 16], values=["zoo", "boo", "hoo"]) + assert actual.data == [ + [11, 2, 33, None, None, None, None], + [44, 55, 66, None, None, None, None], + [7, 8, 120, 130, 140, None, None], + [None, None, None, None, "zoo", "boo", "hoo"], + ] assert actual.index == [10, 11, 12, 13, 14, 15, 16] assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # values list shorter than indexes, raise error with pytest.raises(ValueError): - actual.set(indexes=[10, 11], columns='a', values=[1]) + actual.set(indexes=[10, 11], columns="a", values=[1]) # by boolean list - actual = rc.DataFrame({'c': [1, 2], 'a': [4, 5], 
'b': [7, 8]}, index=['first', 'second'], columns=['a', 'b', 'c'], - sort=False) - actual.set(columns='c', indexes=[False, True], values=[99]) + actual = rc.DataFrame( + {"c": [1, 2], "a": [4, 5], "b": [7, 8]}, index=["first", "second"], columns=["a", "b", "c"], sort=False + ) + actual.set(columns="c", indexes=[False, True], values=[99]) assert actual.data == [[4, 5], [7, 8], [1, 99]] # boolean list not size of existing index with pytest.raises(ValueError): - actual.set(indexes=[True, False, True], columns='a', values=[1, 2]) + actual.set(indexes=[True, False, True], columns="a", values=[1, 2]) # boolean list True entries not same size as values list with pytest.raises(ValueError): - actual.set(indexes=[True, True, False], columns='b', values=[4, 5, 6]) + actual.set(indexes=[True, True, False], columns="b", values=[4, 5, 6]) with pytest.raises(ValueError): - actual.set(indexes=[True, True, False], columns='b', values=[4]) + actual.set(indexes=[True, True, False], columns="b", values=[4]) def test_set_col_index_subset_sorted(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 12], columns=['a', 'b', 'c'], - sort=True) + actual = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=[10, 11, 12], columns=["a", "b", "c"], sort=True + ) # by index value - actual.set(columns='b', indexes=[12, 11, 10], values=[66, 55, 44]) + actual.set(columns="b", indexes=[12, 11, 10], values=[66, 55, 44]) assert actual.data == [[1, 2, 3], [44, 55, 66], [7, 8, 9]] - actual.set(columns='a', indexes=[12, 10], values=[33, 11]) + actual.set(columns="a", indexes=[12, 10], values=[33, 11]) assert actual.data == [[11, 2, 33], [44, 55, 66], [7, 8, 9]] assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # new rows at end - actual.set(columns='c', indexes=[12, 14, 15], values=[120, 130, 140]) + actual.set(columns="c", indexes=[12, 14, 15], values=[120, 130, 140]) assert actual.data == [[11, 2, 33, None, None], 
[44, 55, 66, None, None], [7, 8, 120, 130, 140]] assert actual.index == [10, 11, 12, 14, 15] # new rows at beginning - actual.set(columns='a', indexes=[10, 4, 5], values=[-140, -120, -130]) - assert actual.data == [[-120, -130, -140, 2, 33, None, None], - [None, None, 44, 55, 66, None, None], - [None, None, 7, 8, 120, 130, 140]] + actual.set(columns="a", indexes=[10, 4, 5], values=[-140, -120, -130]) + assert actual.data == [ + [-120, -130, -140, 2, 33, None, None], + [None, None, 44, 55, 66, None, None], + [None, None, 7, 8, 120, 130, 140], + ] assert actual.index == [4, 5, 10, 11, 12, 14, 15] # new rows in middle - actual.set(columns='b', indexes=[13, 6], values=[3131, 6060]) - assert actual.data == [[-120, -130, None, -140, 2, 33, None, None, None], - [None, None, 6060, 44, 55, 66, 3131, None, None], - [None, None, None, 7, 8, 120, None, 130, 140]] + actual.set(columns="b", indexes=[13, 6], values=[3131, 6060]) + assert actual.data == [ + [-120, -130, None, -140, 2, 33, None, None, None], + [None, None, 6060, 44, 55, 66, 3131, None, None], + [None, None, None, 7, 8, 120, None, 130, 140], + ] assert actual.index == [4, 5, 6, 10, 11, 12, 13, 14, 15] assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # new row new columns - actual.set(columns='z', indexes=[14, 15, 16], values=['zoo', 'boo', 'hoo']) - assert actual.data == [[-120, -130, None, -140, 2, 33, None, None, None, None], - [None, None, 6060, 44, 55, 66, 3131, None, None, None], - [None, None, None, 7, 8, 120, None, 130, 140, None], - [None, None, None, None, None, None, None, 'zoo', 'boo', 'hoo']] + actual.set(columns="z", indexes=[14, 15, 16], values=["zoo", "boo", "hoo"]) + assert actual.data == [ + [-120, -130, None, -140, 2, 33, None, None, None, None], + [None, None, 6060, 44, 55, 66, 3131, None, None, None], + [None, None, None, 7, 8, 120, None, 130, 140, None], + [None, None, None, None, None, None, None, "zoo", "boo", "hoo"], + ] assert actual.index == [4, 5, 6, 10, 
11, 12, 13, 14, 15, 16] - assert actual.columns == ['a', 'b', 'c', 'z'] + assert actual.columns == ["a", "b", "c", "z"] assert all([isinstance(actual.data[x], list) for x in range(len(actual.columns))]) # values list shorter than indexes, raise error with pytest.raises(ValueError): - actual.set(indexes=[10, 11], columns='a', values=[1]) + actual.set(indexes=[10, 11], columns="a", values=[1]) # by boolean list - actual = rc.DataFrame({'c': [1, 2], 'a': [4, 5], 'b': [7, 8]}, index=['first', 'second'], columns=['a', 'b', 'c'], - sort=True) - actual.set(columns='c', indexes=[False, True], values=[99]) + actual = rc.DataFrame( + {"c": [1, 2], "a": [4, 5], "b": [7, 8]}, index=["first", "second"], columns=["a", "b", "c"], sort=True + ) + actual.set(columns="c", indexes=[False, True], values=[99]) assert actual.data == [[4, 5], [7, 8], [1, 99]] # boolean list not size of existing index with pytest.raises(ValueError): - actual.set(indexes=[True, False, True], columns='a', values=[1, 2]) + actual.set(indexes=[True, False, True], columns="a", values=[1, 2]) # boolean list True entries not same size as values list with pytest.raises(ValueError): - actual.set(indexes=[True, True, False], columns='b', values=[4, 5, 6]) + actual.set(indexes=[True, True, False], columns="b", values=[4, 5, 6]) with pytest.raises(ValueError): - actual.set(indexes=[True, True, False], columns='b', values=[4]) + actual.set(indexes=[True, True, False], columns="b", values=[4]) def test_set_single_value(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[10, 11, 12], columns=['a', 'b', 'c'], - sort=False) + df = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=[10, 11, 12], columns=["a", "b", "c"], sort=False + ) # set multiple index to one value - df.set([10, 12], 'a', 99) + df.set([10, 12], "a", 99) assert df.data == [[99, 2, 99], [4, 5, 6], [7, 8, 9]] # set entire column to one value - df.set(columns='c', values=88) + df.set(columns="c", values=88) 
assert df.data == [[99, 2, 99], [4, 5, 6], [88, 88, 88]] # can be anything that isn't a list - df.set(columns='e', values={1, 2, 3}) + df.set(columns="e", values={1, 2, 3}) assert df.data == [[99, 2, 99], [4, 5, 6], [88, 88, 88], [{1, 2, 3}, {1, 2, 3}, {1, 2, 3}]] def test_set_location(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8]) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8]) - df.set_location(2, {'a': -3}) - assert_frame_equal(df, rc.DataFrame({'a': [1, 2, -3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8])) + df.set_location(2, {"a": -3}) + assert_frame_equal(df, rc.DataFrame({"a": [1, 2, -3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8])) - df.set_location(3, {'a': -10, 'b': -88}) - assert_frame_equal(df, rc.DataFrame({'a': [1, 2, -3, -10], 'b': [5, 6, 7, -88]}, index=[2, 4, 6, 8])) + df.set_location(3, {"a": -10, "b": -88}) + assert_frame_equal(df, rc.DataFrame({"a": [1, 2, -3, -10], "b": [5, 6, 7, -88]}, index=[2, 4, 6, 8])) - df.set_location(0, {'b': -55}, missing_to_none=True) - assert_frame_equal(df, rc.DataFrame({'a': [None, 2, -3, -10], 'b': [-55, 6, 7, -88]}, index=[2, 4, 6, 8])) + df.set_location(0, {"b": -55}, missing_to_none=True) + assert_frame_equal(df, rc.DataFrame({"a": [None, 2, -3, -10], "b": [-55, 6, 7, -88]}, index=[2, 4, 6, 8])) # location out of bounds with pytest.raises(IndexError): - df.set_location(99, {'a': 10}) + df.set_location(99, {"a": 10}) def test_set_locations(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8]) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8]) - df.set_locations([0, 2], 'a', [-1, -3]) - assert_frame_equal(df, rc.DataFrame({'a': [-1, 2, -3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8])) + df.set_locations([0, 2], "a", [-1, -3]) + assert_frame_equal(df, rc.DataFrame({"a": [-1, 2, -3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8])) - df.set_locations([1, 3], 'a', -10) - assert_frame_equal(df, 
rc.DataFrame({'a': [-1, -10, -3, -10], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8])) + df.set_locations([1, 3], "a", -10) + assert_frame_equal(df, rc.DataFrame({"a": [-1, -10, -3, -10], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8])) def test_set_from_blank_df(): # single cell df = rc.DataFrame(sort=False) - df.set(indexes=1, columns='a', values=9) - assert df.columns == ['a'] + df.set(indexes=1, columns="a", values=9) + assert df.columns == ["a"] assert df.index == [1] assert df.data == [[9]] # single column df = rc.DataFrame(sort=False) - df.set(indexes=[1, 2, 3], columns='a', values=[9, 10, 11]) - assert df.columns == ['a'] + df.set(indexes=[1, 2, 3], columns="a", values=[9, 10, 11]) + assert df.columns == ["a"] assert df.index == [1, 2, 3] assert df.data == [[9, 10, 11]] @@ -375,84 +413,107 @@ def test_set_from_blank_df(): def test_set_square_brackets(): df = rc.DataFrame(sort=False) - df[1, 'a'] = 2 + df[1, "a"] = 2 assert df.data == [[2]] # df[[0, 3], 'b'] - - set index = [0, 3], column = b - df[[0, 3], 'b'] = 4 + df[[0, 3], "b"] = 4 assert df.data == [[2, None, None], [None, 4, 4]] # df[1:2, 'b'] - - set index slice 1:2, column = b - df[1:3, 'b'] = 5 + df[1:3, "b"] = 5 assert df.data == [[2, None, None], [5, 5, 5]] assert df.sort is False # with sort = True df = rc.DataFrame(sort=True) - df[1, 'a'] = 2 + df[1, "a"] = 2 assert df.data == [[2]] # df[[0, 3], 'b'] - - set index = [0, 3], column = b - df[[0, 3], 'b'] = 4 + df[[0, 3], "b"] = 4 assert df.data == [[None, 2, None], [4, None, 4]] # df[1:2, 'b'] - - set index slice 1:2, column = b - df[1:3, 'b'] = 5 + df[1:3, "b"] = 5 assert df.data == [[None, 2, None], [4, 5, 5]] assert df.sort is True def test_append_row(): - actual = rc.DataFrame({'a': [1, 3], 'b': [4, 6], 'c': [7, 9]}, index=[10, 12], columns=['a', 'b', 'c']) + actual = rc.DataFrame({"a": [1, 3], "b": [4, 6], "c": [7, 9]}, index=[10, 12], columns=["a", "b", "c"]) # append row with new columns, ignore new columns - actual.append_row(14, {'a': 10, 'c': 13, 'd': 
99}, new_cols=False) - expected = rc.DataFrame({'a': [1, 3, 10], 'b': [4, 6, None], 'c': [7, 9, 13]}, index=[10, 12, 14], - columns=['a', 'b', 'c']) + actual.append_row(14, {"a": 10, "c": 13, "d": 99}, new_cols=False) + expected = rc.DataFrame( + {"a": [1, 3, 10], "b": [4, 6, None], "c": [7, 9, 13]}, index=[10, 12, 14], columns=["a", "b", "c"] + ) assert_frame_equal(actual, expected) # append row with new columns, add new columns - actual.append_row(16, {'a': 14, 'b': 15, 'd': 100}) - expected = rc.DataFrame({'a': [1, 3, 10, 14], 'b': [4, 6, None, 15], 'c': [7, 9, 13, None], - 'd': [None, None, None, 100]}, index=[10, 12, 14, 16], columns=['a', 'b', 'c', 'd']) + actual.append_row(16, {"a": 14, "b": 15, "d": 100}) + expected = rc.DataFrame( + {"a": [1, 3, 10, 14], "b": [4, 6, None, 15], "c": [7, 9, 13, None], "d": [None, None, None, 100]}, + index=[10, 12, 14, 16], + columns=["a", "b", "c", "d"], + ) assert_frame_equal(actual, expected) # try to append existing row with pytest.raises(IndexError): - actual.append_row(10, {'a': 9}) + actual.append_row(10, {"a": 9}) def test_append_rows(): - actual = rc.DataFrame({'a': [1, 3], 'b': [4, 6], 'c': [7, 9]}, index=[10, 12], columns=['a', 'b', 'c']) + actual = rc.DataFrame({"a": [1, 3], "b": [4, 6], "c": [7, 9]}, index=[10, 12], columns=["a", "b", "c"]) # append rows with new columns, ignore new columns - actual.append_rows([14, 15], {'a': [10, 11], 'c': [13, 14], 'd': [99, 100]}, new_cols=False) - expected = rc.DataFrame({'a': [1, 3, 10, 11], 'b': [4, 6, None, None], 'c': [7, 9, 13, 14]}, - index=[10, 12, 14, 15], columns=['a', 'b', 'c']) + actual.append_rows([14, 15], {"a": [10, 11], "c": [13, 14], "d": [99, 100]}, new_cols=False) + expected = rc.DataFrame( + {"a": [1, 3, 10, 11], "b": [4, 6, None, None], "c": [7, 9, 13, 14]}, + index=[10, 12, 14, 15], + columns=["a", "b", "c"], + ) assert_frame_equal(actual, expected) # append row with new columns, add new columns - actual.append_rows([16, 17], {'a': [14, 15], 'b': [15, 
16], 'd': [100, 101]}) - expected = rc.DataFrame({'a': [1, 3, 10, 11, 14, 15], 'b': [4, 6, None, None, 15, 16], - 'c': [7, 9, 13, 14, None, None], 'd': [None, None, None, None, 100, 101]}, - index=[10, 12, 14, 15, 16, 17], columns=['a', 'b', 'c', 'd']) + actual.append_rows([16, 17], {"a": [14, 15], "b": [15, 16], "d": [100, 101]}) + expected = rc.DataFrame( + { + "a": [1, 3, 10, 11, 14, 15], + "b": [4, 6, None, None, 15, 16], + "c": [7, 9, 13, 14, None, None], + "d": [None, None, None, None, 100, 101], + }, + index=[10, 12, 14, 15, 16, 17], + columns=["a", "b", "c", "d"], + ) assert_frame_equal(actual, expected) # try to append existing row with pytest.raises(IndexError): - actual.append_rows([10, 11], {'a': [8, 9]}) + actual.append_rows([10, 11], {"a": [8, 9]}) with pytest.raises(ValueError): - actual.append_rows([16, 17], {'a': [14, 15, 999]}) + actual.append_rows([16, 17], {"a": [14, 15, 999]}) def test_bar(): - df = rc.DataFrame(columns=['datetime', 'open', 'high', 'low', 'close', 'volume'], sort=True) + df = rc.DataFrame(columns=["datetime", "open", "high", "low", "close", "volume"], sort=True) for x in range(10): - df.set(indexes=x, values={'datetime': '2001-01-01', 'open': 100.0, 'high': 101.0, 'low': 99.5, - 'close': 99.75, 'volume': 10000}) + df.set( + indexes=x, + values={ + "datetime": "2001-01-01", + "open": 100.0, + "high": 101.0, + "low": 99.5, + "close": 99.75, + "volume": 10000, + }, + ) assert df.index == list(range(10)) - assert df.columns == ['datetime', 'open', 'high', 'low', 'close', 'volume'] - assert df.data[0] == ['2001-01-01'] * 10 + assert df.columns == ["datetime", "open", "high", "low", "close", "volume"] + assert df.data[0] == ["2001-01-01"] * 10 diff --git a/tests/test_dataframe/test_sort.py b/tests/test_dataframe/test_sort.py index 2928374..35b456b 100644 --- a/tests/test_dataframe/test_sort.py +++ b/tests/test_dataframe/test_sort.py @@ -6,69 +6,78 @@ def test_sort_index(): # test on list - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 
5, 6]}, columns=['a', 'b'], index=[10, 8, 9], sort=False) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[10, 8, 9], sort=False) df.sort_index() assert isinstance(df.index, list) - assert_frame_equal(df, rc.DataFrame({'a': [2, 3, 1], 'b': [5, 6, 4]}, columns=['a', 'b'], index=[8, 9, 10], - sort=False)) + assert_frame_equal( + df, rc.DataFrame({"a": [2, 3, 1], "b": [5, 6, 4]}, columns=["a", "b"], index=[8, 9, 10], sort=False) + ) # fails on mixed type columns - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[10, 'a', 9]) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[10, "a", 9]) with pytest.raises(TypeError): df.sort_index() def test_sort_multi_index(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[(10, 'c'), (10, 'a'), (10, 'b')], - sort=False) + df = rc.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[(10, "c"), (10, "a"), (10, "b")], sort=False + ) df.sort_index() assert isinstance(df.index, list) - assert_frame_equal(df, rc.DataFrame({'a': [2, 3, 1], 'b': [5, 6, 4]}, columns=['a', 'b'], - index=[(10, 'a'), (10, 'b'), (10, 'c')], sort=False)) + assert_frame_equal( + df, + rc.DataFrame( + {"a": [2, 3, 1], "b": [5, 6, 4]}, columns=["a", "b"], index=[(10, "a"), (10, "b"), (10, "c")], sort=False + ), + ) # fails on mixed type columns - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[(10, 'c'), 'a', (10, 'b')]) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[(10, "c"), "a", (10, "b")]) with pytest.raises(TypeError): df.sort_index() def test_sort_column(): - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9]) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9]) # cannot sort multiple columns with pytest.raises(TypeError): - df.sort_columns(['a', 'b']) + 
df.sort_columns(["a", "b"]) - df.sort_columns('a') + df.sort_columns("a") assert isinstance(df.index, list) - assert_frame_equal(df, rc.DataFrame({'a': [1, 2, 3], 'b': ['c', 'a', 'b']}, columns=['a', 'b'], index=[8, 10, 9])) + assert_frame_equal(df, rc.DataFrame({"a": [1, 2, 3], "b": ["c", "a", "b"]}, columns=["a", "b"], index=[8, 10, 9])) - df.sort_columns('b') - assert_frame_equal(df, rc.DataFrame({'a': [2, 3, 1], 'b': ['a', 'b', 'c']}, columns=['a', 'b'], index=[10, 9, 8])) + df.sort_columns("b") + assert_frame_equal(df, rc.DataFrame({"a": [2, 3, 1], "b": ["a", "b", "c"]}, columns=["a", "b"], index=[10, 9, 8])) - df.sort_columns('b', reverse=True) - assert_frame_equal(df, rc.DataFrame({'a': [1, 3, 2], 'b': ['c', 'b', 'a']}, columns=['a', 'b'], index=[8, 9, 10])) + df.sort_columns("b", reverse=True) + assert_frame_equal(df, rc.DataFrame({"a": [1, 3, 2], "b": ["c", "b", "a"]}, columns=["a", "b"], index=[8, 9, 10])) def test_sort_column_w_key(): - df = rc.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd']}, columns=['a', 'b'], index=[8, 9, 10, 11]) + df = rc.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}, columns=["a", "b"], index=[8, 9, 10, 11]) # No key, reverse - df.sort_columns('a', key=None, reverse=True) - assert_frame_equal(df, rc.DataFrame({'a': [4, 3, 2, 1], 'b': ['d', 'c', 'b', 'a']}, columns=['a', 'b'], - index=[11, 10, 9, 8])) + df.sort_columns("a", key=None, reverse=True) + assert_frame_equal( + df, rc.DataFrame({"a": [4, 3, 2, 1], "b": ["d", "c", "b", "a"]}, columns=["a", "b"], index=[11, 10, 9, 8]) + ) - # a key function that turns evens into a odds into a negative number + # a key function that turns evens into negative numbers and leaves odds unchanged def even_to_neg(i): return i * -1 if i % 2 == 0 else i - df.sort_columns('a', key=even_to_neg) - assert_frame_equal(df, rc.DataFrame({'a': [4, 2, 1, 3], 'b': ['d', 'b', 'a', 'c']}, columns=['a', 'b'], - index=[11, 9, 8, 10])) + df.sort_columns("a", key=even_to_neg) + assert_frame_equal( + df,
rc.DataFrame({"a": [4, 2, 1, 3], "b": ["d", "b", "a", "c"]}, columns=["a", "b"], index=[11, 9, 8, 10]) + ) # with key and reverse - df.sort_columns('a', key=even_to_neg, reverse=True) - assert_frame_equal(df, rc.DataFrame({'a': [3, 1, 2, 4], 'b': ['c', 'a', 'b', 'd']}, columns=['a', 'b'], - index=[10, 8, 9, 11])) + df.sort_columns("a", key=even_to_neg, reverse=True) + assert_frame_equal( + df, rc.DataFrame({"a": [3, 1, 2, 4], "b": ["c", "a", "b", "d"]}, columns=["a", "b"], index=[10, 8, 9, 11]) + ) diff --git a/tests/test_dataframe/test_utils.py b/tests/test_dataframe/test_utils.py index c134f45..2f363a8 100644 --- a/tests/test_dataframe/test_utils.py +++ b/tests/test_dataframe/test_utils.py @@ -9,22 +9,22 @@ def test_assert_frame_equal(): - df1 = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[1, 2, 3]) + df1 = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[1, 2, 3]) assert_frame_equal(df1, df1) - df2 = rc.DataFrame({'a': [1, 1, 1], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[1, 2, 3]) + df2 = rc.DataFrame({"a": [1, 1, 1], "b": [4, 5, 6]}, columns=["a", "b"], index=[1, 2, 3]) with pytest.raises(AssertionError): assert_frame_equal(df1, df2) - df2 = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['b', 'a'], index=[1, 2, 3]) + df2 = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["b", "a"], index=[1, 2, 3]) with pytest.raises(AssertionError): assert_frame_equal(df1, df2) - df2 = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[11, 12, 13]) + df2 = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[11, 12, 13]) with pytest.raises(AssertionError): assert_frame_equal(df1, df2) - df2 = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[1, 2, 3], sort=True) + df2 = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[1, 2, 3], sort=True) with pytest.raises(AssertionError): assert_frame_equal(df1, df2) @@ -36,16 
+36,16 @@ def assert_approx_equal(left_data, right_data, precision=0.00001): for j in range(len(left_data[i])): assert abs(left_data[i][j] - right_data[i][j]) <= precision - df1 = rc.DataFrame({'a': [1.0, 3.0], 'b': [4.0, 6.0]}, columns=['a', 'b'], index=[1, 3]) - df2 = rc.DataFrame({'a': [1.0, 3.001], 'b': [4.0, 6.001]}, columns=['a', 'b'], index=[1, 3]) + df1 = rc.DataFrame({"a": [1.0, 3.0], "b": [4.0, 6.0]}, columns=["a", "b"], index=[1, 3]) + df2 = rc.DataFrame({"a": [1.0, 3.001], "b": [4.0, 6.001]}, columns=["a", "b"], index=[1, 3]) # confirm fails with standard compare with pytest.raises(AssertionError): assert_frame_equal(df1, df2) # passes with function and proper parameters - assert_frame_equal(df1, df2, assert_approx_equal, {'precision': 0.01}) + assert_frame_equal(df1, df2, assert_approx_equal, {"precision": 0.01}) # fails with function and precision parameter to low with pytest.raises(AssertionError): - assert_frame_equal(df1, df2, assert_approx_equal, {'precision': 0.00001}) + assert_frame_equal(df1, df2, assert_approx_equal, {"precision": 0.00001}) diff --git a/tests/test_dataframe/test_validate.py b/tests/test_dataframe/test_validate.py index 6828256..43f28a4 100644 --- a/tests/test_dataframe/test_validate.py +++ b/tests/test_dataframe/test_validate.py @@ -4,62 +4,62 @@ def test_validate_index(): - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9]) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9]) df.validate_integrity() # index not right length with pytest.raises(ValueError): - rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9, 11, 12]) + rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9, 11, 12]) - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9]) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], 
index=[10, 8, 9]) df._index = [1, 2, 3, 4] with pytest.raises(ValueError): df.validate_integrity() # duplicate index with pytest.raises(ValueError): - rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 10, 9]) + rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 10, 9]) - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9]) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9]) with pytest.raises(ValueError): df.index = [10, 10, 10] - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9]) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9]) df._index = [10, 10, 9] with pytest.raises(ValueError): df.validate_integrity() def test_validate_columns(): - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9]) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9]) df.validate_integrity() # columns not right length with pytest.raises(ValueError): - rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b', 'extra']) + rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b", "extra"]) - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b']) - df._columns = ['a', 'b', 'extra'] + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"]) + df._columns = ["a", "b", "extra"] with pytest.raises(ValueError): df.validate_integrity() # duplicate columns - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}) with pytest.raises(ValueError): - df.columns = ['dup', 'dup'] + df.columns = ["dup", "dup"] - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9]) - df._columns = ['dup', 'dup'] + df = 
rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9]) + df._columns = ["dup", "dup"] with pytest.raises(ValueError): df.validate_integrity() def test_validate_data(): - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9]) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9]) df.validate_integrity() - df._data[1] = ['a', 'c'] - assert df.data == [[2, 1, 3], ['a', 'c']] + df._data[1] = ["a", "c"] + assert df.data == [[2, 1, 3], ["a", "c"]] with pytest.raises(ValueError): df.validate_integrity() diff --git a/tests/test_dropin/test_dataframe_blist.py b/tests/test_dropin/test_dataframe_blist.py index 4d832b7..a25df16 100644 --- a/tests/test_dropin/test_dataframe_blist.py +++ b/tests/test_dropin/test_dataframe_blist.py @@ -26,46 +26,54 @@ def check_blist(): check_blist() # add a new row and col - df.set_cell(1, 'a', 1) + df.set_cell(1, "a", 1) check_blist() # add a new row - df.set_cell(2, 'a', 2) + df.set_cell(2, "a", 2) check_blist() # add a new col - df.set_cell(1, 'b', 3) + df.set_cell(1, "b", 3) check_blist() # add a complete new row - df.set_row(3, {'a': 4, 'b': 5}) + df.set_row(3, {"a": 4, "b": 5}) check_blist() # add a complete new col - df.set_column([2, 3], 'c', [6, 7]) + df.set_column([2, 3], "c", [6, 7]) check_blist() def test_assert_frame_equal(): - df1 = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[1, 2, 3]) - df2 = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[1, 2, 3], dropin=blist) + df1 = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[1, 2, 3]) + df2 = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[1, 2, 3], dropin=blist) with pytest.raises(AssertionError): assert_frame_equal(df1, df2) def test_print(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [1.0, 2.55, 3.1], 'c': ['first', 'second', None]}, columns=['b', 'c', 'a'], - 
index=['row1', 'row2', 'row3'], dropin=blist) + df = rc.DataFrame( + {"a": [1, 2, 3], "b": [1.0, 2.55, 3.1], "c": ["first", "second", None]}, + columns=["b", "c", "a"], + index=["row1", "row2", "row3"], + dropin=blist, + ) # __repr__ produces a simple representation - expected = "object id: %s\ncolumns:\nblist(['b', 'c', 'a'])\ndata:\nblist([blist([1.0, 2.55, 3.1]), blist([" \ - "'first', 'second', None]), blist([1, 2, 3])])\nindex:\nblist(['row1', 'row2', 'row3'])\n" % id(df) + expected = ( + "object id: %s\ncolumns:\nblist(['b', 'c', 'a'])\ndata:\nblist([blist([1.0, 2.55, 3.1]), blist([" + "'first', 'second', None]), blist([1, 2, 3])])\nindex:\nblist(['row1', 'row2', 'row3'])\n" % id(df) + ) actual = df.__repr__() assert actual == expected # __string__ produces the standard table - expected = 'index b c a\n------- ---- ------ ---\nrow1 1 first 1\n' \ - 'row2 2.55 second 2\nrow3 3.1 3' + expected = ( + "index b c a\n------- ---- ------ ---\nrow1 1 first 1\n" + "row2 2.55 second 2\nrow3 3.1 3" + ) actual = df.__str__() assert actual == expected @@ -74,7 +82,7 @@ def test_print(): def test_json(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, sort=False, dropin=blist) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, sort=False, dropin=blist) string = df.to_json() actual = rc.DataFrame.from_json(string, blist) @@ -83,19 +91,23 @@ def test_json(): # fails with no dropin supplied with pytest.raises(AttributeError) as e: rc.DataFrame.from_json(string) - assert e == "AttributeError: the JSON has a dropin : : " \ - "but the dropin parameter was not supplied" + assert ( + e == "AttributeError: the JSON has a dropin : : " + "but the dropin parameter was not supplied" + ) # fails with the wrong dropin supplied with pytest.raises(AttributeError) as e: rc.DataFrame.from_json(string, list) - assert e == "AttributeError: the supplied dropin parameter: : does not match the value" \ - " in the JSON: " + assert ( + e == "AttributeError: 
the supplied dropin parameter: : does not match the value" + " in the JSON: " + ) def test_json_objects(): # test with a compound object returning a representation - df = rc.DataFrame({'a': [1, 2], 'b': [4, blist([5, 6])]}) # noqa + df = rc.DataFrame({"a": [1, 2], "b": [4, blist([5, 6])]}) # noqa string = df.to_json() actual = rc.DataFrame.from_json(string) @@ -104,50 +116,48 @@ def test_json_objects(): with pytest.raises(AssertionError): assert_frame_equal(df, actual) - assert actual[1, 'b'] != blist([5, 6]) # noqa - assert actual[1, 'b'] == 'blist([5, 6])' + assert actual[1, "b"] != blist([5, 6]) # noqa + assert actual[1, "b"] == "blist([5, 6])" def test_select_index(): # simple index, not sort, blist - df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=['a', 'b', 'c', 'd', 'e', 'f'], dropin=blist) + df = rc.DataFrame({"a": [1, 2, 3, 4, 5, 6]}, index=["a", "b", "c", "d", "e", "f"], dropin=blist) - actual = df.select_index('c', 'value') - assert actual == ['c'] + actual = df.select_index("c", "value") + assert actual == ["c"] - actual = df.select_index('d', 'boolean') + actual = df.select_index("d", "boolean") assert actual == [False, False, False, True, False, False] def test_columns_blist(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a'], - dropin=blist) + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"], dropin=blist) names = actual.columns - assert names == ['b', 'a'] + assert names == ["b", "a"] assert isinstance(names, blist) # test that a copy is returned - names.append('bad') - assert actual.columns == ['b', 'a'] + names.append("bad") + assert actual.columns == ["b", "a"] - actual.columns = ['new1', 'new2'] - assert actual.columns == ['new1', 'new2'] + actual.columns = ["new1", "new2"] + assert actual.columns == ["new1", "new2"] assert isinstance(actual.columns, blist) with pytest.raises(ValueError): - actual.columns = ['list', 'too', 'long'] + 
actual.columns = ["list", "too", "long"] def test_index_blist(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a'], - dropin=blist) + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"], dropin=blist) result = actual.index - assert result == ['a', 'b', 'c'] + assert result == ["a", "b", "c"] assert isinstance(result, blist) # test that a view is returned - result.append('bad') - assert actual.index == ['a', 'b', 'c', 'bad'] + result.append("bad") + assert actual.index == ["a", "b", "c", "bad"] actual.index = [9, 10, 11] assert actual.index == [9, 10, 11] @@ -159,16 +169,15 @@ def test_index_blist(): def test_data_blist(): - actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a'], - dropin=blist) + actual = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"], columns=["b", "a"], dropin=blist) assert actual.data == [[4, 5, 6], [1, 2, 3]] assert all(isinstance(actual.data[x], blist) for x in range(len(actual.columns))) def test_default_empty_init(): - actual = rc.DataFrame(index=[1, 2, 3], columns=['a', 'b'], dropin=blist) + actual = rc.DataFrame(index=[1, 2, 3], columns=["a", "b"], dropin=blist) assert actual.data == [[None, None, None], [None, None, None]] - assert actual.columns == ['a', 'b'] + assert actual.columns == ["a", "b"] assert actual.index == [1, 2, 3] assert actual.sort is False assert isinstance(actual.index, blist) @@ -178,24 +187,27 @@ def test_default_empty_init(): def test_sort_index(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=[10, 8, 9], sort=False, - dropin=blist) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, columns=["a", "b"], index=[10, 8, 9], sort=False, dropin=blist) df.sort_index() assert isinstance(df.index, blist) - assert_frame_equal(df, rc.DataFrame({'a': [2, 3, 1], 'b': [5, 6, 4]}, columns=['a', 'b'], index=[8, 9, 10], - sort=False, 
dropin=blist)) + assert_frame_equal( + df, + rc.DataFrame({"a": [2, 3, 1], "b": [5, 6, 4]}, columns=["a", "b"], index=[8, 9, 10], sort=False, dropin=blist), + ) def test_sort_column(): - df = rc.DataFrame({'a': [2, 1, 3], 'b': ['a', 'c', 'b']}, columns=['a', 'b'], index=[10, 8, 9], dropin=blist) + df = rc.DataFrame({"a": [2, 1, 3], "b": ["a", "c", "b"]}, columns=["a", "b"], index=[10, 8, 9], dropin=blist) - df.sort_columns('a') + df.sort_columns("a") assert isinstance(df.index, blist) - assert_frame_equal(df, rc.DataFrame({'a': [1, 2, 3], 'b': ['c', 'a', 'b']}, columns=['a', 'b'], index=[8, 10, 9], - dropin=blist)) + assert_frame_equal( + df, rc.DataFrame({"a": [1, 2, 3], "b": ["c", "a", "b"]}, columns=["a", "b"], index=[8, 10, 9], dropin=blist) + ) - df.sort_columns('a', reverse=True) + df.sort_columns("a", reverse=True) assert isinstance(df.index, blist) - assert_frame_equal(df, rc.DataFrame({'a': [3, 2, 1], 'b': ['b', 'a', 'c']}, columns=['a', 'b'], index=[9, 10, 8], - dropin=blist)) + assert_frame_equal( + df, rc.DataFrame({"a": [3, 2, 1], "b": ["b", "a", "c"]}, columns=["a", "b"], index=[9, 10, 8], dropin=blist) + ) diff --git a/tests/test_dropin/test_series_blist.py b/tests/test_dropin/test_series_blist.py index 9b7c7e9..368444d 100644 --- a/tests/test_dropin/test_series_blist.py +++ b/tests/test_dropin/test_series_blist.py @@ -18,11 +18,11 @@ def test_assert_series_equal(): def test_default_empty_init(): - actual = rc.Series(index=[1, 2, 3], data_name='points', dropin=blist) + actual = rc.Series(index=[1, 2, 3], data_name="points", dropin=blist) assert actual.data == [None, None, None] - assert actual.data_name == 'points' + assert actual.data_name == "points" assert actual.index == [1, 2, 3] - assert actual.index_name == 'index' + assert actual.index_name == "index" assert actual.sort is False assert isinstance(actual.index, blist) assert isinstance(actual.data, blist) @@ -58,14 +58,14 @@ def check_blist(): def test_index_blist(): - actual = rc.Series([4, 
5, 6], index=['a', 'b', 'c'], dropin=blist) + actual = rc.Series([4, 5, 6], index=["a", "b", "c"], dropin=blist) result = actual.index - assert result == ['a', 'b', 'c'] + assert result == ["a", "b", "c"] assert isinstance(result, blist) # test that a view is returned - result.append('bad') - assert actual.index == ['a', 'b', 'c', 'bad'] + result.append("bad") + assert actual.index == ["a", "b", "c", "bad"] actual.index = [9, 10, 11] assert actual.index == [9, 10, 11] @@ -77,13 +77,13 @@ def test_index_blist(): def test_data_blist(): - actual = rc.Series([4, 5, 6], index=['a', 'b', 'c'], dropin=blist) + actual = rc.Series([4, 5, 6], index=["a", "b", "c"], dropin=blist) assert actual.data == [4, 5, 6] assert isinstance(actual.data, blist) def test_print(): - srs = rc.Series([1.0, 2.55, 3.1], data_name='boo', index=['row1', 'row2', 'row3'], dropin=blist) + srs = rc.Series([1.0, 2.55, 3.1], data_name="boo", index=["row1", "row2", "row3"], dropin=blist) # __repr__ produces a simple representation expected = "object id: %s\ndata:\nblist([1.0, 2.55, 3.1])\nindex:\nblist(['row1', 'row2', 'row3'])\n" % id(srs) @@ -91,7 +91,7 @@ def test_print(): assert actual == expected # __str__ produces the standard table - expected = 'index boo\n------- -----\nrow1 1\nrow2 2.55\nrow3 3.1' + expected = "index boo\n------- -----\nrow1 1\nrow2 2.55\nrow3 3.1" actual = srs.__str__() assert actual == expected @@ -108,20 +108,20 @@ def test_sort_index(): def test_select_index(): # simple index, not sort, blist - srs = rc.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f'], dropin=blist) - actual = srs.select_index('c', 'value') - assert actual == ['c'] + srs = rc.Series([1, 2, 3, 4, 5, 6], index=["a", "b", "c", "d", "e", "f"], dropin=blist) + actual = srs.select_index("c", "value") + assert actual == ["c"] def test_from_dataframe(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 9], dropin=blist) - actual = rc.ViewSeries.from_dataframe(df, 'b') - expected 
= rc.ViewSeries([4, 5, 6], data_name='b', index=['a', 'b', 9]) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", 9], dropin=blist) + actual = rc.ViewSeries.from_dataframe(df, "b") + expected = rc.ViewSeries([4, 5, 6], data_name="b", index=["a", "b", 9]) assert_series_equal(actual, expected) def test_from_series(): - srs = rc.Series(data=[4, 5, 6], data_name='b', index=['a', 'b', 9], dropin=blist) + srs = rc.Series(data=[4, 5, 6], data_name="b", index=["a", "b", 9], dropin=blist) actual = rc.ViewSeries.from_series(srs) - expected = rc.ViewSeries([4, 5, 6], data_name='b', index=['a', 'b', 9]) + expected = rc.ViewSeries([4, 5, 6], data_name="b", index=["a", "b", 9]) assert_series_equal(actual, expected) diff --git a/tests/test_series/test_delete.py b/tests/test_series/test_delete.py index 3bb29c6..fb02352 100644 --- a/tests/test_series/test_delete.py +++ b/tests/test_series/test_delete.py @@ -5,12 +5,12 @@ def test_delete(): - srs = rc.Series([1, 2, 3], index=['a', 'b', 'c']) + srs = rc.Series([1, 2, 3], index=["a", "b", "c"]) - srs.delete(['a', 'c']) - assert_series_equal(srs, rc.Series([2], index=['b'])) + srs.delete(["a", "c"]) + assert_series_equal(srs, rc.Series([2], index=["b"])) - srs.delete('b') + srs.delete("b") assert_series_equal(srs, rc.Series(sort=False)) # insert back in data @@ -22,29 +22,29 @@ def test_delete(): assert srs.data == [9, 8] assert srs.index == [1, 2] - srs = rc.Series([4, 5, 6], index=['a', 'b', 'c']) + srs = rc.Series([4, 5, 6], index=["a", "b", "c"]) # cannot delete values not in index with pytest.raises(ValueError): - srs.delete(['bad']) + srs.delete(["bad"]) # length of boolean must be len of index with pytest.raises(ValueError): srs.delete([True, False]) srs.delete([True, False, True]) - assert_series_equal(srs, rc.Series([5], index=['b'])) + assert_series_equal(srs, rc.Series([5], index=["b"])) srs.delete([True]) assert_series_equal(srs, rc.Series(sort=False)) def test_delete_sort(): - srs = rc.Series([4, 5, 6], 
index=['a', 'b', 'c'], sort=True) + srs = rc.Series([4, 5, 6], index=["a", "b", "c"], sort=True) - srs.delete(['a', 'c']) - assert_series_equal(srs, rc.Series([5], index=['b'], sort=True)) + srs.delete(["a", "c"]) + assert_series_equal(srs, rc.Series([5], index=["b"], sort=True)) - srs.delete('b') + srs.delete("b") assert_series_equal(srs, rc.Series(sort=True)) # insert back in data @@ -56,17 +56,17 @@ def test_delete_sort(): assert srs.data == [8, 9] assert srs.index == [1, 2] - srs = rc.Series([4, 5, 6], index=['a', 'b', 'c']) + srs = rc.Series([4, 5, 6], index=["a", "b", "c"]) # cannot delete values not in index with pytest.raises(ValueError): - srs.delete(['bad']) + srs.delete(["bad"]) # length of boolean must be len of index with pytest.raises(ValueError): srs.delete([True, False]) srs.delete([True, False, True]) - assert_series_equal(srs, rc.Series([5], index=['b'])) + assert_series_equal(srs, rc.Series([5], index=["b"])) srs.delete([True]) assert_series_equal(srs, rc.Series(sort=False)) diff --git a/tests/test_series/test_get.py b/tests/test_series/test_get.py index b1ecfae..b2e58eb 100644 --- a/tests/test_series/test_get.py +++ b/tests/test_series/test_get.py @@ -41,9 +41,9 @@ def test_get_cell_sorted(): def test_get_rows(): - srs = rc.Series([1, 2, 3, 4], index=[10, 11, 12, 99], index_name='start_10', sort=False) + srs = rc.Series([1, 2, 3, 4], index=[10, 11, 12, 99], index_name="start_10", sort=False) - expected = rc.Series([2, 3], index=[11, 12], index_name='start_10', sort=False) + expected = rc.Series([2, 3], index=[11, 12], index_name="start_10", sort=False) actual = srs.get([11, 12]) assert_series_equal(actual, expected) @@ -52,7 +52,7 @@ def test_get_rows(): assert_series_equal(actual, expected) # index out of order - expected = rc.Series([4, 1], index=[99, 10], index_name='start_10', sort=False) + expected = rc.Series([4, 1], index=[99, 10], index_name="start_10", sort=False) actual = srs.get([99, 10]) assert_series_equal(actual, expected) @@ -72,9 
+72,9 @@ def test_get_rows(): def test_get_rows_sorted(): - srs = rc.Series([1, 2, 3, 4], index=[10, 11, 12, 99], index_name='start_10', sort=True) + srs = rc.Series([1, 2, 3, 4], index=[10, 11, 12, 99], index_name="start_10", sort=True) - expected = rc.Series([2, 3], index=[11, 12], index_name='start_10', sort=True) + expected = rc.Series([2, 3], index=[11, 12], index_name="start_10", sort=True) actual = srs.get([11, 12]) assert_series_equal(actual, expected) @@ -83,7 +83,7 @@ def test_get_rows_sorted(): assert_series_equal(actual, expected) # index out of order - expected = rc.Series([4, 1], index=[99, 10], index_name='start_10', sort=True) + expected = rc.Series([4, 1], index=[99, 10], index_name="start_10", sort=True) actual = srs.get([99, 10]) assert_series_equal(actual, expected) @@ -105,8 +105,8 @@ def test_get_rows_sorted(): def test_get_location(): srs = rc.Series([5, 6, 7, 8], index=[2, 4, 6, 8]) - assert srs.get_location(2) == {'value': 7, 'index': 6} - assert srs.get_location(-1) == {'index': 8, 'value': 8} + assert srs.get_location(2) == {"value": 7, "index": 6} + assert srs.get_location(-1) == {"index": 8, "value": 8} def test_get_locations(): @@ -172,15 +172,15 @@ def test_get_slice_as_list(): def test_get_square_brackets(): - srs = rc.Series([10, 11, 12], index=['a', 'b', 'c'], sort=False) + srs = rc.Series([10, 11, 12], index=["a", "b", "c"], sort=False) - assert srs['b'] == 11 - assert_series_equal(srs[['a', 'c']], rc.Series([10, 12], ['a', 'c'])) + assert srs["b"] == 11 + assert_series_equal(srs[["a", "c"]], rc.Series([10, 12], ["a", "c"])) # get a series back - assert_series_equal(srs[['b']], rc.Series([11], ['b'], sort=False)) + assert_series_equal(srs[["b"]], rc.Series([11], ["b"], sort=False)) - assert_series_equal(srs[['c', 'a']], rc.Series([12, 10], ['c', 'a'], sort=False)) + assert_series_equal(srs[["c", "a"]], rc.Series([12, 10], ["c", "a"], sort=False)) # srs[[0, 2]] -- get indexes = [0, 2] srs = rc.Series([10, 11, 12], sort=False) @@ 
-190,15 +190,15 @@ def test_get_square_brackets(): def test_get_square_brackets_sorted(): - srs = rc.Series([10, 11, 12], index=['a', 'b', 'c'], sort=True) + srs = rc.Series([10, 11, 12], index=["a", "b", "c"], sort=True) - assert srs['b'] == 11 - assert_series_equal(srs[['a', 'c']], rc.Series([10, 12], ['a', 'c'], sort=True)) + assert srs["b"] == 11 + assert_series_equal(srs[["a", "c"]], rc.Series([10, 12], ["a", "c"], sort=True)) # get a series back - assert_series_equal(srs[['b']], rc.Series([11], ['b'], sort=True)) + assert_series_equal(srs[["b"]], rc.Series([11], ["b"], sort=True)) - assert_series_equal(srs[['c', 'a']], rc.Series([12, 10], ['c', 'a'], sort=True)) + assert_series_equal(srs[["c", "a"]], rc.Series([12, 10], ["c", "a"], sort=True)) # srs[[0, 2]] -- get indexes = [0, 2] srs = rc.Series([10, 11, 12], sort=True) diff --git a/tests/test_series/test_getters.py b/tests/test_series/test_getters.py index b7c2107..4e8234e 100644 --- a/tests/test_series/test_getters.py +++ b/tests/test_series/test_getters.py @@ -4,14 +4,14 @@ def test_index(): - actual = rc.Series([4, 5, 6], index=['a', 'b', 'c']) + actual = rc.Series([4, 5, 6], index=["a", "b", "c"]) result = actual.index - assert result == ['a', 'b', 'c'] + assert result == ["a", "b", "c"] assert isinstance(result, list) # test that a view is returned - result.append('bad') - assert actual.index == ['a', 'b', 'c', 'bad'] + result.append("bad") + assert actual.index == ["a", "b", "c", "bad"] actual.index = [9, 10, 11] assert actual.index == [9, 10, 11] @@ -21,21 +21,21 @@ def test_index(): with pytest.raises(ValueError): actual.index = [1, 3, 4, 5, 6] - assert actual.index_name == 'index' - actual.index_name = 'new name' - assert actual.index_name == 'new name' + assert actual.index_name == "index" + actual.index_name = "new name" + assert actual.index_name == "new name" - actual = rc.Series([4, 5, 6], index=['a', 'b', 'c'], index_name='letters') - assert actual.index_name == 'letters' + actual = 
rc.Series([4, 5, 6], index=["a", "b", "c"], index_name="letters") + assert actual.index_name == "letters" def test_index_view(): data = [4, 5, 6] - index = ['a', 'b', 'c'] + index = ["a", "b", "c"] actual = rc.ViewSeries(data, index) result = actual.index - assert result == ['a', 'b', 'c'] + assert result == ["a", "b", "c"] assert isinstance(result, list) # test that a view is returned @@ -43,25 +43,25 @@ def test_index_view(): assert result is actual.index # modify - result[1] = 'new' - assert actual.index == ['a', 'new', 'c'] - assert index == ['a', 'new', 'c'] + result[1] = "new" + assert actual.index == ["a", "new", "c"] + assert index == ["a", "new", "c"] # index too long with pytest.raises(ValueError): actual.index = [1, 3, 4, 5, 6] - assert actual.index_name == 'index' - actual.index_name = 'new name' - assert actual.index_name == 'new name' + assert actual.index_name == "index" + actual.index_name = "new name" + assert actual.index_name == "new name" - actual = rc.Series([4, 5, 6], index=['a', 'b', 'c'], index_name='letters') - assert actual.index_name == 'letters' + actual = rc.Series([4, 5, 6], index=["a", "b", "c"], index_name="letters") + assert actual.index_name == "letters" def test_data(): data = [4, 5, 6] - index = ['a', 'b', 'c'] + index = ["a", "b", "c"] actual = rc.Series(data, index) assert isinstance(actual.data, list) @@ -84,7 +84,7 @@ def test_data(): def test_data_view(): data = [4, 5, 6] - index = ['a', 'b', 'c'] + index = ["a", "b", "c"] actual = rc.ViewSeries(data, index) assert isinstance(actual.data, list) @@ -101,7 +101,7 @@ def test_data_view(): new.append(88) assert new == [99, 5, 6, 88] assert actual.data == [99, 5, 6, 88] - assert actual.index == ['a', 'b', 'c'] + assert actual.index == ["a", "b", "c"] with pytest.raises(AttributeError): # noinspection PyPropertyAccess diff --git a/tests/test_series/test_initialize.py b/tests/test_series/test_initialize.py index e7aca37..ff8b1d1 100644 --- a/tests/test_series/test_initialize.py +++ 
b/tests/test_series/test_initialize.py @@ -7,7 +7,7 @@ def test_default_empty_init(): actual = rc.Series() assert isinstance(actual, rc.Series) assert actual.data == [] - assert actual.data_name == 'value' + assert actual.data_name == "value" assert actual.index == [] assert actual.sort is True assert isinstance(actual.index, list) @@ -18,27 +18,27 @@ def test_default_empty_init(): assert isinstance(actual.index, list) assert isinstance(actual.data, list) - actual = rc.Series(data_name='points') + actual = rc.Series(data_name="points") assert actual.data == [] - assert actual.data_name == 'points' + assert actual.data_name == "points" assert actual.index == [] assert actual.sort is True assert isinstance(actual.index, list) assert isinstance(actual.data, list) - actual = rc.Series(index=[1, 2, 3], data_name='points') + actual = rc.Series(index=[1, 2, 3], data_name="points") assert actual.data == [None, None, None] - assert actual.data_name == 'points' + assert actual.data_name == "points" assert actual.index == [1, 2, 3] assert actual.sort is False assert isinstance(actual.index, list) assert isinstance(actual.data, list) - actual = rc.Series(index=[1, 2, 3], index_name='dates', data_name='points', sort=True) + actual = rc.Series(index=[1, 2, 3], index_name="dates", data_name="points", sort=True) assert actual.data == [None, None, None] - assert actual.data_name == 'points' + assert actual.data_name == "points" assert actual.index == [1, 2, 3] - assert actual.index_name == 'dates' + assert actual.index_name == "dates" assert actual.sort is True assert isinstance(actual.index, list) assert isinstance(actual.data, list) @@ -48,7 +48,7 @@ def test_default_init(): # no index actual = rc.Series([4, 5, 6]) assert actual.data == [4, 5, 6] - assert actual.data_name == 'value' + assert actual.data_name == "value" assert actual.index == [0, 1, 2] assert actual.sort is True assert isinstance(actual.index, list) @@ -56,10 +56,10 @@ def test_default_init(): assert len(actual) 
== 3 # with index - actual = rc.Series(data=[4, 5, 6], index=['a', 'b', 'c'], index_name='letters') + actual = rc.Series(data=[4, 5, 6], index=["a", "b", "c"], index_name="letters") assert actual.data == [4, 5, 6] - assert actual.index == ['a', 'b', 'c'] - assert actual.index_name == 'letters' + assert actual.index == ["a", "b", "c"] + assert actual.index_name == "letters" assert actual.sort is False assert isinstance(actual.index, list) assert isinstance(actual.data, list) @@ -69,7 +69,7 @@ def test_default_init(): def test_views(): # assert that df.data is data and df.index are copies and do not alter input data data = [4, 5, 6] - index = ['a', 'b', 'c'] + index = ["a", "b", "c"] actual = rc.Series(data=data, index=index) assert actual.data is not data @@ -77,10 +77,10 @@ def test_views(): # change input data, no change to series data.append(7) - index.append('e') + index.append("e") assert actual.data == [4, 5, 6] - assert actual.index == ['a', 'b', 'c'] + assert actual.index == ["a", "b", "c"] def test_sorted_init(): @@ -117,7 +117,7 @@ def test_sorted_init(): # mixed type index will bork on sort=True with pytest.raises(TypeError): - rc.Series([5, 4, 6], index=[1, 'b', 3], sort=True) + rc.Series([5, 4, 6], index=[1, "b", 3], sort=True) def test_bad_initialization(): @@ -126,19 +126,19 @@ def test_bad_initialization(): rc.Series([1, 2, 3], index=[1]) with pytest.raises(ValueError): - rc.Series(data=[2], index=['b', 'c', 'a']) + rc.Series(data=[2], index=["b", "c", "a"]) # index is not a list with pytest.raises(TypeError): - rc.Series({'a': [1]}, index=1) + rc.Series({"a": [1]}, index=1) # bad data type with pytest.raises(TypeError): rc.Series(data=(1, 2, 3)) with pytest.raises(TypeError): - rc.Series(data={'data': [1, 2, 3]}) + rc.Series(data={"data": [1, 2, 3]}) # index not a list with pytest.raises(TypeError): - rc.Series(data=[2], index='b') + rc.Series(data=[2], index="b") diff --git a/tests/test_series/test_series.py b/tests/test_series/test_series.py index 
bcfdf50..cfc6a37 100644 --- a/tests/test_series/test_series.py +++ b/tests/test_series/test_series.py @@ -9,13 +9,13 @@ def test_names(): srs = rc.Series([1, 2]) - assert srs.index_name == 'index' - assert srs.data_name == 'value' + assert srs.index_name == "index" + assert srs.data_name == "value" - srs.index_name = 'new_index' - srs.data_name = 'data' - assert srs.index_name == 'new_index' - assert srs.data_name == 'data' + srs.index_name = "new_index" + srs.data_name = "data" + assert srs.index_name == "new_index" + assert srs.data_name == "data" def test_default_list(): @@ -44,24 +44,24 @@ def check_list(): def test_to_dict(): - srs = rc.Series([1, 2, 3], index=['a', 'b', 'c'], data_name='a') + srs = rc.Series([1, 2, 3], index=["a", "b", "c"], data_name="a") # with index actual = srs.to_dict(index=True) - assert actual == {'index': ['a', 'b', 'c'], 'a': [1, 2, 3]} + assert actual == {"index": ["a", "b", "c"], "a": [1, 2, 3]} # without index actual = srs.to_dict(index=False) - assert actual == {'a': [1, 2, 3]} + assert actual == {"a": [1, 2, 3]} # ordered act_order = srs.to_dict(ordered=True) - expected = OrderedDict([('index', ['a', 'b', 'c']), ('a', [1, 2, 3])]) + expected = OrderedDict([("index", ["a", "b", "c"]), ("a", [1, 2, 3])]) assert act_order == expected def test_print(): - srs = rc.Series([1.0, 2.55, 3.1], data_name='boo', index=['row1', 'row2', 'row3']) + srs = rc.Series([1.0, 2.55, 3.1], data_name="boo", index=["row1", "row2", "row3"]) # __repr__ produces a simple representation expected = "object id: %s\ndata:\n[1.0, 2.55, 3.1]\nindex:\n['row1', 'row2', 'row3']\n" % id(srs) @@ -69,7 +69,7 @@ def test_print(): assert actual == expected # __str__ produces the standard table - expected = 'index boo\n------- -----\nrow1 1\nrow2 2.55\nrow3 3.1' + expected = "index boo\n------- -----\nrow1 1\nrow2 2.55\nrow3 3.1" actual = srs.__str__() assert actual == expected @@ -164,66 +164,66 @@ def test_equality(): def test_select_index(): # simple index, not sort - 
srs = rc.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f']) + srs = rc.Series([1, 2, 3, 4, 5, 6], index=["a", "b", "c", "d", "e", "f"]) - actual = srs.select_index('c', 'value') - assert actual == ['c'] + actual = srs.select_index("c", "value") + assert actual == ["c"] - actual = srs.select_index('d', 'boolean') + actual = srs.select_index("d", "boolean") assert actual == [False, False, False, True, False, False] # simple index, sort - srs = rc.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f'], sort=True) + srs = rc.Series([1, 2, 3, 4, 5, 6], index=["a", "b", "c", "d", "e", "f"], sort=True) - actual = srs.select_index('c', 'value') - assert actual == ['c'] + actual = srs.select_index("c", "value") + assert actual == ["c"] - actual = srs.select_index('d', 'boolean') + actual = srs.select_index("d", "boolean") assert actual == [False, False, False, True, False, False] with pytest.raises(ValueError): - srs.select_index('a', 'BAD') + srs.select_index("a", "BAD") # simple index, not sort - srs = rc.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f']) + srs = rc.Series([1, 2, 3, 4, 5, 6], index=["a", "b", "c", "d", "e", "f"]) - actual = srs.select_index('c', 'value') - assert actual == ['c'] + actual = srs.select_index("c", "value") + assert actual == ["c"] - actual = srs.select_index('d', 'boolean') + actual = srs.select_index("d", "boolean") assert actual == [False, False, False, True, False, False] # tuple index - tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)] + tuples = [("a", 1, 3), ("a", 1, 4), ("a", 2, 3), ("b", 1, 4), ("b", 2, 1), ("b", 3, 3)] srs = rc.Series([1, 2, 3, 4, 5, 6], index=tuples) - compare = ('a', None, None) + compare = ("a", None, None) assert srs.select_index(compare) == [True, True, True, False, False, False] - compare = ('a', None, 3) - assert srs.select_index(compare, 'boolean') == [True, False, True, False, False, False] + compare = ("a", None, 3) + assert 
srs.select_index(compare, "boolean") == [True, False, True, False, False, False] compare = (None, 2, None) - assert srs.select_index(compare, 'value') == [('a', 2, 3), ('b', 2, 1)] + assert srs.select_index(compare, "value") == [("a", 2, 3), ("b", 2, 1)] compare = (None, 3, 3) assert srs.select_index(compare) == [False, False, False, False, False, True] compare = (None, None, 3) - assert srs.select_index(compare, 'value') == [('a', 1, 3), ('a', 2, 3), ('b', 3, 3)] + assert srs.select_index(compare, "value") == [("a", 1, 3), ("a", 2, 3), ("b", 3, 3)] - compare = ('a', 1, 4) - assert srs.select_index(compare, 'value') == [('a', 1, 4)] + compare = ("a", 1, 4) + assert srs.select_index(compare, "value") == [("a", 1, 4)] - compare = ('a', 100, 99) - assert srs.select_index(compare, 'value') == [] + compare = ("a", 100, 99) + assert srs.select_index(compare, "value") == [] compare = (None, None, None) assert srs.select_index(compare) == [True] * 6 srs = rc.Series([1, 2, 3, 4, 5, 6]) assert srs.select_index(3) == [False, False, False, True, False, False] - assert srs.select_index(3, 'value') == [3] + assert srs.select_index(3, "value") == [3] def test_isin(): @@ -242,7 +242,7 @@ def test_reset_index(): assert_series_equal(srs, expected) # with index and index name defined - srs = rc.Series([1, 2, 3], index=['x', 'y', 'z'], index_name='jelo') + srs = rc.Series([1, 2, 3], index=["x", "y", "z"], index_name="jelo") srs.reset_index() expected = rc.Series([1, 2, 3], [0, 1, 2], sort=False) assert_series_equal(srs, expected) diff --git a/tests/test_series/test_set.py b/tests/test_series/test_set.py index 89723c5..5b88365 100644 --- a/tests/test_series/test_set.py +++ b/tests/test_series/test_set.py @@ -55,7 +55,7 @@ def test_set_cell_sort(): # fails for mixed index type with pytest.raises(TypeError): - actual.set('Z', 60) + actual.set("Z", 60) def test_set_rows(): @@ -146,7 +146,7 @@ def test_set_index_subset(): actual.set(indexes=[10, 11], values=[1]) # by boolean list - actual 
= rc.Series([7, 8], index=['first', 'second'], sort=False) + actual = rc.Series([7, 8], index=["first", "second"], sort=False) actual.set(indexes=[False, True], values=[99]) assert actual.data == [7, 99] @@ -188,16 +188,16 @@ def test_set_index_subset_sort(): assert actual.index == [4, 5, 6, 10, 11, 12, 13, 14, 15] # new row new columns - actual.set(indexes=[14, 15, 16], values=['zoo', 'boo', 'hoo']) + actual.set(indexes=[14, 15, 16], values=["zoo", "boo", "hoo"]) assert actual.index == [4, 5, 6, 10, 11, 12, 13, 14, 15, 16] - assert actual.data == [-120, -130, 6060, -140, 55, 120, 3131, 'zoo', 'boo', 'hoo'] + assert actual.data == [-120, -130, 6060, -140, 55, 120, 3131, "zoo", "boo", "hoo"] # values list shorter than indexes, raise error with pytest.raises(ValueError): actual.set(indexes=[10, 11], values=[1]) # by boolean list - actual = rc.Series([1, 2], index=['first', 'second'], sort=True) + actual = rc.Series([1, 2], index=["first", "second"], sort=True) actual.set(indexes=[False, True], values=[99]) assert actual.data == [1, 99] diff --git a/tests/test_series/test_sort.py b/tests/test_series/test_sort.py index e4b2e09..59dc848 100644 --- a/tests/test_series/test_sort.py +++ b/tests/test_series/test_sort.py @@ -13,19 +13,19 @@ def test_sort_index(): assert_series_equal(srs, rc.Series([5, 6, 4], index=[8, 9, 10], sort=False)) # fails on mixed type columns - srs = rc.Series([4, 5, 6], index=[10, 'a', 9]) + srs = rc.Series([4, 5, 6], index=[10, "a", 9]) with pytest.raises(TypeError): srs.sort_index() def test_sort_multi_index(): - srs = rc.Series([4, 5, 6], index=[(10, 'c'), (10, 'a'), (10, 'b')], sort=False) + srs = rc.Series([4, 5, 6], index=[(10, "c"), (10, "a"), (10, "b")], sort=False) srs.sort_index() assert isinstance(srs.index, list) - assert_series_equal(srs, rc.Series([5, 6, 4], index=[(10, 'a'), (10, 'b'), (10, 'c')], sort=False)) + assert_series_equal(srs, rc.Series([5, 6, 4], index=[(10, "a"), (10, "b"), (10, "c")], sort=False)) # fails on mixed type 
columns - srs = rc.Series([4, 5, 6], index=[(10, 'c'), 'a', (10, 'b')]) + srs = rc.Series([4, 5, 6], index=[(10, "c"), "a", (10, "b")]) with pytest.raises(TypeError): srs.sort_index() diff --git a/tests/test_series/test_utils.py b/tests/test_series/test_utils.py index 08bd553..eb89be9 100644 --- a/tests/test_series/test_utils.py +++ b/tests/test_series/test_utils.py @@ -59,8 +59,8 @@ def assert_approx_equal(left_data, right_data, precision=0.00001): assert_series_equal(srs1, srs2) # passes with function and proper parameters - assert_series_equal(srs1, srs2, assert_approx_equal, {'precision': 0.01}) + assert_series_equal(srs1, srs2, assert_approx_equal, {"precision": 0.01}) # fails with function and precision parameter to low with pytest.raises(AssertionError): - assert_series_equal(srs1, srs2, assert_approx_equal, {'precision': 0.00001}) + assert_series_equal(srs1, srs2, assert_approx_equal, {"precision": 0.00001}) diff --git a/tests/test_series/test_view_series.py b/tests/test_series/test_view_series.py index 2164880..53e41f0 100644 --- a/tests/test_series/test_view_series.py +++ b/tests/test_series/test_view_series.py @@ -7,32 +7,32 @@ def test_default_empty_init(): actual = rc.ViewSeries(data=[4, 5, 6], index=[1, 2, 3]) assert actual.data == [4, 5, 6] - assert actual.data_name == 'value' + assert actual.data_name == "value" assert actual.index == [1, 2, 3] - assert actual.index_name == 'index' + assert actual.index_name == "index" assert actual.sort is False assert actual.offset == 0 - actual = rc.ViewSeries(data=[4, 5, 6], index=[1, 2, 3], data_name='points', offset=1) + actual = rc.ViewSeries(data=[4, 5, 6], index=[1, 2, 3], data_name="points", offset=1) assert actual.data == [4, 5, 6] - assert actual.data_name == 'points' + assert actual.data_name == "points" assert actual.index == [1, 2, 3] - assert actual.index_name == 'index' + assert actual.index_name == "index" assert actual.sort is False assert actual.offset == 1 - actual = rc.ViewSeries(data=[4, 5, 
6], index=[1, 2, 3], index_name='dates', data_name='points', sort=True) + actual = rc.ViewSeries(data=[4, 5, 6], index=[1, 2, 3], index_name="dates", data_name="points", sort=True) assert actual.data == [4, 5, 6] - assert actual.data_name == 'points' + assert actual.data_name == "points" assert actual.index == [1, 2, 3] - assert actual.index_name == 'dates' + assert actual.index_name == "dates" assert actual.sort is True def test_views(): # assert that df.data is data and df.index are copies and do not alter input data data = [4, 5, 6] - index = ['a', 'b', 'c'] + index = ["a", "b", "c"] actual = rc.ViewSeries(data=data, index=index) assert actual.data is data @@ -40,10 +40,10 @@ def test_views(): # change input data, no change to ViewSeries data.append(7) - index.append('e') + index.append("e") assert actual.data == [4, 5, 6, 7] - assert actual.index == ['a', 'b', 'c', 'e'] + assert actual.index == ["a", "b", "c", "e"] assert actual.data is data assert actual.index is index @@ -84,18 +84,18 @@ def test_bad_initialization(): rc.ViewSeries([1, 2, 3], index=[1]) with pytest.raises(ValueError): - rc.ViewSeries(data=[2], index=['b', 'c', 'a']) + rc.ViewSeries(data=[2], index=["b", "c", "a"]) # index is not a list with pytest.raises(TypeError): - rc.ViewSeries({'a': [1]}, index=1) + rc.ViewSeries({"a": [1]}, index=1) # index not a list with pytest.raises(TypeError): - rc.ViewSeries(data=[2], index='b') + rc.ViewSeries(data=[2], index="b") with pytest.raises(ValueError): - rc.ViewSeries(data={'data': [1, 2, 3]}, index=[4, 5, 6]) + rc.ViewSeries(data={"data": [1, 2, 3]}, index=[4, 5, 6]) def test_mixed_type_init(): @@ -127,76 +127,76 @@ def test_not_implemented(): def test_from_dataframe(): - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 9]) - actual = rc.ViewSeries.from_dataframe(df, 'b') - expected = rc.ViewSeries([4, 5, 6], data_name='b', index=['a', 'b', 9]) + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", 9]) + actual = 
rc.ViewSeries.from_dataframe(df, "b") + expected = rc.ViewSeries([4, 5, 6], data_name="b", index=["a", "b", 9]) assert_series_equal(actual, expected) - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'e'], sort=True, index_name='date') - actual = rc.ViewSeries.from_dataframe(df, 'a', -1) - expected = rc.ViewSeries([1, 2, 3], data_name='a', index=['a', 'b', 'e'], sort=True, offset=-1, index_name='date') + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "e"], sort=True, index_name="date") + actual = rc.ViewSeries.from_dataframe(df, "a", -1) + expected = rc.ViewSeries([1, 2, 3], data_name="a", index=["a", "b", "e"], sort=True, offset=-1, index_name="date") assert_series_equal(actual, expected) def test_from_series(): - srs = rc.Series(data=[4, 5, 6], index=['a', 'b', 9], data_name='b') + srs = rc.Series(data=[4, 5, 6], index=["a", "b", 9], data_name="b") actual = rc.ViewSeries.from_series(srs) - expected = rc.ViewSeries([4, 5, 6], data_name='b', index=['a', 'b', 9]) + expected = rc.ViewSeries([4, 5, 6], data_name="b", index=["a", "b", 9]) assert_series_equal(actual, expected) - srs = rc.Series(data=[1, 2, 3], data_name='a', index=['a', 'b', 'e'], sort=True, index_name='date') + srs = rc.Series(data=[1, 2, 3], data_name="a", index=["a", "b", "e"], sort=True, index_name="date") actual = rc.ViewSeries.from_series(srs, -1) - expected = rc.ViewSeries([1, 2, 3], data_name='a', index=['a', 'b', 'e'], sort=True, offset=-1, index_name='date') + expected = rc.ViewSeries([1, 2, 3], data_name="a", index=["a", "b", "e"], sort=True, offset=-1, index_name="date") assert_series_equal(actual, expected) def test_from_df_view(): # sort = False - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 9], sort=False) - srs = rc.ViewSeries.from_dataframe(df, 'b') + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", 9], sort=False) + srs = rc.ViewSeries.from_dataframe(df, "b") assert srs.sort is False assert srs.index 
is df.index - assert srs.data is df.get_entire_column('b', True) + assert srs.data is df.get_entire_column("b", True) # change cell - df['a', 'b'] = 22 + df["a", "b"] = 22 assert srs.data == [22, 5, 6] - assert srs.index == ['a', 'b', 9] + assert srs.index == ["a", "b", 9] # add a row - df[11, 'b'] = -88 + df[11, "b"] = -88 assert srs.data == [22, 5, 6, -88] - assert srs.index == ['a', 'b', 9, 11] + assert srs.index == ["a", "b", 9, 11] # append row - df.append_row(12, {'a': 55, 'b': 77}) + df.append_row(12, {"a": 55, "b": 77}) assert srs.data == [22, 5, 6, -88, 77] - assert srs.index == ['a', 'b', 9, 11, 12] + assert srs.index == ["a", "b", 9, 11, 12] # sort = True - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 5], sort=True) - srs = rc.ViewSeries.from_dataframe(df, 'a') + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[0, 1, 5], sort=True) + srs = rc.ViewSeries.from_dataframe(df, "a") assert srs.sort is True assert srs.index is df.index - assert srs.data is df.get_entire_column('a', True) + assert srs.data is df.get_entire_column("a", True) # change cell - df[1, 'a'] = 22 + df[1, "a"] = 22 assert srs.data == [1, 22, 3] assert srs.index == [0, 1, 5] # add a row end - df[6, 'a'] = 4 + df[6, "a"] = 4 assert srs.data == [1, 22, 3, 4] assert srs.index == [0, 1, 5, 6] # add value in middle - df[2, 'a'] = 12 + df[2, "a"] = 12 assert srs.data == [1, 22, 12, 3, 4] assert srs.index == [0, 1, 2, 5, 6] # append row - df.append_row(7, {'a': 55, 'b': 77}) + df.append_row(7, {"a": 55, "b": 77}) assert srs.data == [1, 22, 12, 3, 4, 55] assert srs.index == [0, 1, 2, 5, 6, 7] @@ -205,61 +205,61 @@ def test_from_df_view_breaks(): # These actions will break the view link between the DataFrame and the ViewSeries # changing index - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 5], sort=True) - srs = rc.ViewSeries.from_dataframe(df, 'a') + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[0, 1, 5], sort=True) + srs = 
rc.ViewSeries.from_dataframe(df, "a") assert srs.index is df.index - assert srs.data is df.get_entire_column('a', True) + assert srs.data is df.get_entire_column("a", True) df.index = [1, 2, 3] assert srs.index is not df.index - assert srs.data is df.get_entire_column('a', True) + assert srs.data is df.get_entire_column("a", True) # sorting index - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 5], sort=True) - srs = rc.ViewSeries.from_dataframe(df, 'a') + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[0, 1, 5], sort=True) + srs = rc.ViewSeries.from_dataframe(df, "a") assert srs.index is df.index - assert srs.data is df.get_entire_column('a', True) + assert srs.data is df.get_entire_column("a", True) df.sort_index() assert srs.index is not df.index - assert srs.data is not df.get_entire_column('a', True) + assert srs.data is not df.get_entire_column("a", True) # sorting column - df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 5], sort=True) - srs = rc.ViewSeries.from_dataframe(df, 'a') + df = rc.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[0, 1, 5], sort=True) + srs = rc.ViewSeries.from_dataframe(df, "a") assert srs.index is df.index - assert srs.data is df.get_entire_column('a', True) + assert srs.data is df.get_entire_column("a", True) - df.sort_columns('b') + df.sort_columns("b") assert srs.index is not df.index - assert srs.data is not df.get_entire_column('a', True) + assert srs.data is not df.get_entire_column("a", True) def test_from_series_view(): # sort = False - ins = rc.Series(data=[4, 5, 6], data_name='b', index=['a', 'b', 9], sort=False) + ins = rc.Series(data=[4, 5, 6], data_name="b", index=["a", "b", 9], sort=False) srs = rc.ViewSeries.from_series(ins) assert srs.sort is False assert srs.index is srs.index assert srs.data is ins.data # change cell - ins['a'] = 22 + ins["a"] = 22 assert srs.data == [22, 5, 6] - assert srs.index == ['a', 'b', 9] + assert srs.index == ["a", "b", 9] # add a row ins[11] 
= -88 assert srs.data == [22, 5, 6, -88] - assert srs.index == ['a', 'b', 9, 11] + assert srs.index == ["a", "b", 9, 11] # append row ins.append_row(12, 77) assert srs.data == [22, 5, 6, -88, 77] - assert srs.index == ['a', 'b', 9, 11, 12] + assert srs.index == ["a", "b", 9, 11, 12] # sort = True - ins = rc.Series(data=[1, 2, 3], data_name='a', index=[0, 1, 5], sort=True) + ins = rc.Series(data=[1, 2, 3], data_name="a", index=[0, 1, 5], sort=True) srs = rc.ViewSeries.from_series(ins) assert srs.sort is True assert srs.index is srs.index @@ -290,7 +290,7 @@ def test_from_series_view_breaks(): # These actions will break the view link between the Series and the ViewSeries # changing index - ins = rc.Series(data=[1, 2, 3], data_name='a', index=[0, 1, 5], sort=True) + ins = rc.Series(data=[1, 2, 3], data_name="a", index=[0, 1, 5], sort=True) srs = rc.ViewSeries.from_series(ins) assert srs.index is ins.index assert srs.data is ins.data @@ -300,7 +300,7 @@ def test_from_series_view_breaks(): assert srs.data is ins.data # sorting index - ins = rc.Series(data=[1, 2, 3], data_name='a', index=[0, 1, 5], sort=True) + ins = rc.Series(data=[1, 2, 3], data_name="a", index=[0, 1, 5], sort=True) srs = rc.ViewSeries.from_series(ins) assert srs.index is ins.index assert srs.data is ins.data @@ -375,13 +375,13 @@ def test_value(): def test_get_square_brackets(): - srs = rc.ViewSeries([10, 11, 12], index=['a', 'b', 'c'], sort=False) + srs = rc.ViewSeries([10, 11, 12], index=["a", "b", "c"], sort=False) # by index - assert srs['b'] == 11 - assert srs[['a', 'c']] == [10, 12] - assert srs['b':'c'] == [11, 12] - assert srs['a':'b'] == [10, 11] + assert srs["b"] == 11 + assert srs[["a", "c"]] == [10, 12] + assert srs["b":"c"] == [11, 12] + assert srs["a":"b"] == [10, 11] # by location assert srs[1] == 11 @@ -391,13 +391,13 @@ def test_get_square_brackets(): def test_get_square_brackets_offset(): - srs = rc.ViewSeries([10, 11, 12], index=['a', 'b', 'c'], sort=False, offset=1) + srs = 
rc.ViewSeries([10, 11, 12], index=["a", "b", "c"], sort=False, offset=1) # by index - assert srs['b'] == 11 - assert srs[['a', 'c']] == [10, 12] - assert srs['b':'c'] == [11, 12] - assert srs['a':'b'] == [10, 11] + assert srs["b"] == 11 + assert srs[["a", "c"]] == [10, 12] + assert srs["b":"c"] == [11, 12] + assert srs["a":"b"] == [10, 11] # by location assert srs[1] == 10 @@ -414,13 +414,13 @@ def test_get_square_brackets_offset(): assert srs[-2:0] == [10, 11, 12] # sort = True - srs = rc.ViewSeries([10, 11, 12], index=['a', 'b', 'c'], sort=True, offset=1) + srs = rc.ViewSeries([10, 11, 12], index=["a", "b", "c"], sort=True, offset=1) # by index - assert srs['b'] == 11 - assert srs[['a', 'c']] == [10, 12] - assert srs['b':'c'] == [11, 12] - assert srs['a':'b'] == [10, 11] + assert srs["b"] == 11 + assert srs[["a", "c"]] == [10, 12] + assert srs["b":"c"] == [11, 12] + assert srs["a":"b"] == [10, 11] # by location assert srs[1] == 10 From 1985dee4db3ba6fa552124b07800beef4c6fba62 Mon Sep 17 00:00:00 2001 From: Ryan Sheftel Date: Tue, 15 Apr 2025 14:28:51 -0400 Subject: [PATCH 2/6] add as_namedtuple option to the dataframe.py get_columns() and get_location() --- raccoon/dataframe.py | 40 ++++++++++++++++++++++++----- tests/test_dataframe/test_get.py | 43 ++++++++++++++++++++++++++++---- 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/raccoon/dataframe.py b/raccoon/dataframe.py index d0b5d37..0e6521b 100644 --- a/raccoon/dataframe.py +++ b/raccoon/dataframe.py @@ -337,18 +337,33 @@ def get_rows(self, indexes: list[bool | Any], column: Any, as_list: bool = False else DataFrame(data={column: data}, index=index, index_name=self._index_name, sort=self._sort) ) - def get_columns(self, index: Any, columns: list[Any] = None, as_dict: bool = False) -> Self | dict: + def get_columns( + self, + index: Any, + columns: list[Any] = None, + as_dict: bool = False, + as_namedtuple: bool = False, + name: str = "raccoon", + include_index: bool = True, + ) -> Self | dict 
| namedtuple: """ For a single index and list of column names return a DataFrame of the values in that index as either a dict - or a DataFrame + or a namedtuple, or a DataFrame. :param index: single index value :param columns: list of column names :param as_dict: if True then return the result as a dictionary + :param as_namedtuple: if True then return the result as a named tuple + :param name: if as_namedtuple is True, this will be the name of the tuple + :param include_index: if True then include the index value in the result :return: DataFrame or dictionary """ + assert not (as_dict and as_namedtuple), "can only provide as_dict or as_namedtuple as True, not both" i = sorted_index(self._index, index) if self._sort else self._index.index(index) - return self.get_location(i, columns, as_dict) + if as_namedtuple: + dict_row = self.get_location(location=i, columns=columns, as_dict=True, index=include_index) + return namedtuple(name, dict_row.keys())(**dict_row) + return self.get_location(location=i, columns=columns, as_dict=as_dict, index=include_index) def get_entire_column(self, column: Any, as_list: bool = False) -> Self | list: """ @@ -409,8 +424,14 @@ def get_matrix(self, indexes: list[Any | bool], columns: list[Any]) -> Self: return DataFrame(data=data_dict, index=indexes, columns=columns, index_name=self._index_name, sort=self._sort) def get_location( - self, location: int, columns: Any | list | None = None, as_dict: bool = False, index: bool = True - ) -> Self | dict | Any: + self, + location: int, + columns: Any | list | None = None, + as_dict: bool = False, + as_namedtuple: bool = False, + name: str = "raccoon", + index: bool = True, + ) -> Self | dict | namedtuple | Any: """ For an index location and either (1) list of columns return a DataFrame or dictionary of the values or (2) single column name and return the value of that cell.
This is optimized for speed because it does not need @@ -420,9 +441,12 @@ def get_location( :param location: index location in standard python form of positive or negative number :param columns: list of columns, single column name, or None to include all columns :param as_dict: if True then return a dictionary + :param as_namedtuple: if True then return the result as a named tuple + :param name: if as_namedtuple is True, this will be the name of the tuple :param index: if True then include the index in the dictionary if as_dict=True - :return: DataFrame or dictionary if columns is a list or value if columns is a single column name + :return: DataFrame, dictionary or namedtuple if columns is a list or value if columns is a single column name """ + assert not (as_dict and as_namedtuple), "can only provide as_dict or as_namedtuple as True, not both" if columns is None: columns = self._columns elif not isinstance(columns, list): # single value for columns @@ -441,6 +465,10 @@ def get_location( if index: data[self._index_name] = index_value return data + elif as_namedtuple: + if index: + data[self._index_name] = index_value + return namedtuple(name, data.keys())(**data) else: data = {k: [data[k]] for k in data} # this makes the dict items lists return DataFrame( diff --git a/tests/test_dataframe/test_get.py b/tests/test_dataframe/test_get.py index 6b05cb8..e5f1439 100644 --- a/tests/test_dataframe/test_get.py +++ b/tests/test_dataframe/test_get.py @@ -1,3 +1,5 @@ +from collections import namedtuple + import pytest import raccoon as rc @@ -154,10 +156,6 @@ def test_get_columns(): actual = df.get(11, ["c", "b"]) assert_frame_equal(actual, expected) - # as_dict - assert df.get(11, ["b", "c"], as_dict=True) == {"start_10": 11, "b": 5, "c": 8} - assert df.get_columns(11, ["b", "c"], as_dict=True) == {"start_10": 11, "b": 5, "c": 8} - # test boolean list not same length as columns with pytest.raises(ValueError): df.get(99, [True, False]) @@ -167,6 +165,37 @@ def 
test_get_columns(): df.get(88, ["a", "c"]) +def test_get_columns_as_dict_or_tuple(): + df = rc.DataFrame( + {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [7, 8, 9, None]}, + index=[10, 11, 12, 99], + columns=["a", "b", "c"], + index_name="start_10", + sort=False, + ) + + # as_dict + assert df.get(11, ["b", "c"], as_dict=True) == {"start_10": 11, "b": 5, "c": 8} + assert df.get_columns(11, ["b", "c"], as_dict=True) == {"start_10": 11, "b": 5, "c": 8} + + # as dict no index + assert df.get_columns(11, ["b", "c"], as_dict=True, include_index=False) == {"b": 5, "c": 8} + + # as_namedtuple + expected = namedtuple("raccoon", ["b", "c", "start_10"])(**{"b": 5, "c": 8, "start_10": 11}) + assert df.get_columns(11, ["b", "c"], as_namedtuple=True) == expected + + expected = namedtuple("newname", ["b", "c"])(**{"b": 5, "c": 8}) + assert df.get_columns(11, ["b", "c"], as_namedtuple=True, name="newname", include_index=False) == expected + + expected = namedtuple("newname", ["a", "b", "c"])(**{"a": 4, "b": 7, "c": None}) + assert df.get_columns(99, None, as_namedtuple=True, name="newname", include_index=False) == expected + + # cannot be both as_dict and as_namedtuple + with pytest.raises(AssertionError): + df.get_columns(11, ["b", "c"], as_dict=True, as_namedtuple=True) + + def test_get_columns_sorted(): df = rc.DataFrame( {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [7, 8, 9, None]}, @@ -292,13 +321,17 @@ def test_get_matrix_sorted(): df.get_matrix(["x", "y"], ["a", "b", "BAD"]) -def test_get_location(): +def test_get_location_as_dict_namedtuple(): df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8]) # forward indexing, all columns assert_frame_equal(df.get_location(2), rc.DataFrame({"a": [3], "b": [7]}, index=[6])) assert df.get_location(2, as_dict=True) == {"index": 6, "a": 3, "b": 7} assert df.get_location(2, as_dict=True, index=False) == {"a": 3, "b": 7} + expected = namedtuple("newname", ["a", "b", "index"])(**{"a": 3, "b": 7, "index": 6}) + assert 
df.get_location(2, as_namedtuple=True, name="newname") == expected + expected = namedtuple("raccoon", ["a", "b"])(**{"a": 3, "b": 7}) + assert df.get_location(2, as_namedtuple=True, index=False) == expected # reverse indexing, all columns assert_frame_equal(df.get_location(-1), rc.DataFrame({"a": [4], "b": [8]}, index=[8])) From dc179357a65fa9e3ad145a2f9c776cfb9c750f67 Mon Sep 17 00:00:00 2001 From: Ryan Sheftel Date: Tue, 15 Apr 2025 19:00:14 -0400 Subject: [PATCH 3/6] update examples, and add github actions for pytest --- .github/workflows/python-test.yml | 34 + docs/change_log.rst | 1 + examples/usage_dataframe.ipynb | 1584 +++++++++++++++++++---------- pyproject.toml | 32 +- tests/test_dataframe/test_get.py | 4 + 5 files changed, 1083 insertions(+), 572 deletions(-) create mode 100644 .github/workflows/python-test.yml diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml new file mode 100644 index 0000000..b9278e5 --- /dev/null +++ b/.github/workflows/python-test.yml @@ -0,0 +1,34 @@ +# This workflow will install Python dependencies and run the tests with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "dev", "master" ] + pull_request: + branches: [ "dev", "master" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12", "3.13" ] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install pytest tabulate + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with pytest + run: | + pytest diff --git a/docs/change_log.rst b/docs/change_log.rst
index 0eb6265..a76b3d2 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -187,3 +187,4 @@ an installation requirement. 3.2.0 (04/14/25) ~~~~~~~~~~~~~~~~ - Add type hints +- Add as_namedtuple option to DataFrame get_columns() and get_location() diff --git a/examples/usage_dataframe.ipynb b/examples/usage_dataframe.ipynb index 06984ba..8584166 100644 --- a/examples/usage_dataframe.ipynb +++ b/examples/usage_dataframe.ipynb @@ -10,31 +10,41 @@ }, { "cell_type": "code", - "execution_count": 1, "metadata": { "pycharm": { "is_executing": false + }, + "ExecuteTime": { + "end_time": "2025-04-15T22:47:08.746098Z", + "start_time": "2025-04-15T22:47:08.741814Z" } }, - "outputs": [], "source": [ "# remove comment to use latest development version\n", - "import sys; sys.path.insert(0, '../')" - ] + "import sys;\n", + "\n", + "sys.path.insert(0, '../')" + ], + "outputs": [], + "execution_count": 132 }, { "cell_type": "code", - "execution_count": 2, "metadata": { "pycharm": { "is_executing": false + }, + "ExecuteTime": { + "end_time": "2025-04-15T22:47:08.782962Z", + "start_time": "2025-04-15T22:47:08.777904Z" } }, - "outputs": [], "source": [ "# import libraries\n", "import raccoon as rc" - ] + ], + "outputs": [], + "execution_count": 133 }, { "cell_type": "markdown", @@ -46,17 +56,25 @@ }, { "cell_type": "code", - "execution_count": 3, "metadata": { "pycharm": { "is_executing": false + }, + "ExecuteTime": { + "end_time": "2025-04-15T22:47:08.815176Z", + "start_time": "2025-04-15T22:47:08.808758Z" } }, + "source": [ + "# empty DataFrame\n", + "df = rc.DataFrame()\n", + "df" + ], "outputs": [ { "data": { "text/plain": [ - "object id: 2305959579080\n", + "object id: 1602323641696\n", "columns:\n", "[]\n", "data:\n", @@ -65,30 +83,34 @@ "[]" ] }, - "execution_count": 3, + "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# empty DataFrame\n", - "df = rc.DataFrame()\n", - "df" - ] + "execution_count": 134 }, { "cell_type": 
"code", - "execution_count": 4, "metadata": { "pycharm": { "is_executing": false + }, + "ExecuteTime": { + "end_time": "2025-04-15T22:47:08.834496Z", + "start_time": "2025-04-15T22:47:08.827370Z" } }, + "source": [ + "# with columns and indexes but no data\n", + "df = rc.DataFrame(columns=['a', 'b', 'c'], index=[1, 2, 3])\n", + "df" + ], "outputs": [ { "data": { "text/plain": [ - "object id: 2305959578792\n", + "object id: 1602323630096\n", "columns:\n", "['a', 'b', 'c']\n", "data:\n", @@ -97,30 +119,34 @@ "[1, 2, 3]" ] }, - "execution_count": 4, + "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# with columns and indexes but no data\n", - "df = rc.DataFrame(columns=['a', 'b', 'c'], index=[1, 2, 3])\n", - "df" - ] + "execution_count": 135 }, { "cell_type": "code", - "execution_count": 5, "metadata": { "pycharm": { "is_executing": false + }, + "ExecuteTime": { + "end_time": "2025-04-15T22:47:08.897683Z", + "start_time": "2025-04-15T22:47:08.890981Z" } }, + "source": [ + "# with data\n", + "df = rc.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[10, 11, 12], columns=['a', 'b'])\n", + "df" + ], "outputs": [ { "data": { "text/plain": [ - "object id: 2305959818248\n", + "object id: 1602323182784\n", "columns:\n", "['a', 'b']\n", "data:\n", @@ -129,16 +155,12 @@ "[10, 11, 12]" ] }, - "execution_count": 5, + "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# with data\n", - "df = rc.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[10, 11, 12], columns=['a', 'b'])\n", - "df" - ] + "execution_count": 136 }, { "cell_type": "markdown", @@ -150,12 +172,18 @@ }, { "cell_type": "code", - "execution_count": 6, "metadata": { "pycharm": { "is_executing": false + }, + "ExecuteTime": { + "end_time": "2025-04-15T22:47:08.929244Z", + "start_time": "2025-04-15T22:47:08.924065Z" } }, + "source": [ + "df.print()" + ], "outputs": [ { "name": "stdout", @@ -169,14 +197,19 @@ ] } ], - 
"source": [ - "df.print()" - ] + "execution_count": 137 }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:08.975453Z", + "start_time": "2025-04-15T22:47:08.970798Z" + } + }, + "source": [ + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -190,9 +223,7 @@ ] } ], - "source": [ - "print(df)" - ] + "execution_count": 138 }, { "cell_type": "markdown", @@ -204,8 +235,16 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.075689Z", + "start_time": "2025-04-15T22:47:09.069310Z" + } + }, + "source": [ + "# columns\n", + "df.columns" + ], "outputs": [ { "data": { @@ -213,20 +252,25 @@ "['a', 'b']" ] }, - "execution_count": 8, + "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# columns\n", - "df.columns" - ] + "execution_count": 139 }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.157721Z", + "start_time": "2025-04-15T22:47:09.149672Z" + } + }, + "source": [ + "df.columns = ['first', 'second']\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -240,15 +284,21 @@ ] } ], - "source": [ - "df.columns = ['first', 'second']\n", - "print(df)" - ] + "execution_count": 140 }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.222747Z", + "start_time": "2025-04-15T22:47:09.216632Z" + } + }, + "source": [ + "# columns can be renamed with a dict()\n", + "df.rename_columns({'second': 'b', 'first': 'a'})\n", + "df.columns" + ], "outputs": [ { "data": { @@ -256,21 +306,25 @@ "['a', 'b']" ] }, - "execution_count": 10, + "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# columns can be renamed with a dict()\n", - "df.rename_columns({'second': 'b', 
'first': 'a'})\n", - "df.columns" - ] + "execution_count": 141 }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.276707Z", + "start_time": "2025-04-15T22:47:09.270218Z" + } + }, + "source": [ + "# index\n", + "df.index" + ], "outputs": [ { "data": { @@ -278,20 +332,26 @@ "[10, 11, 12]" ] }, - "execution_count": 11, + "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# index\n", - "df.index" - ] + "execution_count": 142 }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.345384Z", + "start_time": "2025-04-15T22:47:09.339360Z" + } + }, + "source": [ + "#indexes can be any non-repeating unique values\n", + "df.index = ['apple', 'pear', 7.7]\n", + "df.print()" + ], "outputs": [ { "name": "stdout", @@ -305,16 +365,20 @@ ] } ], - "source": [ - "#indexes can be any non-repeating unique values\n", - "df.index = ['apple', 'pear', 7.7]\n", - "df.print()" - ] + "execution_count": 143 }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.376508Z", + "start_time": "2025-04-15T22:47:09.370958Z" + } + }, + "source": [ + "df.index = [10, 11, 12]\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -328,15 +392,20 @@ ] } ], - "source": [ - "df.index = [10, 11, 12]\n", - "print(df)" - ] + "execution_count": 144 }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.424851Z", + "start_time": "2025-04-15T22:47:09.418477Z" + } + }, + "source": [ + "# the index can also have a name, by default it is \"index\"\n", + "df.index_name" + ], "outputs": [ { "data": { @@ -344,20 +413,25 @@ "'index'" ] }, - "execution_count": 14, + "execution_count": 145, "metadata": {}, "output_type": "execute_result" } ], -
"source": [ - "# the index can also have a name, befault it is \"index\"\n", - "df.index_name" - ] + "execution_count": 145 }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.499603Z", + "start_time": "2025-04-15T22:47:09.493327Z" + } + }, + "source": [ + "df.index_name = 'units'\n", + "df.index_name" + ], "outputs": [ { "data": { @@ -365,20 +439,26 @@ "'units'" ] }, - "execution_count": 15, + "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "df.index_name = 'units'\n", - "df.index_name" - ] + "execution_count": 146 }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.556073Z", + "start_time": "2025-04-15T22:47:09.546661Z" + } + }, + "source": [ + "# data is a shallow copy, be careful on how this is used\n", + "df.index_name = 'index'\n", + "df.data" + ], "outputs": [ { "data": { @@ -386,16 +466,12 @@ "[[1, 2, 3], [4, 5, 6]]" ] }, - "execution_count": 16, + "execution_count": 147, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# data is a shallow copy, be careful on how this is used\n", - "df.index_name = 'index'\n", - "df.data" - ] + "execution_count": 147 }, { "cell_type": "markdown", @@ -407,8 +483,15 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.614009Z", + "start_time": "2025-04-15T22:47:09.605998Z" + } + }, + "source": [ + "df.select_index(11)" + ], "outputs": [ { "data": { @@ -416,14 +499,12 @@ "[False, True, False]" ] }, - "execution_count": 17, + "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "df.select_index(11)" - ] + "execution_count": 148 }, { "cell_type": "markdown", @@ -435,8 +516,17 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, + "metadata": { + 
"ExecuteTime": { + "end_time": "2025-04-15T22:47:09.646115Z", + "start_time": "2025-04-15T22:47:09.638399Z" + } + }, + "source": [ + "# set a single cell\n", + "df.set(10, 'a', 100)\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -450,16 +540,21 @@ ] } ], - "source": [ - "# set a single cell\n", - "df.set(10, 'a', 100)\n", - "print(df)" - ] + "execution_count": 149 }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.721432Z", + "start_time": "2025-04-15T22:47:09.715612Z" + } + }, + "source": [ + "# set a value outside current range creates a new row and/or column. Can also use [] for setting\n", + "df[13, 'c'] = 9\n", + "df.print()" + ], "outputs": [ { "name": "stdout", @@ -474,16 +569,21 @@ ] } ], - "source": [ - "# set a value outside current range creates a new row and/or column. Can also use [] for setting\n", - "df[13, 'c'] = 9\n", - "df.print()" - ] + "execution_count": 150 }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.773982Z", + "start_time": "2025-04-15T22:47:09.766893Z" + } + }, + "source": [ + "# set column\n", + "df['b'] = 55\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -498,16 +598,21 @@ ] } ], - "source": [ - "# set column\n", - "df['b'] = 55\n", - "print(df)" - ] + "execution_count": 151 }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.845538Z", + "start_time": "2025-04-15T22:47:09.839361Z" + } + }, + "source": [ + "# set a subset of column\n", + "df[[10, 12], 'b'] = 66\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -522,16 +627,21 @@ ] } ], - "source": [ - "# set a subset of column\n", - "df[[10, 12], 'b'] = 66\n", - "print(df)" - ] + "execution_count": 152 }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, + "metadata": { + 
"ExecuteTime": { + "end_time": "2025-04-15T22:47:09.889245Z", + "start_time": "2025-04-15T22:47:09.883036Z" + } + }, + "source": [ + "# using boolean list\n", + "df.set([True, False, True, False], 'b', [88, 99])\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -546,17 +656,22 @@ ] } ], - "source": [ - "# using boolean list\n", - "df.set([True, False, True, False], 'b', [88, 99])\n", - "print(df)" - ] + "execution_count": 153 }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.919905Z", + "start_time": "2025-04-15T22:47:09.913766Z" + } + }, + "source": [ + "# setting with slices\n", + "df[12:13, 'a'] = 33\n", + "print(df)" + ], + "outputs": [ { "name": "stdout", "output_type": "stream", @@ -570,16 +685,20 @@ ] } ], - "source": [ - "# setting with slices\n", - "df[12:13, 'a'] = 33\n", - "print(df)" - ] + "execution_count": 154 }, { "cell_type": "code", - "execution_count": 24, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:09.989443Z", + "start_time": "2025-04-15T22:47:09.983229Z" + } + }, + "source": [ + "df[10:12, 'c'] = [1, 2, 3]\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -594,15 +713,21 @@ ] } ], - "source": [ - "df[10:12, 'c'] = [1, 2, 3]\n", - "print(df)" - ] + "execution_count": 155 }, { "cell_type": "code", - "execution_count": 25, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.012289Z", + "start_time": "2025-04-15T22:47:10.005278Z" + } + }, + "source": [ + "# append a row, DANGEROUS as there is not validation checking, but can be used for speed\n", + "df.append_row(14, {'a': 44, 'c': 100, 'd': 99})\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -618,16 +743,21 @@ ] } ], - "source": [ - "# append a row, DANGEROUS as there is not validation checking, but can be used for speed\n", - "df.append_row(14, {'a': 44, 'c': 100, 'd': 99})\n", - 
"print(df)" - ] + "execution_count": 156 }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.073845Z", + "start_time": "2025-04-15T22:47:10.067595Z" + } + }, + "source": [ + "# append rows, again use caution\n", + "df.append_rows([15, 16], {'a': [55, 56], 'd': [100, 101]})\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -645,11 +775,7 @@ ] } ], - "source": [ - "# append rows, again use caution\n", - "df.append_rows([15, 16], {'a': [55, 56], 'd': [100,101]})\n", - "print(df)" - ] + "execution_count": 157 }, { "cell_type": "markdown", @@ -661,8 +787,16 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.107007Z", + "start_time": "2025-04-15T22:47:10.101364Z" + } + }, + "source": [ + "# get a single cell\n", + "df[10, 'a']" + ], "outputs": [ { "data": { @@ -670,20 +804,25 @@ "100" ] }, - "execution_count": 27, + "execution_count": 158, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# get a single cell\n", - "df[10, 'a']" - ] + "execution_count": 158 }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.152672Z", + "start_time": "2025-04-15T22:47:10.147193Z" + } + }, + "source": [ + "# get an entire column\n", + "df['c'].print()" + ], "outputs": [ { "name": "stdout", @@ -701,15 +840,20 @@ ] } ], - "source": [ - "# get an entire column\n", - "df['c'].print()" - ] + "execution_count": 159 }, { "cell_type": "code", - "execution_count": 29, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.201198Z", + "start_time": "2025-04-15T22:47:10.195206Z" + } + }, + "source": [ + "# get list of columns\n", + "df[['a', 'c']].print()" + ], "outputs": [ { "name": "stdout", @@ -727,15 +871,20 @@ ] } ], - "source": [ - "# get list of columns\n", - "df[['a', 
'c']].print()" - ] + "execution_count": 160 }, { "cell_type": "code", - "execution_count": 30, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.272100Z", + "start_time": "2025-04-15T22:47:10.267019Z" + } + }, + "source": [ + "# get subset of the index\n", + "df[[11, 12, 13], 'b'].print()" + ], "outputs": [ { "name": "stdout", @@ -749,15 +898,20 @@ ] } ], - "source": [ - "# get subset of the index\n", - "df[[11, 12, 13], 'b'].print()" - ] + "execution_count": 161 }, { "cell_type": "code", - "execution_count": 31, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.306719Z", + "start_time": "2025-04-15T22:47:10.301050Z" + } + }, + "source": [ + "# get using slices\n", + "df[11:13, 'b'].print()" + ], "outputs": [ { "name": "stdout", @@ -771,15 +925,20 @@ ] } ], - "source": [ - "# get using slices\n", - "df[11:13, 'b'].print()" - ] + "execution_count": 162 }, { "cell_type": "code", - "execution_count": 32, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.351164Z", + "start_time": "2025-04-15T22:47:10.345315Z" + } + }, + "source": [ + "# get a matrix\n", + "df[10:11, ['a', 'c']].print()" + ], "outputs": [ { "name": "stdout", @@ -792,15 +951,20 @@ ] } ], - "source": [ - "# get a matrix\n", - "df[10:11, ['a', 'c']].print()" - ] + "execution_count": 163 }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.420751Z", + "start_time": "2025-04-15T22:47:10.414126Z" + } + }, + "source": [ + "# get a column, return as a list\n", + "df.get(columns='a', as_list=True)" + ], "outputs": [ { "data": { @@ -808,20 +972,25 @@ "[100, 2, 33, 33, 44, 55, 56]" ] }, - "execution_count": 33, + "execution_count": 164, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# get a column, return as a list\n", - "df.get(columns='a', as_list=True)" - ] + "execution_count": 164 }, { 
"cell_type": "code", - "execution_count": 34, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.466573Z", + "start_time": "2025-04-15T22:47:10.459827Z" + } + }, + "source": [ + "# get a row and return as a dictionary\n", + "df.get_columns(index=13, columns=['a', 'b'], as_dict=True)" + ], "outputs": [ { "data": { @@ -829,15 +998,38 @@ "{'a': 33, 'b': 55, 'index': 13}" ] }, - "execution_count": 34, + "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], + "execution_count": 165 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.500533Z", + "start_time": "2025-04-15T22:47:10.492898Z" + } + }, + "cell_type": "code", "source": [ - "# get a row and return as a dictionary\n", - "df.get_columns(index=13, columns=['a', 'b'], as_dict=True)" - ] + "# get a row and return as a namedtuple, excluding the index\n", + "df.get_columns(index=13, columns=['a', 'b'], as_namedtuple=True, name=\"tuplename\", include_index=False)" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "tuplename(a=33, b=55)" + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 166 }, { "cell_type": "markdown", @@ -850,8 +1042,16 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.581191Z", + "start_time": "2025-04-15T22:47:10.575155Z" + } + }, + "source": [ + "# get a single cell\n", + "df.get_location(2, 'a')" + ], "outputs": [ { "data": { @@ -859,20 +1059,25 @@ "33" ] }, - "execution_count": 35, + "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# get a single cell\n", - "df.get_location(2, 'a')" - ] + "execution_count": 167 }, { "cell_type": "code", - "execution_count": 36, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.675921Z", + "start_time": "2025-04-15T22:47:10.670595Z" + 
} + }, + "source": [ + "# get an entire row when the columns is None\n", + "print(df.get_location(2))" + ], "outputs": [ { "name": "stdout", @@ -884,32 +1089,65 @@ ] } ], - "source": [ - "# get an entire row when the columns is None\n", - "print(df.get_location(2))" - ] + "execution_count": 168 }, { "cell_type": "code", - "execution_count": 37, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.824274Z", + "start_time": "2025-04-15T22:47:10.818095Z" + } + }, + "source": "df.get_location(0, ['b', 'c'], as_dict=True)", "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'b': 88, 'c': 1, 'index': 10}\n" - ] + "data": { + "text/plain": [ + "{'b': 88, 'c': 1, 'index': 10}" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" } ], - "source": [ - "print(df.get_location(0, ['b', 'c'], as_dict=True))" - ] + "execution_count": 169 }, { + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:10.990392Z", + "start_time": "2025-04-15T22:47:10.983317Z" + } + }, "cell_type": "code", - "execution_count": 38, - "metadata": {}, + "source": "df.get_location(1, as_namedtuple=True, name=\"tuplename\", index=False)", + "outputs": [ + { + "data": { + "text/plain": [ + "tuplename(a=2, b=55, c=2, d=None)" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 170 + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.173318Z", + "start_time": "2025-04-15T22:47:11.168316Z" + } + }, + "source": [ + "df.get_location(-1).print()" + ], "outputs": [ { "name": "stdout", @@ -921,14 +1159,19 @@ ] } ], - "source": [ - "df.get_location(-1).print()" - ] + "execution_count": 171 }, { "cell_type": "code", - "execution_count": 39, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.251008Z", + "start_time": "2025-04-15T22:47:11.244620Z" + } + }, + 
"source": [ + "df.get_locations(locations=[0, 2]).print()" + ], "outputs": [ { "name": "stdout", @@ -941,14 +1184,20 @@ ] } ], - "source": [ - "df.get_locations(locations=[0, 2]).print()" - ] + "execution_count": 172 }, { "cell_type": "code", - "execution_count": 40, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.324973Z", + "start_time": "2025-04-15T22:47:11.318821Z" + } + }, + "source": [ + "df.set_locations(locations=[0, 2], column='a', values=-9)\n", + "df.print()" + ], "outputs": [ { "name": "stdout", @@ -966,10 +1215,7 @@ ] } ], - "source": [ - "df.set_locations(locations=[0, 2], column='a', values=-9)\n", - "df.print()" - ] + "execution_count": 173 }, { "cell_type": "markdown", @@ -981,8 +1227,15 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.465349Z", + "start_time": "2025-04-15T22:47:11.459938Z" + } + }, + "source": [ + "df.head(2).print()" + ], "outputs": [ { "name": "stdout", @@ -995,14 +1248,19 @@ ] } ], - "source": [ - "df.head(2).print()" - ] + "execution_count": 174 }, { "cell_type": "code", - "execution_count": 42, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.569526Z", + "start_time": "2025-04-15T22:47:11.564092Z" + } + }, + "source": [ + "df.tail(2).print()" + ], "outputs": [ { "name": "stdout", @@ -1015,9 +1273,7 @@ ] } ], - "source": [ - "df.tail(2).print()" - ] + "execution_count": 175 }, { "cell_type": "markdown", @@ -1029,8 +1285,16 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.640002Z", + "start_time": "2025-04-15T22:47:11.634285Z" + } + }, + "source": [ + "df.delete_rows([10, 13])\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1044,17 +1308,22 @@ " 15 55 100\n", " 16 56 101\n" ] - } - ], - "source": [ - "df.delete_rows([10, 13])\n", - "print(df)" - ] + } + ], 
+ "execution_count": 176 }, { "cell_type": "code", - "execution_count": 44, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.677591Z", + "start_time": "2025-04-15T22:47:11.671222Z" + } + }, + "source": [ + "df.delete_columns('b')\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1070,10 +1339,7 @@ ] } ], - "source": [ - "df.delete_columns('b')\n", - "print(df)" - ] + "execution_count": 177 }, { "cell_type": "markdown", @@ -1085,8 +1351,16 @@ }, { "cell_type": "code", - "execution_count": 45, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.729564Z", + "start_time": "2025-04-15T22:47:11.721116Z" + } + }, + "source": [ + "# return a dict\n", + "df.to_dict()" + ], "outputs": [ { "data": { @@ -1097,20 +1371,25 @@ " 'd': [None, None, 99, 100, 101]}" ] }, - "execution_count": 45, + "execution_count": 178, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# return a dict\n", - "df.to_dict()" - ] + "execution_count": 178 }, { "cell_type": "code", - "execution_count": 46, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.794083Z", + "start_time": "2025-04-15T22:47:11.787165Z" + } + }, + "source": [ + "# exclude the index\n", + "df.to_dict(index=False)" + ], "outputs": [ { "data": { @@ -1120,20 +1399,25 @@ " 'd': [None, None, 99, 100, 101]}" ] }, - "execution_count": 46, + "execution_count": 179, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# exclude the index\n", - "df.to_dict(index=False)" - ] + "execution_count": 179 }, { "cell_type": "code", - "execution_count": 47, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.847297Z", + "start_time": "2025-04-15T22:47:11.840815Z" + } + }, + "source": [ + "# return an OrderedDict()\n", + "df.to_dict(ordered=True)" + ], "outputs": [ { "data": { @@ -1144,20 +1428,25 @@ " ('d', [None, None, 99, 100, 101])])" ] }, - 
"execution_count": 47, + "execution_count": 180, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# return an OrderedDict()\n", - "df.to_dict(ordered=True)" - ] + "execution_count": 180 }, { "cell_type": "code", - "execution_count": 48, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.903623Z", + "start_time": "2025-04-15T22:47:11.896528Z" + } + }, + "source": [ + "# return a list of just one column\n", + "df['c'].to_list()" + ], "outputs": [ { "data": { @@ -1165,20 +1454,26 @@ "[2, 3, 100, None, None]" ] }, - "execution_count": 48, + "execution_count": 181, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# return a list of just one column\n", - "df['c'].to_list()" - ] + "execution_count": 181 }, { "cell_type": "code", - "execution_count": 49, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.946982Z", + "start_time": "2025-04-15T22:47:11.942611Z" + } + }, + "source": [ + "# convert to JSON\n", + "string = df.to_json()\n", + "print(string)" + ], "outputs": [ { "name": "stdout", @@ -1188,16 +1483,21 @@ ] } ], - "source": [ - "# convert to JSON\n", - "string = df.to_json()\n", - "print(string)" - ] + "execution_count": 182 }, { "cell_type": "code", - "execution_count": 50, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:11.986301Z", + "start_time": "2025-04-15T22:47:11.981793Z" + } + }, + "source": [ + "# construct DataFrame from JSON\n", + "df_from_json = rc.DataFrame.from_json(string)\n", + "print(df_from_json)" + ], "outputs": [ { "name": "stdout", @@ -1213,11 +1513,7 @@ ] } ], - "source": [ - "# construct DataFrame from JSON\n", - "df_from_json = rc.DataFrame.from_json(string)\n", - "print(df_from_json)" - ] + "execution_count": 183 }, { "cell_type": "markdown", @@ -1229,8 +1525,16 @@ }, { "cell_type": "code", - "execution_count": 51, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": 
"2025-04-15T22:47:12.009287Z", + "start_time": "2025-04-15T22:47:12.004191Z" + } + }, + "source": [ + "df = rc.DataFrame({'a': [4, 3, 2, 1], 'b': [6, 7, 8, 9]}, index=[25, 24, 23, 22])\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1245,15 +1549,21 @@ ] } ], - "source": [ - "df = rc.DataFrame({'a': [4, 3, 2, 1], 'b': [6, 7, 8, 9]}, index=[25, 24, 23, 22])\n", - "print(df)" - ] + "execution_count": 184 }, { "cell_type": "code", - "execution_count": 52, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.080437Z", + "start_time": "2025-04-15T22:47:12.073689Z" + } + }, + "source": [ + "# sort by index. Sorts are inplace\n", + "df.sort_index()\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1268,16 +1578,21 @@ ] } ], - "source": [ - "# sort by index. Sorts are inplace\n", - "df.sort_index()\n", - "print(df)" - ] + "execution_count": 185 }, { "cell_type": "code", - "execution_count": 53, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.162889Z", + "start_time": "2025-04-15T22:47:12.156099Z" + } + }, + "source": [ + "# sort by column\n", + "df.sort_columns('b')\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1292,16 +1607,21 @@ ] } ], - "source": [ - "# sort by column\n", - "df.sort_columns('b')\n", - "print(df)" - ] + "execution_count": 186 }, { "cell_type": "code", - "execution_count": 54, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.222185Z", + "start_time": "2025-04-15T22:47:12.216668Z" + } + }, + "source": [ + "# sort by column in reverse order\n", + "df.sort_columns('b', reverse=True)\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1316,20 +1636,21 @@ ] } ], - "source": [ - "# sort by column in reverse order\n", - "df.sort_columns('b', reverse=True)\n", - "print(df)" - ] + "execution_count": 187 }, { "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [], + "metadata": { + 
"ExecuteTime": { + "end_time": "2025-04-15T22:47:12.270342Z", + "start_time": "2025-04-15T22:47:12.266284Z" + } + }, "source": [ "# sorting with a key function is avaialble, see tests for examples" - ] + ], + "outputs": [], + "execution_count": 188 }, { "cell_type": "markdown", @@ -1341,8 +1662,16 @@ }, { "cell_type": "code", - "execution_count": 56, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.303923Z", + "start_time": "2025-04-15T22:47:12.298653Z" + } + }, + "source": [ + "df1 = rc.DataFrame({'a': [1, 2], 'b': [5, 6]}, index=[1, 2])\n", + "df1.print()" + ], "outputs": [ { "name": "stdout", @@ -1355,15 +1684,20 @@ ] } ], - "source": [ - "df1 = rc.DataFrame({'a': [1, 2], 'b': [5, 6]}, index=[1, 2])\n", - "df1.print()" - ] + "execution_count": 189 }, { "cell_type": "code", - "execution_count": 57, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.340612Z", + "start_time": "2025-04-15T22:47:12.332333Z" + } + }, + "source": [ + "df2 = rc.DataFrame({'b': [7, 8], 'c': [11, 12]}, index=[3, 4])\n", + "print(df2)" + ], "outputs": [ { "name": "stdout", @@ -1376,15 +1710,20 @@ ] } ], - "source": [ - "df2 = rc.DataFrame({'b': [7, 8], 'c': [11, 12]}, index=[3, 4])\n", - "print(df2)" - ] + "execution_count": 190 }, { "cell_type": "code", - "execution_count": 58, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.374375Z", + "start_time": "2025-04-15T22:47:12.366020Z" + } + }, + "source": [ + "df1.append(df2)\n", + "print(df1)" + ], "outputs": [ { "name": "stdout", @@ -1399,10 +1738,7 @@ ] } ], - "source": [ - "df1.append(df2)\n", - "print(df1)" - ] + "execution_count": 191 }, { "cell_type": "markdown", @@ -1414,17 +1750,30 @@ }, { "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.421557Z", + "start_time": "2025-04-15T22:47:12.416340Z" + } + }, "source": [ "df = 
rc.DataFrame({'a': [1, 2, 3], 'b': [2, 8, 9]})" - ] + ], + "outputs": [], + "execution_count": 192 }, { "cell_type": "code", - "execution_count": 60, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.480489Z", + "start_time": "2025-04-15T22:47:12.474321Z" + } + }, + "source": [ + "# test for equality\n", + "df.equality('a', value=3)" + ], "outputs": [ { "data": { @@ -1432,20 +1781,25 @@ "[False, False, True]" ] }, - "execution_count": 60, + "execution_count": 193, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# test for equality\n", - "df.equality('a', value=3)" - ] + "execution_count": 193 }, { "cell_type": "code", - "execution_count": 61, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.502979Z", + "start_time": "2025-04-15T22:47:12.497639Z" + } + }, + "source": [ + "# all math methods can operate on a subset of the index\n", + "df.equality('b', indexes=[1, 2], value=2)" + ], "outputs": [ { "data": { @@ -1453,20 +1807,25 @@ "[False, False]" ] }, - "execution_count": 61, + "execution_count": 194, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# all math methods can operate on a subset of the index\n", - "df.equality('b', indexes=[1, 2], value=2)" - ] + "execution_count": 194 }, { "cell_type": "code", - "execution_count": 62, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.552408Z", + "start_time": "2025-04-15T22:47:12.546810Z" + } + }, + "source": [ + "# add two columns\n", + "df.add('a', 'b')" + ], "outputs": [ { "data": { @@ -1474,20 +1833,25 @@ "[3, 10, 12]" ] }, - "execution_count": 62, + "execution_count": 195, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# add two columns\n", - "df.add('a', 'b')" - ] + "execution_count": 195 }, { "cell_type": "code", - "execution_count": 63, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.596610Z", + 
"start_time": "2025-04-15T22:47:12.591328Z" + } + }, + "source": [ + "# subtract\n", + "df.subtract('b', 'a')" + ], "outputs": [ { "data": { @@ -1495,20 +1859,25 @@ "[1, 6, 6]" ] }, - "execution_count": 63, + "execution_count": 196, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# subtract\n", - "df.subtract('b', 'a')" - ] + "execution_count": 196 }, { "cell_type": "code", - "execution_count": 64, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.632380Z", + "start_time": "2025-04-15T22:47:12.626779Z" + } + }, + "source": [ + "# multiply\n", + "df.multiply('a', 'b', [0, 2])" + ], "outputs": [ { "data": { @@ -1516,20 +1885,25 @@ "[2, 27]" ] }, - "execution_count": 64, + "execution_count": 197, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# multiply\n", - "df.multiply('a', 'b', [0, 2])" - ] + "execution_count": 197 }, { "cell_type": "code", - "execution_count": 65, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.673467Z", + "start_time": "2025-04-15T22:47:12.668116Z" + } + }, + "source": [ + "# divide\n", + "df.divide('b', 'a')" + ], "outputs": [ { "data": { @@ -1537,15 +1911,12 @@ "[2.0, 4.0, 3.0]" ] }, - "execution_count": 65, + "execution_count": 198, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "# divide\n", - "df.divide('b', 'a')" - ] + "execution_count": 198 }, { "cell_type": "markdown", @@ -1559,8 +1930,17 @@ }, { "cell_type": "code", - "execution_count": 66, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.743156Z", + "start_time": "2025-04-15T22:47:12.737422Z" + } + }, + "source": [ + "tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]\n", + "df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=tuples)\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1577,11 +1957,7 @@ ] } ], - "source": [ - "tuples = [('a', 1, 3), ('a', 1, 4), 
('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]\n", - "df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=tuples)\n", - "print(df)" - ] + "execution_count": 199 }, { "cell_type": "markdown", @@ -1592,8 +1968,16 @@ }, { "cell_type": "code", - "execution_count": 67, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.813260Z", + "start_time": "2025-04-15T22:47:12.807006Z" + } + }, + "source": [ + "compare = ('a', None, None)\n", + "df.select_index(compare)" + ], "outputs": [ { "data": { @@ -1601,20 +1985,25 @@ "[True, True, True, False, False, False]" ] }, - "execution_count": 67, + "execution_count": 200, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "compare = ('a', None, None)\n", - "df.select_index(compare)" - ] + "execution_count": 200 }, { "cell_type": "code", - "execution_count": 68, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.850419Z", + "start_time": "2025-04-15T22:47:12.844931Z" + } + }, + "source": [ + "compare = ('a', None, 3)\n", + "df.select_index(compare, 'boolean')" + ], "outputs": [ { "data": { @@ -1622,20 +2011,25 @@ "[True, False, True, False, False, False]" ] }, - "execution_count": 68, + "execution_count": 201, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "compare = ('a', None, 3)\n", - "df.select_index(compare, 'boolean')" - ] + "execution_count": 201 }, { "cell_type": "code", - "execution_count": 69, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.893226Z", + "start_time": "2025-04-15T22:47:12.887477Z" + } + }, + "source": [ + "compare = (None, 2, None)\n", + "df.select_index(compare, 'value')" + ], "outputs": [ { "data": { @@ -1643,20 +2037,25 @@ "[('a', 2, 3), ('b', 2, 1)]" ] }, - "execution_count": 69, + "execution_count": 202, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "compare = (None, 2, None)\n", - "df.select_index(compare, 'value')" - ] + 
"execution_count": 202 }, { "cell_type": "code", - "execution_count": 70, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.931270Z", + "start_time": "2025-04-15T22:47:12.925555Z" + } + }, + "source": [ + "compare = (None, None, 3)\n", + "df.select_index(compare, 'value')" + ], "outputs": [ { "data": { @@ -1664,20 +2063,25 @@ "[('a', 1, 3), ('a', 2, 3), ('b', 3, 3)]" ] }, - "execution_count": 70, + "execution_count": 203, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "compare = (None, None, 3)\n", - "df.select_index(compare, 'value')" - ] + "execution_count": 203 }, { "cell_type": "code", - "execution_count": 71, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.965439Z", + "start_time": "2025-04-15T22:47:12.960694Z" + } + }, + "source": [ + "compare = (None, None, None)\n", + "df.select_index(compare)" + ], "outputs": [ { "data": { @@ -1685,15 +2089,12 @@ "[True, True, True, True, True, True]" ] }, - "execution_count": 71, + "execution_count": 204, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "compare = (None, None, None)\n", - "df.select_index(compare)" - ] + "execution_count": 204 }, { "cell_type": "markdown", @@ -1705,8 +2106,16 @@ }, { "cell_type": "code", - "execution_count": 72, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:12.986721Z", + "start_time": "2025-04-15T22:47:12.982181Z" + } + }, + "source": [ + "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'])\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1720,20 +2129,25 @@ ] } ], - "source": [ - "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'])\n", - "print(df)" - ] + "execution_count": 205 }, { "cell_type": "code", - "execution_count": 73, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.031902Z", + "start_time": "2025-04-15T22:47:13.026515Z" + } + }, + 
"source": [ + "df.reset_index()\n", + "df" + ], "outputs": [ { "data": { "text/plain": [ - "object id: 2305960012584\n", + "object id: 1602314513728\n", "columns:\n", "['a', 'b', 'index_0']\n", "data:\n", @@ -1742,20 +2156,25 @@ "[0, 1, 2]" ] }, - "execution_count": 73, + "execution_count": 206, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "df.reset_index()\n", - "df" - ] + "execution_count": 206 }, { "cell_type": "code", - "execution_count": 74, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.058276Z", + "start_time": "2025-04-15T22:47:13.053567Z" + } + }, + "source": [ + "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1769,15 +2188,20 @@ ] } ], - "source": [ - "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')\n", - "print(df)" - ] + "execution_count": 207 }, { "cell_type": "code", - "execution_count": 75, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.107204Z", + "start_time": "2025-04-15T22:47:13.102194Z" + } + }, + "source": [ + "df.reset_index()\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1791,15 +2215,21 @@ ] } ], - "source": [ - "df.reset_index()\n", - "print(df)" - ] + "execution_count": 208 }, { "cell_type": "code", - "execution_count": 76, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.136737Z", + "start_time": "2025-04-15T22:47:13.131786Z" + } + }, + "source": [ + "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'],\n", + " index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')], index_name=('melo', 'helo', 'gelo'))\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1813,16 +2243,20 @@ ] } ], - "source": [ - "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 
'b'],\n", - " index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')], index_name=('melo', 'helo', 'gelo'))\n", - "print(df)" - ] + "execution_count": 209 }, { "cell_type": "code", - "execution_count": 77, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.174199Z", + "start_time": "2025-04-15T22:47:13.169724Z" + } + }, + "source": [ + "df.reset_index()\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1836,15 +2270,20 @@ ] } ], - "source": [ - "df.reset_index()\n", - "print(df)" - ] + "execution_count": 210 }, { "cell_type": "code", - "execution_count": 78, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.212948Z", + "start_time": "2025-04-15T22:47:13.207218Z" + } + }, + "source": [ + "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1858,15 +2297,20 @@ ] } ], - "source": [ - "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')\n", - "print(df)" - ] + "execution_count": 211 }, { "cell_type": "code", - "execution_count": 79, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.251260Z", + "start_time": "2025-04-15T22:47:13.246941Z" + } + }, + "source": [ + "df.reset_index(drop=True)\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -1880,10 +2324,7 @@ ] } ], - "source": [ - "df.reset_index(drop=True)\n", - "print(df)" - ] + "execution_count": 212 }, { "cell_type": "markdown", @@ -1895,17 +2336,30 @@ }, { "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.284325Z", + "start_time": "2025-04-15T22:47:13.280388Z" + } + }, "source": [ "df = rc.DataFrame({'a': [1, 2, 'c'], 'b': [5, 6, 'd']}, index=[1, 2, 3])" - ] + ], + "outputs": [], + "execution_count": 213 }, 
{ "cell_type": "code", - "execution_count": 81, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.311413Z", + "start_time": "2025-04-15T22:47:13.305726Z" + } + }, + "source": [ + "for row in df.iterrows():\n", + " print(row)" + ], "outputs": [ { "name": "stdout", @@ -1917,15 +2371,20 @@ ] } ], - "source": [ - "for row in df.iterrows():\n", - " print(row)" - ] + "execution_count": 214 }, { "cell_type": "code", - "execution_count": 82, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.376046Z", + "start_time": "2025-04-15T22:47:13.371280Z" + } + }, + "source": [ + "for row in df.itertuples():\n", + " print(row)" + ], "outputs": [ { "name": "stdout", @@ -1937,10 +2396,7 @@ ] } ], - "source": [ - "for row in df.itertuples():\n", - " print(row)" - ] + "execution_count": 215 }, { "cell_type": "markdown", @@ -1953,12 +2409,17 @@ }, { "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.481935Z", + "start_time": "2025-04-15T22:47:13.477795Z" + } + }, "source": [ "df = rc.DataFrame({'a': [3, 5, 4], 'b': [6, 8, 7]}, index=[12, 15, 14], sort=True)" - ] + ], + "outputs": [], + "execution_count": 216 }, { "cell_type": "markdown", @@ -1969,8 +2430,15 @@ }, { "cell_type": "code", - "execution_count": 84, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.547862Z", + "start_time": "2025-04-15T22:47:13.542996Z" + } + }, + "source": [ + "df.print()" + ], "outputs": [ { "name": "stdout", @@ -1984,14 +2452,20 @@ ] } ], - "source": [ - "df.print()" - ] + "execution_count": 217 }, { "cell_type": "code", - "execution_count": 85, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.607717Z", + "start_time": "2025-04-15T22:47:13.602745Z" + } + }, + "source": [ + "df[16, 'b'] = 9\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -2006,15 
+2480,20 @@ ] } ], - "source": [ - "df[16, 'b'] = 9\n", - "print(df)" - ] + "execution_count": 218 }, { "cell_type": "code", - "execution_count": 86, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-04-15T22:47:13.642524Z", + "start_time": "2025-04-15T22:47:13.637770Z" + } + }, + "source": [ + "df.set(indexes=13, values={'a': 3.5, 'b': 6.5})\n", + "print(df)" + ], "outputs": [ { "name": "stdout", @@ -2030,17 +2509,14 @@ ] } ], - "source": [ - "df.set(indexes=13, values={'a': 3.5, 'b': 6.5})\n", - "print(df)" - ] + "execution_count": 219 } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:.conda-raccoon]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "conda-env-.conda-raccoon-py" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/pyproject.toml b/pyproject.toml index 0257e88..a468065 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,32 +2,28 @@ name = "raccoon" version = "3.2.0" authors = [ - { name="Ryan Sheftel", email="rsheftel@alumni.upenn.edu" }, + { name = "Ryan Sheftel", email = "rsheftel@alumni.upenn.edu" }, ] description = 'Python DataFrame with fast insert and appends' readme = "README.rst" requires-python = ">=3.4" keywords = ['dataframe', 'data', 'structure'] -license = { text="MIT" } +license = { text = "MIT" } classifiers = [ - 'Development Status :: 5 - Production/Stable', - - # Indicate who your project is intended for - 'Intended Audience :: Developers', - 'Topic :: Software Development', - - # Pick your license as you wish (should match "license" above) - 'License :: OSI Approved :: MIT License', - - # Specify the Python versions you support here. 
In particular, ensure - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', + 'Development Status :: 5 - Production/Stable', + # Indicate who your project is intended for + 'Intended Audience :: Developers', + 'Topic :: Software Development', + # Pick your license as you wish (should match "license" above) + 'License :: OSI Approved :: MIT License', + # Specify the Python versions you support here. In particular, ensure + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', - 'Programming Language :: Python :: 3.14', ] dependencies = ['tabulate'] diff --git a/tests/test_dataframe/test_get.py b/tests/test_dataframe/test_get.py index e5f1439..415ac6c 100644 --- a/tests/test_dataframe/test_get.py +++ b/tests/test_dataframe/test_get.py @@ -351,6 +351,10 @@ def test_get_location_as_dict_namedtuple(): # single value for column and not list returns just the value assert df.get_location(1, "b") == 6 + # cannot have both as_dict and as_namedtuple True + with pytest.raises(AssertionError): + df.get_location(2, as_dict=True, as_namedtuple=True) + def test_get_locations(): df = rc.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, index=[2, 4, 6, 8]) From 9e2e802776a3de9ad48190a44460f978840e13e1 Mon Sep 17 00:00:00 2001 From: Ryan Sheftel Date: Tue, 15 Apr 2025 19:07:33 -0400 Subject: [PATCH 4/6] Test compatibility with different Python versions --- docs/change_log.rst | 1 + raccoon/dataframe.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/change_log.rst b/docs/change_log.rst index a76b3d2..eb4f761 100644 --- a/docs/change_log.rst +++ 
b/docs/change_log.rst @@ -188,3 +188,4 @@ an installation requirement. ~~~~~~~~~~~~~~~~ - Add type hints - Add as_namedtuple option to DataFrame get_columns() and get_location() +- minimum python version now 3.12 diff --git a/raccoon/dataframe.py b/raccoon/dataframe.py index 0e6521b..e82de04 100644 --- a/raccoon/dataframe.py +++ b/raccoon/dataframe.py @@ -6,7 +6,7 @@ from bisect import bisect_left, bisect_right from collections import OrderedDict, namedtuple from itertools import compress -from typing import Any, Callable, Generator, Literal, Self +from typing import Any, Callable, Iterator, Literal, Self from tabulate import tabulate @@ -1220,7 +1220,7 @@ def isin(self, column: Any, compare_list: list) -> list[bool]: """ return [x in compare_list for x in self._data[self._columns.index(column)]] - def iterrows(self, index: bool = True) -> Generator[dict]: + def iterrows(self, index: bool = True) -> Iterator[dict]: """ Iterates over DataFrame rows as dictionary of the values. The index will be included. @@ -1233,7 +1233,7 @@ def iterrows(self, index: bool = True) -> Generator[dict]: row[col] = self._data[c][i] yield row - def itertuples(self, index: bool = True, name: str = "Raccoon") -> Generator[namedtuple]: + def itertuples(self, index: bool = True, name: str = "Raccoon") -> Iterator[namedtuple]: """ Iterates over DataFrame rows as tuple of the values. 
From 4e9d177d6fc54caf246c3630495d1fc005156ac3 Mon Sep 17 00:00:00 2001 From: Ryan Sheftel Date: Tue, 15 Apr 2025 19:09:37 -0400 Subject: [PATCH 5/6] Increase min python version to 3.11 --- .github/workflows/python-test.yml | 2 +- docs/change_log.rst | 2 +- pyproject.toml | 6 +----- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index b9278e5..eb64e0a 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12", "3.13" ] + python-version: [ "3.11", "3.12", "3.13" ] steps: - uses: actions/checkout@v4 diff --git a/docs/change_log.rst b/docs/change_log.rst index eb4f761..1b7089f 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -188,4 +188,4 @@ an installation requirement. ~~~~~~~~~~~~~~~~ - Add type hints - Add as_namedtuple option to DataFrame get_columns() and get_location() -- minimum python version now 3.12 +- minimum python version now 3.11 diff --git a/pyproject.toml b/pyproject.toml index a468065..4594ba7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ ] description = 'Python DataFrame with fast insert and appends' readme = "README.rst" -requires-python = ">=3.4" +requires-python = ">=3.11" keywords = ['dataframe', 'data', 'structure'] license = { text = "MIT" } classifiers = [ @@ -17,10 +17,6 @@ classifiers = [ # Pick your license as you wish (should match "license" above) 'License :: OSI Approved :: MIT License', # Specify the Python versions you support here. 
In particular, ensure - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', From 83da13c3e1b4f0ab9d98eb3c5c706c496e62b43c Mon Sep 17 00:00:00 2001 From: Ryan Sheftel Date: Tue, 15 Apr 2025 21:13:24 -0400 Subject: [PATCH 6/6] final docs update --- README.rst | 2 +- docs/raccoon.rst | 11 +- docs/usage_dataframe.rst | 50 +- examples/usage_dataframe.ipynb | 920 +++++++++++++++++---------------- 4 files changed, 509 insertions(+), 474 deletions(-) diff --git a/README.rst b/README.rst index 6ea287b..e78e2fa 100644 --- a/README.rst +++ b/README.rst @@ -64,7 +64,7 @@ My hope is that one day Pandas solves the speed problem with growing DataFrames Python Version ~~~~~~~~~~~~~~ -Raccoon requires Python 3.4 or greater. Python 2.7 support was eliminated as of version 3.0. If you need to use raccoon +Raccoon requires Python 3.11 or greater. Python 2.7 support was eliminated as of version 3.0. If you need to use raccoon with Python 2.7 use any version less than 3.0 Helper scripts diff --git a/docs/raccoon.rst b/docs/raccoon.rst index 9ee0b9f..8ed6233 100644 --- a/docs/raccoon.rst +++ b/docs/raccoon.rst @@ -9,38 +9,37 @@ raccoon.dataframe module .. automodule:: raccoon.dataframe :members: - :undoc-members: :show-inheritance: + :undoc-members: raccoon.series module --------------------- .. automodule:: raccoon.series :members: - :undoc-members: :show-inheritance: + :undoc-members: raccoon.sort\_utils module -------------------------- .. automodule:: raccoon.sort_utils :members: - :undoc-members: :show-inheritance: + :undoc-members: raccoon.utils module -------------------- .. automodule:: raccoon.utils :members: - :undoc-members: :show-inheritance: - + :undoc-members: Module contents --------------- .. 
automodule:: raccoon :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/usage_dataframe.rst b/docs/usage_dataframe.rst index 4a3cad1..98d7e41 100644 --- a/docs/usage_dataframe.rst +++ b/docs/usage_dataframe.rst @@ -4,7 +4,9 @@ Example Usage for DataFrame .. code:: python # remove comment to use latest development version - import sys; sys.path.insert(0, '../') + import sys; + + sys.path.insert(0, '../') .. code:: python @@ -25,7 +27,7 @@ Initialize .. parsed-literal:: - object id: 2305959579080 + object id: 1602323641696 columns: [] data: @@ -46,7 +48,7 @@ Initialize .. parsed-literal:: - object id: 2305959578792 + object id: 1602323630096 columns: ['a', 'b', 'c'] data: @@ -67,7 +69,7 @@ Initialize .. parsed-literal:: - object id: 2305959818248 + object id: 1602323182784 columns: ['a', 'b'] data: @@ -400,7 +402,7 @@ Set Values .. code:: python # append rows, again use caution - df.append_rows([15, 16], {'a': [55, 56], 'd': [100,101]}) + df.append_rows([15, 16], {'a': [55, 56], 'd': [100, 101]}) print(df) @@ -544,6 +546,20 @@ Get Values +.. code:: python + + # get a row and return as a namedtuple, excluding the index + df.get_columns(index=13, columns=['a', 'b'], as_namedtuple=True, name="tuplename", include_index=False) + + + + +.. parsed-literal:: + + tuplename(a=33, b=55) + + + Set and Get by Location ----------------------- @@ -579,13 +595,29 @@ from 0…len(index) .. code:: python - print(df.get_location(0, ['b', 'c'], as_dict=True)) + df.get_location(0, ['b', 'c'], as_dict=True) + + .. parsed-literal:: {'b': 88, 'c': 1, 'index': 10} - + + + +.. code:: python + + df.get_location(1, as_namedtuple=True, name="tuplename", index=False) + + + + +.. parsed-literal:: + + tuplename(a=2, b=55, c=2, d=None) + + .. code:: python @@ -1134,7 +1166,7 @@ Reset Index .. parsed-literal:: - object id: 2305960012584 + object id: 1602314513728 columns: ['a', 'b', 'index_0'] data: @@ -1177,7 +1209,7 @@ Reset Index .. 
code:: python df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], - index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')], index_name=('melo', 'helo', 'gelo')) + index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')], index_name=('melo', 'helo', 'gelo')) print(df) diff --git a/examples/usage_dataframe.ipynb b/examples/usage_dataframe.ipynb index 8584166..0214751 100644 --- a/examples/usage_dataframe.ipynb +++ b/examples/usage_dataframe.ipynb @@ -10,41 +10,41 @@ }, { "cell_type": "code", + "execution_count": 132, "metadata": { - "pycharm": { - "is_executing": false - }, "ExecuteTime": { "end_time": "2025-04-15T22:47:08.746098Z", "start_time": "2025-04-15T22:47:08.741814Z" + }, + "pycharm": { + "is_executing": false } }, + "outputs": [], "source": [ "# remove comment to use latest development version\n", "import sys;\n", "\n", "sys.path.insert(0, '../')" - ], - "outputs": [], - "execution_count": 132 + ] }, { "cell_type": "code", + "execution_count": 133, "metadata": { - "pycharm": { - "is_executing": false - }, "ExecuteTime": { "end_time": "2025-04-15T22:47:08.782962Z", "start_time": "2025-04-15T22:47:08.777904Z" + }, + "pycharm": { + "is_executing": false } }, + "outputs": [], "source": [ "# import libraries\n", "import raccoon as rc" - ], - "outputs": [], - "execution_count": 133 + ] }, { "cell_type": "markdown", @@ -56,20 +56,16 @@ }, { "cell_type": "code", + "execution_count": 134, "metadata": { - "pycharm": { - "is_executing": false - }, "ExecuteTime": { "end_time": "2025-04-15T22:47:08.815176Z", "start_time": "2025-04-15T22:47:08.808758Z" + }, + "pycharm": { + "is_executing": false } }, - "source": [ - "# empty DataFrame\n", - "df = rc.DataFrame()\n", - "df" - ], "outputs": [ { "data": { @@ -88,24 +84,24 @@ "output_type": "execute_result" } ], - "execution_count": 134 + "source": [ + "# empty DataFrame\n", + "df = rc.DataFrame()\n", + "df" + ] }, { "cell_type": "code", + "execution_count": 135, "metadata": { - "pycharm": { - 
"is_executing": false - }, "ExecuteTime": { "end_time": "2025-04-15T22:47:08.834496Z", "start_time": "2025-04-15T22:47:08.827370Z" + }, + "pycharm": { + "is_executing": false } }, - "source": [ - "# with columns and indexes but no data\n", - "df = rc.DataFrame(columns=['a', 'b', 'c'], index=[1, 2, 3])\n", - "df" - ], "outputs": [ { "data": { @@ -124,24 +120,24 @@ "output_type": "execute_result" } ], - "execution_count": 135 + "source": [ + "# with columns and indexes but no data\n", + "df = rc.DataFrame(columns=['a', 'b', 'c'], index=[1, 2, 3])\n", + "df" + ] }, { "cell_type": "code", + "execution_count": 136, "metadata": { - "pycharm": { - "is_executing": false - }, "ExecuteTime": { "end_time": "2025-04-15T22:47:08.897683Z", "start_time": "2025-04-15T22:47:08.890981Z" + }, + "pycharm": { + "is_executing": false } }, - "source": [ - "# with data\n", - "df = rc.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[10, 11, 12], columns=['a', 'b'])\n", - "df" - ], "outputs": [ { "data": { @@ -160,7 +156,11 @@ "output_type": "execute_result" } ], - "execution_count": 136 + "source": [ + "# with data\n", + "df = rc.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[10, 11, 12], columns=['a', 'b'])\n", + "df" + ] }, { "cell_type": "markdown", @@ -172,18 +172,16 @@ }, { "cell_type": "code", + "execution_count": 137, "metadata": { - "pycharm": { - "is_executing": false - }, "ExecuteTime": { "end_time": "2025-04-15T22:47:08.929244Z", "start_time": "2025-04-15T22:47:08.924065Z" + }, + "pycharm": { + "is_executing": false } }, - "source": [ - "df.print()" - ], "outputs": [ { "name": "stdout", @@ -197,19 +195,19 @@ ] } ], - "execution_count": 137 + "source": [ + "df.print()" + ] }, { "cell_type": "code", + "execution_count": 138, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:08.975453Z", "start_time": "2025-04-15T22:47:08.970798Z" } }, - "source": [ - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -223,7 +221,9 @@ ] } ], - "execution_count": 138 + 
"source": [ + "print(df)" + ] }, { "cell_type": "markdown", @@ -235,16 +235,13 @@ }, { "cell_type": "code", + "execution_count": 139, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.075689Z", "start_time": "2025-04-15T22:47:09.069310Z" } }, - "source": [ - "# columns\n", - "df.columns" - ], "outputs": [ { "data": { @@ -257,20 +254,20 @@ "output_type": "execute_result" } ], - "execution_count": 139 + "source": [ + "# columns\n", + "df.columns" + ] }, { "cell_type": "code", + "execution_count": 140, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.157721Z", "start_time": "2025-04-15T22:47:09.149672Z" } }, - "source": [ - "df.columns = ['first', 'second']\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -284,21 +281,20 @@ ] } ], - "execution_count": 140 + "source": [ + "df.columns = ['first', 'second']\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 141, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.222747Z", "start_time": "2025-04-15T22:47:09.216632Z" } }, - "source": [ - "# columns can be renamed with a dict()\n", - "df.rename_columns({'second': 'b', 'first': 'a'})\n", - "df.columns" - ], "outputs": [ { "data": { @@ -311,20 +307,21 @@ "output_type": "execute_result" } ], - "execution_count": 141 + "source": [ + "# columns can be renamed with a dict()\n", + "df.rename_columns({'second': 'b', 'first': 'a'})\n", + "df.columns" + ] }, { "cell_type": "code", + "execution_count": 142, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.276707Z", "start_time": "2025-04-15T22:47:09.270218Z" } }, - "source": [ - "# index\n", - "df.index" - ], "outputs": [ { "data": { @@ -337,21 +334,20 @@ "output_type": "execute_result" } ], - "execution_count": 142 + "source": [ + "# index\n", + "df.index" + ] }, { "cell_type": "code", + "execution_count": 143, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.345384Z", "start_time": "2025-04-15T22:47:09.339360Z" } }, - "source": [ - 
"#indexes can be any non-repeating unique values\n", - "df.index = ['apple', 'pear', 7.7]\n", - "df.print()" - ], "outputs": [ { "name": "stdout", @@ -365,20 +361,21 @@ ] } ], - "execution_count": 143 + "source": [ + "#indexes can be any non-repeating unique values\n", + "df.index = ['apple', 'pear', 7.7]\n", + "df.print()" + ] }, { "cell_type": "code", + "execution_count": 144, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.376508Z", "start_time": "2025-04-15T22:47:09.370958Z" } }, - "source": [ - "df.index = [10, 11, 12]\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -392,20 +389,20 @@ ] } ], - "execution_count": 144 + "source": [ + "df.index = [10, 11, 12]\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 145, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.424851Z", "start_time": "2025-04-15T22:47:09.418477Z" } }, - "source": [ - "# the index can also have a name, befault it is \"index\"\n", - "df.index_name" - ], "outputs": [ { "data": { @@ -418,20 +415,20 @@ "output_type": "execute_result" } ], - "execution_count": 145 + "source": [ + "# the index can also have a name, befault it is \"index\"\n", + "df.index_name" + ] }, { "cell_type": "code", + "execution_count": 146, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.499603Z", "start_time": "2025-04-15T22:47:09.493327Z" } }, - "source": [ - "df.index_name = 'units'\n", - "df.index_name" - ], "outputs": [ { "data": { @@ -444,21 +441,20 @@ "output_type": "execute_result" } ], - "execution_count": 146 + "source": [ + "df.index_name = 'units'\n", + "df.index_name" + ] }, { "cell_type": "code", + "execution_count": 147, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.556073Z", "start_time": "2025-04-15T22:47:09.546661Z" } }, - "source": [ - "# data is a shallow copy, be careful on how this is used\n", - "df.index_name = 'index'\n", - "df.data" - ], "outputs": [ { "data": { @@ -471,7 +467,11 @@ "output_type": 
"execute_result" } ], - "execution_count": 147 + "source": [ + "# data is a shallow copy, be careful on how this is used\n", + "df.index_name = 'index'\n", + "df.data" + ] }, { "cell_type": "markdown", @@ -483,15 +483,13 @@ }, { "cell_type": "code", + "execution_count": 148, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.614009Z", "start_time": "2025-04-15T22:47:09.605998Z" } }, - "source": [ - "df.select_index(11)" - ], "outputs": [ { "data": { @@ -504,7 +502,9 @@ "output_type": "execute_result" } ], - "execution_count": 148 + "source": [ + "df.select_index(11)" + ] }, { "cell_type": "markdown", @@ -516,17 +516,13 @@ }, { "cell_type": "code", + "execution_count": 149, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.646115Z", "start_time": "2025-04-15T22:47:09.638399Z" } }, - "source": [ - "# set a single cell\n", - "df.set(10, 'a', 100)\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -540,21 +536,21 @@ ] } ], - "execution_count": 149 + "source": [ + "# set a single cell\n", + "df.set(10, 'a', 100)\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 150, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.721432Z", "start_time": "2025-04-15T22:47:09.715612Z" } }, - "source": [ - "# set a value outside current range creates a new row and/or column. Can also use [] for setting\n", - "df[13, 'c'] = 9\n", - "df.print()" - ], "outputs": [ { "name": "stdout", @@ -569,21 +565,21 @@ ] } ], - "execution_count": 150 + "source": [ + "# set a value outside current range creates a new row and/or column. 
Can also use [] for setting\n", + "df[13, 'c'] = 9\n", + "df.print()" + ] }, { "cell_type": "code", + "execution_count": 151, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.773982Z", "start_time": "2025-04-15T22:47:09.766893Z" } }, - "source": [ - "# set column\n", - "df['b'] = 55\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -598,21 +594,21 @@ ] } ], - "execution_count": 151 + "source": [ + "# set column\n", + "df['b'] = 55\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 152, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.845538Z", "start_time": "2025-04-15T22:47:09.839361Z" } }, - "source": [ - "# set a subset of column\n", - "df[[10, 12], 'b'] = 66\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -627,21 +623,21 @@ ] } ], - "execution_count": 152 + "source": [ + "# set a subset of column\n", + "df[[10, 12], 'b'] = 66\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 153, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.889245Z", "start_time": "2025-04-15T22:47:09.883036Z" } }, - "source": [ - "# using boolean list\n", - "df.set([True, False, True, False], 'b', [88, 99])\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -656,21 +652,21 @@ ] } ], - "execution_count": 153 + "source": [ + "# using boolean list\n", + "df.set([True, False, True, False], 'b', [88, 99])\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 154, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.919905Z", "start_time": "2025-04-15T22:47:09.913766Z" } }, - "source": [ - "# setting with slices\n", - "df[12:13, 'a'] = 33\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -685,20 +681,21 @@ ] } ], - "execution_count": 154 + "source": [ + "# setting with slices\n", + "df[12:13, 'a'] = 33\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 155, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:09.989443Z", 
"start_time": "2025-04-15T22:47:09.983229Z" } }, - "source": [ - "df[10:12, 'c'] = [1, 2, 3]\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -713,21 +710,20 @@ ] } ], - "execution_count": 155 + "source": [ + "df[10:12, 'c'] = [1, 2, 3]\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 156, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.012289Z", "start_time": "2025-04-15T22:47:10.005278Z" } }, - "source": [ - "# append a row, DANGEROUS as there is not validation checking, but can be used for speed\n", - "df.append_row(14, {'a': 44, 'c': 100, 'd': 99})\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -743,21 +739,21 @@ ] } ], - "execution_count": 156 + "source": [ + "# append a row, DANGEROUS as there is not validation checking, but can be used for speed\n", + "df.append_row(14, {'a': 44, 'c': 100, 'd': 99})\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 157, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.073845Z", "start_time": "2025-04-15T22:47:10.067595Z" } }, - "source": [ - "# append rows, again use caution\n", - "df.append_rows([15, 16], {'a': [55, 56], 'd': [100, 101]})\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -775,7 +771,11 @@ ] } ], - "execution_count": 157 + "source": [ + "# append rows, again use caution\n", + "df.append_rows([15, 16], {'a': [55, 56], 'd': [100, 101]})\n", + "print(df)" + ] }, { "cell_type": "markdown", @@ -787,16 +787,13 @@ }, { "cell_type": "code", + "execution_count": 158, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.107007Z", "start_time": "2025-04-15T22:47:10.101364Z" } }, - "source": [ - "# get a single cell\n", - "df[10, 'a']" - ], "outputs": [ { "data": { @@ -809,20 +806,20 @@ "output_type": "execute_result" } ], - "execution_count": 158 + "source": [ + "# get a single cell\n", + "df[10, 'a']" + ] }, { "cell_type": "code", + "execution_count": 159, "metadata": { "ExecuteTime": { "end_time": 
"2025-04-15T22:47:10.152672Z", "start_time": "2025-04-15T22:47:10.147193Z" } }, - "source": [ - "# get an entire column\n", - "df['c'].print()" - ], "outputs": [ { "name": "stdout", @@ -840,20 +837,20 @@ ] } ], - "execution_count": 159 + "source": [ + "# get an entire column\n", + "df['c'].print()" + ] }, { "cell_type": "code", + "execution_count": 160, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.201198Z", "start_time": "2025-04-15T22:47:10.195206Z" } }, - "source": [ - "# get list of columns\n", - "df[['a', 'c']].print()" - ], "outputs": [ { "name": "stdout", @@ -871,20 +868,20 @@ ] } ], - "execution_count": 160 + "source": [ + "# get list of columns\n", + "df[['a', 'c']].print()" + ] }, { "cell_type": "code", + "execution_count": 161, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.272100Z", "start_time": "2025-04-15T22:47:10.267019Z" } }, - "source": [ - "# get subset of the index\n", - "df[[11, 12, 13], 'b'].print()" - ], "outputs": [ { "name": "stdout", @@ -898,20 +895,20 @@ ] } ], - "execution_count": 161 + "source": [ + "# get subset of the index\n", + "df[[11, 12, 13], 'b'].print()" + ] }, { "cell_type": "code", + "execution_count": 162, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.306719Z", "start_time": "2025-04-15T22:47:10.301050Z" } }, - "source": [ - "# get using slices\n", - "df[11:13, 'b'].print()" - ], "outputs": [ { "name": "stdout", @@ -925,20 +922,20 @@ ] } ], - "execution_count": 162 + "source": [ + "# get using slices\n", + "df[11:13, 'b'].print()" + ] }, { "cell_type": "code", + "execution_count": 163, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.351164Z", "start_time": "2025-04-15T22:47:10.345315Z" } }, - "source": [ - "# get a matrix\n", - "df[10:11, ['a', 'c']].print()" - ], "outputs": [ { "name": "stdout", @@ -951,20 +948,20 @@ ] } ], - "execution_count": 163 + "source": [ + "# get a matrix\n", + "df[10:11, ['a', 'c']].print()" + ] }, { "cell_type": "code", + 
"execution_count": 164, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.420751Z", "start_time": "2025-04-15T22:47:10.414126Z" } }, - "source": [ - "# get a column, return as a list\n", - "df.get(columns='a', as_list=True)" - ], "outputs": [ { "data": { @@ -977,20 +974,20 @@ "output_type": "execute_result" } ], - "execution_count": 164 + "source": [ + "# get a column, return as a list\n", + "df.get(columns='a', as_list=True)" + ] }, { "cell_type": "code", + "execution_count": 165, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.466573Z", "start_time": "2025-04-15T22:47:10.459827Z" } }, - "source": [ - "# get a row and return as a dictionary\n", - "df.get_columns(index=13, columns=['a', 'b'], as_dict=True)" - ], "outputs": [ { "data": { @@ -1003,20 +1000,20 @@ "output_type": "execute_result" } ], - "execution_count": 165 + "source": [ + "# get a row and return as a dictionary\n", + "df.get_columns(index=13, columns=['a', 'b'], as_dict=True)" + ] }, { + "cell_type": "code", + "execution_count": 166, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.500533Z", "start_time": "2025-04-15T22:47:10.492898Z" } }, - "cell_type": "code", - "source": [ - "# get a row and return as a namedtuple, excluding the index\n", - "df.get_columns(index=13, columns=['a', 'b'], as_namedtuple=True, name=\"tuplename\", include_index=False)" - ], "outputs": [ { "data": { @@ -1029,7 +1026,10 @@ "output_type": "execute_result" } ], - "execution_count": 166 + "source": [ + "# get a row and return as a namedtuple, excluding the index\n", + "df.get_columns(index=13, columns=['a', 'b'], as_namedtuple=True, name=\"tuplename\", include_index=False)" + ] }, { "cell_type": "markdown", @@ -1042,16 +1042,13 @@ }, { "cell_type": "code", + "execution_count": 167, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.581191Z", "start_time": "2025-04-15T22:47:10.575155Z" } }, - "source": [ - "# get a single cell\n", - "df.get_location(2, 'a')" - ], 
"outputs": [ { "data": { @@ -1064,20 +1061,20 @@ "output_type": "execute_result" } ], - "execution_count": 167 + "source": [ + "# get a single cell\n", + "df.get_location(2, 'a')" + ] }, { "cell_type": "code", + "execution_count": 168, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.675921Z", "start_time": "2025-04-15T22:47:10.670595Z" } }, - "source": [ - "# get an entire row when the columns is None\n", - "print(df.get_location(2))" - ], "outputs": [ { "name": "stdout", @@ -1089,17 +1086,20 @@ ] } ], - "execution_count": 168 + "source": [ + "# get an entire row when the columns is None\n", + "print(df.get_location(2))" + ] }, { "cell_type": "code", + "execution_count": 169, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.824274Z", "start_time": "2025-04-15T22:47:10.818095Z" } }, - "source": "df.get_location(0, ['b', 'c'], as_dict=True)", "outputs": [ { "data": { @@ -1112,17 +1112,19 @@ "output_type": "execute_result" } ], - "execution_count": 169 + "source": [ + "df.get_location(0, ['b', 'c'], as_dict=True)" + ] }, { + "cell_type": "code", + "execution_count": 170, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:10.990392Z", "start_time": "2025-04-15T22:47:10.983317Z" } }, - "cell_type": "code", - "source": "df.get_location(1, as_namedtuple=True, name=\"tuplename\", index=False)", "outputs": [ { "data": { @@ -1135,19 +1137,19 @@ "output_type": "execute_result" } ], - "execution_count": 170 + "source": [ + "df.get_location(1, as_namedtuple=True, name=\"tuplename\", index=False)" + ] }, { "cell_type": "code", + "execution_count": 171, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.173318Z", "start_time": "2025-04-15T22:47:11.168316Z" } }, - "source": [ - "df.get_location(-1).print()" - ], "outputs": [ { "name": "stdout", @@ -1159,19 +1161,19 @@ ] } ], - "execution_count": 171 + "source": [ + "df.get_location(-1).print()" + ] }, { "cell_type": "code", + "execution_count": 172, "metadata": { "ExecuteTime": 
{ "end_time": "2025-04-15T22:47:11.251008Z", "start_time": "2025-04-15T22:47:11.244620Z" } }, - "source": [ - "df.get_locations(locations=[0, 2]).print()" - ], "outputs": [ { "name": "stdout", @@ -1184,20 +1186,19 @@ ] } ], - "execution_count": 172 + "source": [ + "df.get_locations(locations=[0, 2]).print()" + ] }, { "cell_type": "code", + "execution_count": 173, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.324973Z", "start_time": "2025-04-15T22:47:11.318821Z" } }, - "source": [ - "df.set_locations(locations=[0, 2], column='a', values=-9)\n", - "df.print()" - ], "outputs": [ { "name": "stdout", @@ -1215,7 +1216,10 @@ ] } ], - "execution_count": 173 + "source": [ + "df.set_locations(locations=[0, 2], column='a', values=-9)\n", + "df.print()" + ] }, { "cell_type": "markdown", @@ -1227,15 +1231,13 @@ }, { "cell_type": "code", + "execution_count": 174, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.465349Z", "start_time": "2025-04-15T22:47:11.459938Z" } }, - "source": [ - "df.head(2).print()" - ], "outputs": [ { "name": "stdout", @@ -1248,19 +1250,19 @@ ] } ], - "execution_count": 174 + "source": [ + "df.head(2).print()" + ] }, { "cell_type": "code", + "execution_count": 175, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.569526Z", "start_time": "2025-04-15T22:47:11.564092Z" } }, - "source": [ - "df.tail(2).print()" - ], "outputs": [ { "name": "stdout", @@ -1273,7 +1275,9 @@ ] } ], - "execution_count": 175 + "source": [ + "df.tail(2).print()" + ] }, { "cell_type": "markdown", @@ -1285,16 +1289,13 @@ }, { "cell_type": "code", + "execution_count": 176, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.640002Z", "start_time": "2025-04-15T22:47:11.634285Z" } }, - "source": [ - "df.delete_rows([10, 13])\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -1310,20 +1311,20 @@ ] } ], - "execution_count": 176 + "source": [ + "df.delete_rows([10, 13])\n", + "print(df)" + ] }, { "cell_type": "code", + 
"execution_count": 177, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.677591Z", "start_time": "2025-04-15T22:47:11.671222Z" } }, - "source": [ - "df.delete_columns('b')\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -1339,7 +1340,10 @@ ] } ], - "execution_count": 177 + "source": [ + "df.delete_columns('b')\n", + "print(df)" + ] }, { "cell_type": "markdown", @@ -1351,16 +1355,13 @@ }, { "cell_type": "code", + "execution_count": 178, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.729564Z", "start_time": "2025-04-15T22:47:11.721116Z" } }, - "source": [ - "# return a dict\n", - "df.to_dict()" - ], "outputs": [ { "data": { @@ -1376,20 +1377,20 @@ "output_type": "execute_result" } ], - "execution_count": 178 + "source": [ + "# return a dict\n", + "df.to_dict()" + ] }, { "cell_type": "code", + "execution_count": 179, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.794083Z", "start_time": "2025-04-15T22:47:11.787165Z" } }, - "source": [ - "# exclude the index\n", - "df.to_dict(index=False)" - ], "outputs": [ { "data": { @@ -1404,20 +1405,20 @@ "output_type": "execute_result" } ], - "execution_count": 179 + "source": [ + "# exclude the index\n", + "df.to_dict(index=False)" + ] }, { "cell_type": "code", + "execution_count": 180, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.847297Z", "start_time": "2025-04-15T22:47:11.840815Z" } }, - "source": [ - "# return an OrderedDict()\n", - "df.to_dict(ordered=True)" - ], "outputs": [ { "data": { @@ -1433,20 +1434,20 @@ "output_type": "execute_result" } ], - "execution_count": 180 + "source": [ + "# return an OrderedDict()\n", + "df.to_dict(ordered=True)" + ] }, { "cell_type": "code", + "execution_count": 181, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.903623Z", "start_time": "2025-04-15T22:47:11.896528Z" } }, - "source": [ - "# return a list of just one column\n", - "df['c'].to_list()" - ], "outputs": [ { "data": { @@ -1459,21 
+1460,20 @@ "output_type": "execute_result" } ], - "execution_count": 181 + "source": [ + "# return a list of just one column\n", + "df['c'].to_list()" + ] }, { "cell_type": "code", + "execution_count": 182, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.946982Z", "start_time": "2025-04-15T22:47:11.942611Z" } }, - "source": [ - "# convert to JSON\n", - "string = df.to_json()\n", - "print(string)" - ], "outputs": [ { "name": "stdout", @@ -1483,21 +1483,21 @@ ] } ], - "execution_count": 182 + "source": [ + "# convert to JSON\n", + "string = df.to_json()\n", + "print(string)" + ] }, { "cell_type": "code", + "execution_count": 183, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:11.986301Z", "start_time": "2025-04-15T22:47:11.981793Z" } }, - "source": [ - "# construct DataFrame from JSON\n", - "df_from_json = rc.DataFrame.from_json(string)\n", - "print(df_from_json)" - ], "outputs": [ { "name": "stdout", @@ -1513,7 +1513,11 @@ ] } ], - "execution_count": 183 + "source": [ + "# construct DataFrame from JSON\n", + "df_from_json = rc.DataFrame.from_json(string)\n", + "print(df_from_json)" + ] }, { "cell_type": "markdown", @@ -1525,16 +1529,13 @@ }, { "cell_type": "code", + "execution_count": 184, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.009287Z", "start_time": "2025-04-15T22:47:12.004191Z" } }, - "source": [ - "df = rc.DataFrame({'a': [4, 3, 2, 1], 'b': [6, 7, 8, 9]}, index=[25, 24, 23, 22])\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -1549,21 +1550,20 @@ ] } ], - "execution_count": 184 + "source": [ + "df = rc.DataFrame({'a': [4, 3, 2, 1], 'b': [6, 7, 8, 9]}, index=[25, 24, 23, 22])\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 185, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.080437Z", "start_time": "2025-04-15T22:47:12.073689Z" } }, - "source": [ - "# sort by index. 
Sorts are inplace\n", - "df.sort_index()\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -1578,21 +1578,21 @@ ] } ], - "execution_count": 185 + "source": [ + "# sort by index. Sorts are inplace\n", + "df.sort_index()\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 186, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.162889Z", "start_time": "2025-04-15T22:47:12.156099Z" } }, - "source": [ - "# sort by column\n", - "df.sort_columns('b')\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -1607,21 +1607,21 @@ ] } ], - "execution_count": 186 + "source": [ + "# sort by column\n", + "df.sort_columns('b')\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 187, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.222185Z", "start_time": "2025-04-15T22:47:12.216668Z" } }, - "source": [ - "# sort by column in reverse order\n", - "df.sort_columns('b', reverse=True)\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -1636,21 +1636,25 @@ ] } ], - "execution_count": 187 + "source": [ + "# sort by column in reverse order\n", + "df.sort_columns('b', reverse=True)\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 188, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.270342Z", "start_time": "2025-04-15T22:47:12.266284Z" } }, + "outputs": [], "source": [ "# sorting with a key function is avaialble, see tests for examples" - ], - "outputs": [], - "execution_count": 188 + ] }, { "cell_type": "markdown", @@ -1662,16 +1666,13 @@ }, { "cell_type": "code", + "execution_count": 189, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.303923Z", "start_time": "2025-04-15T22:47:12.298653Z" } }, - "source": [ - "df1 = rc.DataFrame({'a': [1, 2], 'b': [5, 6]}, index=[1, 2])\n", - "df1.print()" - ], "outputs": [ { "name": "stdout", @@ -1684,20 +1685,20 @@ ] } ], - "execution_count": 189 + "source": [ + "df1 = rc.DataFrame({'a': [1, 2], 'b': [5, 6]}, index=[1, 
2])\n", + "df1.print()" + ] }, { "cell_type": "code", + "execution_count": 190, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.340612Z", "start_time": "2025-04-15T22:47:12.332333Z" } }, - "source": [ - "df2 = rc.DataFrame({'b': [7, 8], 'c': [11, 12]}, index=[3, 4])\n", - "print(df2)" - ], "outputs": [ { "name": "stdout", @@ -1710,20 +1711,20 @@ ] } ], - "execution_count": 190 + "source": [ + "df2 = rc.DataFrame({'b': [7, 8], 'c': [11, 12]}, index=[3, 4])\n", + "print(df2)" + ] }, { "cell_type": "code", + "execution_count": 191, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.374375Z", "start_time": "2025-04-15T22:47:12.366020Z" } }, - "source": [ - "df1.append(df2)\n", - "print(df1)" - ], "outputs": [ { "name": "stdout", @@ -1738,7 +1739,10 @@ ] } ], - "execution_count": 191 + "source": [ + "df1.append(df2)\n", + "print(df1)" + ] }, { "cell_type": "markdown", @@ -1750,30 +1754,27 @@ }, { "cell_type": "code", + "execution_count": 192, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.421557Z", "start_time": "2025-04-15T22:47:12.416340Z" } }, + "outputs": [], "source": [ "df = rc.DataFrame({'a': [1, 2, 3], 'b': [2, 8, 9]})" - ], - "outputs": [], - "execution_count": 192 + ] }, { "cell_type": "code", + "execution_count": 193, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.480489Z", "start_time": "2025-04-15T22:47:12.474321Z" } }, - "source": [ - "# test for equality\n", - "df.equality('a', value=3)" - ], "outputs": [ { "data": { @@ -1786,20 +1787,20 @@ "output_type": "execute_result" } ], - "execution_count": 193 + "source": [ + "# test for equality\n", + "df.equality('a', value=3)" + ] }, { "cell_type": "code", + "execution_count": 194, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.502979Z", "start_time": "2025-04-15T22:47:12.497639Z" } }, - "source": [ - "# all math methods can operate on a subset of the index\n", - "df.equality('b', indexes=[1, 2], value=2)" - ], "outputs": [ { 
"data": { @@ -1812,20 +1813,20 @@ "output_type": "execute_result" } ], - "execution_count": 194 + "source": [ + "# all math methods can operate on a subset of the index\n", + "df.equality('b', indexes=[1, 2], value=2)" + ] }, { "cell_type": "code", + "execution_count": 195, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.552408Z", "start_time": "2025-04-15T22:47:12.546810Z" } }, - "source": [ - "# add two columns\n", - "df.add('a', 'b')" - ], "outputs": [ { "data": { @@ -1838,20 +1839,20 @@ "output_type": "execute_result" } ], - "execution_count": 195 + "source": [ + "# add two columns\n", + "df.add('a', 'b')" + ] }, { "cell_type": "code", + "execution_count": 196, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.596610Z", "start_time": "2025-04-15T22:47:12.591328Z" } }, - "source": [ - "# subtract\n", - "df.subtract('b', 'a')" - ], "outputs": [ { "data": { @@ -1864,20 +1865,20 @@ "output_type": "execute_result" } ], - "execution_count": 196 + "source": [ + "# subtract\n", + "df.subtract('b', 'a')" + ] }, { "cell_type": "code", + "execution_count": 197, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.632380Z", "start_time": "2025-04-15T22:47:12.626779Z" } }, - "source": [ - "# multiply\n", - "df.multiply('a', 'b', [0, 2])" - ], "outputs": [ { "data": { @@ -1890,20 +1891,20 @@ "output_type": "execute_result" } ], - "execution_count": 197 + "source": [ + "# multiply\n", + "df.multiply('a', 'b', [0, 2])" + ] }, { "cell_type": "code", + "execution_count": 198, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.673467Z", "start_time": "2025-04-15T22:47:12.668116Z" } }, - "source": [ - "# divide\n", - "df.divide('b', 'a')" - ], "outputs": [ { "data": { @@ -1916,7 +1917,10 @@ "output_type": "execute_result" } ], - "execution_count": 198 + "source": [ + "# divide\n", + "df.divide('b', 'a')" + ] }, { "cell_type": "markdown", @@ -1930,17 +1934,13 @@ }, { "cell_type": "code", + "execution_count": 199, "metadata": { 
"ExecuteTime": { "end_time": "2025-04-15T22:47:12.743156Z", "start_time": "2025-04-15T22:47:12.737422Z" } }, - "source": [ - "tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]\n", - "df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=tuples)\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -1957,7 +1957,11 @@ ] } ], - "execution_count": 199 + "source": [ + "tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]\n", + "df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=tuples)\n", + "print(df)" + ] }, { "cell_type": "markdown", @@ -1968,16 +1972,13 @@ }, { "cell_type": "code", + "execution_count": 200, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.813260Z", "start_time": "2025-04-15T22:47:12.807006Z" } }, - "source": [ - "compare = ('a', None, None)\n", - "df.select_index(compare)" - ], "outputs": [ { "data": { @@ -1990,20 +1991,20 @@ "output_type": "execute_result" } ], - "execution_count": 200 + "source": [ + "compare = ('a', None, None)\n", + "df.select_index(compare)" + ] }, { "cell_type": "code", + "execution_count": 201, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.850419Z", "start_time": "2025-04-15T22:47:12.844931Z" } }, - "source": [ - "compare = ('a', None, 3)\n", - "df.select_index(compare, 'boolean')" - ], "outputs": [ { "data": { @@ -2016,20 +2017,20 @@ "output_type": "execute_result" } ], - "execution_count": 201 + "source": [ + "compare = ('a', None, 3)\n", + "df.select_index(compare, 'boolean')" + ] }, { "cell_type": "code", + "execution_count": 202, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.893226Z", "start_time": "2025-04-15T22:47:12.887477Z" } }, - "source": [ - "compare = (None, 2, None)\n", - "df.select_index(compare, 'value')" - ], "outputs": [ { "data": { @@ -2042,20 +2043,20 @@ "output_type": "execute_result" } ], - "execution_count": 202 + "source": [ + "compare = (None, 2, None)\n", + 
"df.select_index(compare, 'value')" + ] }, { "cell_type": "code", + "execution_count": 203, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.931270Z", "start_time": "2025-04-15T22:47:12.925555Z" } }, - "source": [ - "compare = (None, None, 3)\n", - "df.select_index(compare, 'value')" - ], "outputs": [ { "data": { @@ -2068,20 +2069,20 @@ "output_type": "execute_result" } ], - "execution_count": 203 + "source": [ + "compare = (None, None, 3)\n", + "df.select_index(compare, 'value')" + ] }, { "cell_type": "code", + "execution_count": 204, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.965439Z", "start_time": "2025-04-15T22:47:12.960694Z" } }, - "source": [ - "compare = (None, None, None)\n", - "df.select_index(compare)" - ], "outputs": [ { "data": { @@ -2094,7 +2095,10 @@ "output_type": "execute_result" } ], - "execution_count": 204 + "source": [ + "compare = (None, None, None)\n", + "df.select_index(compare)" + ] }, { "cell_type": "markdown", @@ -2106,16 +2110,13 @@ }, { "cell_type": "code", + "execution_count": 205, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:12.986721Z", "start_time": "2025-04-15T22:47:12.982181Z" } }, - "source": [ - "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'])\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2129,20 +2130,20 @@ ] } ], - "execution_count": 205 + "source": [ + "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'])\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 206, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.031902Z", "start_time": "2025-04-15T22:47:13.026515Z" } }, - "source": [ - "df.reset_index()\n", - "df" - ], "outputs": [ { "data": { @@ -2161,20 +2162,20 @@ "output_type": "execute_result" } ], - "execution_count": 206 + "source": [ + "df.reset_index()\n", + "df" + ] }, { "cell_type": "code", + "execution_count": 207, "metadata": { "ExecuteTime": { "end_time": 
"2025-04-15T22:47:13.058276Z", "start_time": "2025-04-15T22:47:13.053567Z" } }, - "source": [ - "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2188,20 +2189,20 @@ ] } ], - "execution_count": 207 + "source": [ + "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 208, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.107204Z", "start_time": "2025-04-15T22:47:13.102194Z" } }, - "source": [ - "df.reset_index()\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2215,21 +2216,20 @@ ] } ], - "execution_count": 208 + "source": [ + "df.reset_index()\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 209, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.136737Z", "start_time": "2025-04-15T22:47:13.131786Z" } }, - "source": [ - "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'],\n", - " index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')], index_name=('melo', 'helo', 'gelo'))\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2243,20 +2243,21 @@ ] } ], - "execution_count": 209 + "source": [ + "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'],\n", + " index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')], index_name=('melo', 'helo', 'gelo'))\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 210, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.174199Z", "start_time": "2025-04-15T22:47:13.169724Z" } }, - "source": [ - "df.reset_index()\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2270,20 +2271,20 @@ ] } ], - "execution_count": 210 + "source": [ + "df.reset_index()\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 211, "metadata": { "ExecuteTime": 
{ "end_time": "2025-04-15T22:47:13.212948Z", "start_time": "2025-04-15T22:47:13.207218Z" } }, - "source": [ - "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2297,20 +2298,20 @@ ] } ], - "execution_count": 211 + "source": [ + "df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 212, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.251260Z", "start_time": "2025-04-15T22:47:13.246941Z" } }, - "source": [ - "df.reset_index(drop=True)\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2324,7 +2325,10 @@ ] } ], - "execution_count": 212 + "source": [ + "df.reset_index(drop=True)\n", + "print(df)" + ] }, { "cell_type": "markdown", @@ -2336,30 +2340,27 @@ }, { "cell_type": "code", + "execution_count": 213, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.284325Z", "start_time": "2025-04-15T22:47:13.280388Z" } }, + "outputs": [], "source": [ "df = rc.DataFrame({'a': [1, 2, 'c'], 'b': [5, 6, 'd']}, index=[1, 2, 3])" - ], - "outputs": [], - "execution_count": 213 + ] }, { "cell_type": "code", + "execution_count": 214, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.311413Z", "start_time": "2025-04-15T22:47:13.305726Z" } }, - "source": [ - "for row in df.iterrows():\n", - " print(row)" - ], "outputs": [ { "name": "stdout", @@ -2371,20 +2372,20 @@ ] } ], - "execution_count": 214 + "source": [ + "for row in df.iterrows():\n", + " print(row)" + ] }, { "cell_type": "code", + "execution_count": 215, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.376046Z", "start_time": "2025-04-15T22:47:13.371280Z" } }, - "source": [ - "for row in df.itertuples():\n", - " print(row)" - ], "outputs": [ { "name": "stdout", @@ -2396,7 +2397,10 @@ ] } ], - "execution_count": 215 
+ "source": [ + "for row in df.itertuples():\n", + " print(row)" + ] }, { "cell_type": "markdown", @@ -2409,17 +2413,17 @@ }, { "cell_type": "code", + "execution_count": 216, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.481935Z", "start_time": "2025-04-15T22:47:13.477795Z" } }, + "outputs": [], "source": [ "df = rc.DataFrame({'a': [3, 5, 4], 'b': [6, 8, 7]}, index=[12, 15, 14], sort=True)" - ], - "outputs": [], - "execution_count": 216 + ] }, { "cell_type": "markdown", @@ -2430,15 +2434,13 @@ }, { "cell_type": "code", + "execution_count": 217, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.547862Z", "start_time": "2025-04-15T22:47:13.542996Z" } }, - "source": [ - "df.print()" - ], "outputs": [ { "name": "stdout", @@ -2452,20 +2454,19 @@ ] } ], - "execution_count": 217 + "source": [ + "df.print()" + ] }, { "cell_type": "code", + "execution_count": 218, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.607717Z", "start_time": "2025-04-15T22:47:13.602745Z" } }, - "source": [ - "df[16, 'b'] = 9\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2480,20 +2481,20 @@ ] } ], - "execution_count": 218 + "source": [ + "df[16, 'b'] = 9\n", + "print(df)" + ] }, { "cell_type": "code", + "execution_count": 219, "metadata": { "ExecuteTime": { "end_time": "2025-04-15T22:47:13.642524Z", "start_time": "2025-04-15T22:47:13.637770Z" } }, - "source": [ - "df.set(indexes=13, values={'a': 3.5, 'b': 6.5})\n", - "print(df)" - ], "outputs": [ { "name": "stdout", @@ -2509,14 +2510,17 @@ ] } ], - "execution_count": 219 + "source": [ + "df.set(indexes=13, values={'a': 3.5, 'b': 6.5})\n", + "print(df)" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python [conda env:raccoon]", "language": "python", - "name": "python3" + "name": "conda-env-raccoon-py" }, "language_info": { "codemirror_mode": { @@ -2528,7 +2532,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", - "version": "3.7.4" + "version": "3.13.2" }, "pycharm": { "stem_cell": {