77from itertools import compress
88
99from tabulate import tabulate
10- from blist import blist
1110
1211from raccoon .sort_utils import sorted_exists , sorted_index , sorted_list_indexes
1312
@@ -18,7 +17,7 @@ class SeriesBase(ABC):
1817 methods in Series are views to the underlying data and not copies.
1918 """
2019 # Define slots to make object faster
21- __slots__ = ['_data' , '_data_name' , '_index' , '_index_name' , '_sort' ]
20+ __slots__ = ['_data' , '_data_name' , '_index' , '_index_name' , '_sort' , '_dropin' ]
2221
2322 def __init__ (self ):
2423 """
@@ -29,6 +28,7 @@ def __init__(self):
2928 self ._data = None
3029 self ._data_name = None
3130 self ._sort = None
31+ self ._dropin = None
3232
3333 def __len__ (self ):
3434 return len (self ._index )
@@ -90,6 +90,9 @@ def index_name(self, name):
9090 def sort (self ):
9191 return
9292
93+ def _check_list (self , x ):
94+ return type (x ) == (self ._dropin if self ._dropin else list )
95+
9396 def get (self , indexes , as_list = False ):
9497 """
9598 Given indexes will return a sub-set of the Series. This method will direct to the specific methods
@@ -100,7 +103,7 @@ def get(self, indexes, as_list=False):
100103 :param as_list: if True then return the values as a list, if False return a Series.
101104 :return: either Series, list, or single value. The return is a shallow copy
102105 """
103- if isinstance (indexes , ( list , blist ) ):
106+ if self . _check_list (indexes ):
104107 return self .get_rows (indexes , as_list )
105108 else :
106109 return self .get_cell (indexes )
@@ -211,8 +214,8 @@ def _slice_index(self, slicer):
211214 return pre_list
212215
213216 def _validate_index (self , indexes ):
214- if not (isinstance (indexes , ( list , blist )) or indexes is None ):
215- raise TypeError ('indexes must be list, blist or None' )
217+ if not (self . _check_list (indexes ) or type ( indexes ) == list or indexes is None ):
218+ raise TypeError ('indexes must be list, %s or None' % self . _dropin )
216219 if len (indexes ) != len (set (indexes )):
217220 raise ValueError ('index contains duplicates' )
218221 if self ._data :
@@ -323,19 +326,21 @@ def equality(self, indexes=None, value=None):
323326class Series (SeriesBase ):
324327 """
325328 Series class. The raccoon Series implements a simplified version of the pandas Series with the key
326- objective difference that the raccoon Series is meant for use cases where the size of the Series is
329+ objective difference that the raccoon Series is meant for use cases where the size of the Series rows is
327330 expanding frequently. This is known to be slow with Pandas due to the use of numpy as the underlying data structure.
328- The Series can be designated as sort, in which case the rows will be sort by index on construction,
329- and then any addition of a new row will insert it into the Series so that the index remains sort.
331+ Raccoon uses native lists, or any other provided drop-in replacement for lists, as the underlying data structure
332+ which is quick to expand and grow the size. The Series can be designated as sort, in which case the rows will be
333+ sorted by index on construction, and then any addition of a new row will insert it into the Series so that the
334+ index remains sorted.
330335 """
331- def __init__ (self , data = None , index = None , data_name = 'value' , index_name = 'index' , use_blist = False , sort = None ):
336+ def __init__ (self , data = None , index = None , data_name = 'value' , index_name = 'index' , sort = None , dropin = None ):
332337 """
333338 :param data: (optional) list of values.
334339 :param index: (optional) list of index values. If None then the index will be integers starting with zero
335340 :param data_name: (optional) name of the data column, or will default to 'value'
336341 :param index_name: (optional) name for the index. Default is "index"
337- :param use_blist: if True then use blist() as the underlying data structure, if False use standard list()
338342 :param sort: if True then Series will keep the index sorted. If True all index values must be of the same type
343+ :param dropin: if supplied the drop-in replacement for list that will be used
339344 """
340345 super (SeriesBase , self ).__init__ ()
341346
@@ -344,19 +349,19 @@ def __init__(self, data=None, index=None, data_name='value', index_name='index',
344349 self ._index_name = index_name
345350 self ._data = None
346351 self ._data_name = data_name
347- self ._blist = use_blist
352+ self ._dropin = dropin
348353
349354 # setup data list
350355 if data is None :
351- self ._data = blist () if self . _blist else list ()
356+ self ._data = dropin () if dropin else list ()
352357 if index :
353358 # pad out to the number of rows
354359 self ._pad_data (len (index ))
355360 self .index = index
356361 else :
357362 self .index = list ()
358- elif isinstance (data , ( list , blist )) :
359- self ._data = blist ([x for x in data ]) if self . _blist else [x for x in data ]
363+ elif self . _check_list (data ) or type ( data ) == list :
364+ self ._data = dropin ([x for x in data ]) if dropin else [x for x in data ]
360365 # setup index
361366 if index :
362367 self .index = index
@@ -395,11 +400,11 @@ def index(self):
395400 @index .setter
396401 def index (self , index_list ):
397402 self ._validate_index (index_list )
398- self ._index = blist (index_list ) if self ._blist else list (index_list )
403+ self ._index = self . _dropin (index_list ) if self ._dropin else list (index_list )
399404
400405 @property
401- def blist (self ):
402- return self ._blist
406+ def dropin (self ):
407+ return self ._dropin
403408
404409 @property
405410 def sort (self ):
@@ -419,9 +424,9 @@ def sort_index(self):
419424 """
420425 sort = sorted_list_indexes (self ._index )
421426 # sort index
422- self ._index = blist ([self ._index [x ] for x in sort ]) if self ._blist else [self ._index [x ] for x in sort ]
427+ self ._index = self . _dropin ([self ._index [x ] for x in sort ]) if self ._dropin else [self ._index [x ] for x in sort ]
423428 # sort data
424- self ._data = blist ([self ._data [x ] for x in sort ]) if self ._blist else [self ._data [x ] for x in sort ]
429+ self ._data = self . _dropin ([self ._data [x ] for x in sort ]) if self ._dropin else [self ._data [x ] for x in sort ]
425430
426431 def set (self , indexes , values = None ):
427432 """
@@ -433,7 +438,7 @@ def set(self, indexes, values=None):
433438 :param values: value or list of values to set. If a list then must be the same length as the indexes parameter.
434439 :return: nothing
435440 """
436- if isinstance (indexes , ( list , blist ) ):
441+ if self . _check_list (indexes ):
437442 self .set_rows (indexes , values )
438443 else :
439444 self .set_cell (indexes , values )
@@ -518,7 +523,7 @@ def set_rows(self, index, values=None):
518523 :return: nothing
519524 """
520525 if all ([isinstance (i , bool ) for i in index ]): # boolean list
521- if not isinstance (values , ( list , blist ) ): # single value provided, not a list, so turn values into list
526+ if not self . _check_list (values ): # single value provided, not a list, so turn values into list
522527 values = [values for x in index if x ]
523528 if len (index ) != len (self ._index ):
524529 raise ValueError ('boolean index list must be same size of existing index' )
@@ -528,7 +533,7 @@ def set_rows(self, index, values=None):
528533 for x , i in enumerate (indexes ):
529534 self ._data [i ] = values [x ]
530535 else : # list of index
531- if not isinstance (values , ( list , blist ) ): # single value provided, not a list, so turn values into list
536+ if not self . _check_list (values ): # single value provided, not a list, so turn values into list
532537 values = [values for _ in index ]
533538 if len (values ) != len (index ):
534539 raise ValueError ('length of values and index must be the same.' )
@@ -652,7 +657,7 @@ def delete(self, indexes):
652657 :param indexes: either a list of values or list of booleans for the rows to delete
653658 :return: nothing
654659 """
655- indexes = [indexes ] if not isinstance (indexes , ( list , blist ) ) else indexes
660+ indexes = [indexes ] if not self . _check_list (indexes ) else indexes
656661 if all ([isinstance (i , bool ) for i in indexes ]): # boolean list
657662 if len (indexes ) != len (self ._index ):
658663 raise ValueError ('boolean indexes list must be same size of existing indexes' )
@@ -681,7 +686,8 @@ class ViewSeries(SeriesBase):
681686 """
682687 ViewSeries class. The raccoon ViewSeries implements a view only version of the Series object with the key
683688 objective difference that the raccoon ViewSeries is meant for view only use cases where the underlying index and
684- data are modified elsewhere or static. Use this for a view into a single column of a DataFrame.
689+ data are modified elsewhere or static. Use this for a view into a single column of a DataFrame. There is no type
690+ checking of the data, so it is assumed the data type is list-style.
685691 """
686692 def __init__ (self , data = None , index = None , data_name = 'value' , index_name = 'index' , sort = False , offset = 0 ):
687693 """
@@ -694,13 +700,14 @@ def __init__(self, data=None, index=None, data_name='value', index_name='index',
694700 """
695701 super (SeriesBase , self ).__init__ ()
696702
703+ # dropin is not a parameter, set it to the class of data
704+ self ._dropin = data .__class__
705+
697706 # check inputs
698707 if index is None :
699708 raise ValueError ('Index cannot be None.' )
700709 if data is None :
701710 raise ValueError ('Data cannot be None.' )
702- if not isinstance (data , (list , blist )):
703- raise TypeError ('Not valid data type.' )
704711
705712 # standard variable setup
706713 self ._data = data # direct view, no copy
0 commit comments