Skip to content

Commit 7bd9f4f

Browse files
committed
refactor blist out of Series
1 parent c320280 commit 7bd9f4f

File tree

12 files changed: 185 additions & 155 deletions

raccoon/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ def index_name(self, name):
189189
self._index_name = name
190190

191191
@property
192-
def dropin_func(self):
192+
def dropin(self):
193193
return self._dropin
194194

195195
@property

raccoon/series.py

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from itertools import compress
88

99
from tabulate import tabulate
10-
from blist import blist
1110

1211
from raccoon.sort_utils import sorted_exists, sorted_index, sorted_list_indexes
1312

@@ -18,7 +17,7 @@ class SeriesBase(ABC):
1817
methods in Series are views to the underlying data and not copies.
1918
"""
2019
# Define slots to make object faster
21-
__slots__ = ['_data', '_data_name', '_index', '_index_name', '_sort']
20+
__slots__ = ['_data', '_data_name', '_index', '_index_name', '_sort', '_dropin']
2221

2322
def __init__(self):
2423
"""
@@ -29,6 +28,7 @@ def __init__(self):
2928
self._data = None
3029
self._data_name = None
3130
self._sort = None
31+
self._dropin = None
3232

3333
def __len__(self):
3434
return len(self._index)
@@ -90,6 +90,9 @@ def index_name(self, name):
9090
def sort(self):
9191
return
9292

93+
def _check_list(self, x):
94+
return type(x) == (self._dropin if self._dropin else list)
95+
9396
def get(self, indexes, as_list=False):
9497
"""
9598
Given indexes will return a sub-set of the Series. This method will direct to the specific methods
@@ -100,7 +103,7 @@ def get(self, indexes, as_list=False):
100103
:param as_list: if True then return the values as a list, if False return a Series.
101104
:return: either Series, list, or single value. The return is a shallow copy
102105
"""
103-
if isinstance(indexes, (list, blist)):
106+
if self._check_list(indexes):
104107
return self.get_rows(indexes, as_list)
105108
else:
106109
return self.get_cell(indexes)
@@ -211,8 +214,8 @@ def _slice_index(self, slicer):
211214
return pre_list
212215

213216
def _validate_index(self, indexes):
214-
if not(isinstance(indexes, (list, blist)) or indexes is None):
215-
raise TypeError('indexes must be list, blist or None')
217+
if not(self._check_list(indexes) or type(indexes) == list or indexes is None):
218+
raise TypeError('indexes must be list, %s or None' % self._dropin)
216219
if len(indexes) != len(set(indexes)):
217220
raise ValueError('index contains duplicates')
218221
if self._data:
@@ -323,19 +326,21 @@ def equality(self, indexes=None, value=None):
323326
class Series(SeriesBase):
324327
"""
325328
Series class. The raccoon Series implements a simplified version of the pandas Series with the key
326-
objective difference that the raccoon Series is meant for use cases where the size of the Series is
329+
objective difference that the raccoon Series is meant for use cases where the size of the Series rows is
327330
expanding frequently. This is known to be slow with Pandas due to the use of numpy as the underlying data structure.
328-
The Series can be designated as sort, in which case the rows will be sort by index on construction,
329-
and then any addition of a new row will insert it into the Series so that the index remains sort.
331+
Raccoon uses native lists, or any other provided drop-in replacement for lists, as the underlying data structure
332+
which is quick to expand and grow the size. The Series can be designated as sort, in which case the rows will be
333+
sort by index on construction, and then any addition of a new row will insert it into the Series so that the
334+
index remains sort.
330335
"""
331-
def __init__(self, data=None, index=None, data_name='value', index_name='index', use_blist=False, sort=None):
336+
def __init__(self, data=None, index=None, data_name='value', index_name='index', sort=None, dropin=None):
332337
"""
333338
:param data: (optional) list of values.
334339
:param index: (optional) list of index values. If None then the index will be integers starting with zero
335340
:param data_name: (optional) name of the data column, or will default to 'value'
336341
:param index_name: (optional) name for the index. Default is "index"
337-
:param use_blist: if True then use blist() as the underlying data structure, if False use standard list()
338342
:param sort: if True then Series will keep the index sort. If True all index values must be of same type
343+
:param dropin: if supplied the drop-in replacement for list that will be used
339344
"""
340345
super(SeriesBase, self).__init__()
341346

@@ -344,19 +349,19 @@ def __init__(self, data=None, index=None, data_name='value', index_name='index',
344349
self._index_name = index_name
345350
self._data = None
346351
self._data_name = data_name
347-
self._blist = use_blist
352+
self._dropin = dropin
348353

349354
# setup data list
350355
if data is None:
351-
self._data = blist() if self._blist else list()
356+
self._data = dropin() if dropin else list()
352357
if index:
353358
# pad out to the number of rows
354359
self._pad_data(len(index))
355360
self.index = index
356361
else:
357362
self.index = list()
358-
elif isinstance(data, (list, blist)):
359-
self._data = blist([x for x in data]) if self._blist else [x for x in data]
363+
elif self._check_list(data) or type(data) == list:
364+
self._data = dropin([x for x in data]) if dropin else [x for x in data]
360365
# setup index
361366
if index:
362367
self.index = index
@@ -395,11 +400,11 @@ def index(self):
395400
@index.setter
396401
def index(self, index_list):
397402
self._validate_index(index_list)
398-
self._index = blist(index_list) if self._blist else list(index_list)
403+
self._index = self._dropin(index_list) if self._dropin else list(index_list)
399404

400405
@property
401-
def blist(self):
402-
return self._blist
406+
def dropin(self):
407+
return self._dropin
403408

404409
@property
405410
def sort(self):
@@ -419,9 +424,9 @@ def sort_index(self):
419424
"""
420425
sort = sorted_list_indexes(self._index)
421426
# sort index
422-
self._index = blist([self._index[x] for x in sort]) if self._blist else [self._index[x] for x in sort]
427+
self._index = self._dropin([self._index[x] for x in sort]) if self._dropin else [self._index[x] for x in sort]
423428
# sort data
424-
self._data = blist([self._data[x] for x in sort]) if self._blist else [self._data[x] for x in sort]
429+
self._data = self._dropin([self._data[x] for x in sort]) if self._dropin else [self._data[x] for x in sort]
425430

426431
def set(self, indexes, values=None):
427432
"""
@@ -433,7 +438,7 @@ def set(self, indexes, values=None):
433438
:param values: value or list of values to set. If a list then must be the same length as the indexes parameter.
434439
:return: nothing
435440
"""
436-
if isinstance(indexes, (list, blist)):
441+
if self._check_list(indexes):
437442
self.set_rows(indexes, values)
438443
else:
439444
self.set_cell(indexes, values)
@@ -518,7 +523,7 @@ def set_rows(self, index, values=None):
518523
:return: nothing
519524
"""
520525
if all([isinstance(i, bool) for i in index]): # boolean list
521-
if not isinstance(values, (list, blist)): # single value provided, not a list, so turn values into list
526+
if not self._check_list(values): # single value provided, not a list, so turn values into list
522527
values = [values for x in index if x]
523528
if len(index) != len(self._index):
524529
raise ValueError('boolean index list must be same size of existing index')
@@ -528,7 +533,7 @@ def set_rows(self, index, values=None):
528533
for x, i in enumerate(indexes):
529534
self._data[i] = values[x]
530535
else: # list of index
531-
if not isinstance(values, (list, blist)): # single value provided, not a list, so turn values into list
536+
if not self._check_list(values): # single value provided, not a list, so turn values into list
532537
values = [values for _ in index]
533538
if len(values) != len(index):
534539
raise ValueError('length of values and index must be the same.')
@@ -652,7 +657,7 @@ def delete(self, indexes):
652657
:param indexes: either a list of values or list of booleans for the rows to delete
653658
:return: nothing
654659
"""
655-
indexes = [indexes] if not isinstance(indexes, (list, blist)) else indexes
660+
indexes = [indexes] if not self._check_list(indexes) else indexes
656661
if all([isinstance(i, bool) for i in indexes]): # boolean list
657662
if len(indexes) != len(self._index):
658663
raise ValueError('boolean indexes list must be same size of existing indexes')
@@ -681,7 +686,8 @@ class ViewSeries(SeriesBase):
681686
"""
682687
ViewSeries class. The raccoon ViewSeries implements a view only version of the Series object with the key
683688
objective difference that the raccoon ViewSeries is meant for view only use cases where the underlying index and
684-
data are modified elsewhere or static. Use this for a view into a single column of a DataFrame.
689+
data are modified elsewhere or static. Use this for a view into a single column of a DataFrame. There is no type
690+
checking of the data, so it is assumed the data type is list-style.
685691
"""
686692
def __init__(self, data=None, index=None, data_name='value', index_name='index', sort=False, offset=0):
687693
"""
@@ -694,13 +700,14 @@ def __init__(self, data=None, index=None, data_name='value', index_name='index',
694700
"""
695701
super(SeriesBase, self).__init__()
696702

703+
# dropin is not a parameter, set it to the value of data
704+
self._dropin = data.__class__
705+
697706
# check inputs
698707
if index is None:
699708
raise ValueError('Index cannot be None.')
700709
if data is None:
701710
raise ValueError('Data cannot be None.')
702-
if not isinstance(data, (list, blist)):
703-
raise TypeError('Not valid data type.')
704711

705712
# standard variable setup
706713
self._data = data # direct view, no copy

raccoon/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def assert_frame_equal(left, right, data_function=None, data_args=None):
2424
assert left.columns == right.columns
2525
assert left.index_name == right.index_name
2626
assert left.sort == right.sort
27-
assert left.dropin_func == right.dropin_func
27+
assert left.dropin == right.dropin
2828

2929

3030
def assert_series_equal(left, right, data_function=None, data_args=None):
@@ -50,4 +50,4 @@ def assert_series_equal(left, right, data_function=None, data_args=None):
5050
if isinstance(left, rc.ViewSeries):
5151
assert left.offset == right.offset
5252
if isinstance(left, rc.Series):
53-
assert left.blist == right.blist
53+
assert left.dropin == right.dropin
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import pytest
2+
import raccoon as rc
3+
from raccoon.utils import assert_series_equal
4+
5+
try:
6+
from blist import blist
7+
except ImportError:
8+
pytest.skip("blist is not installed, skipping tests.", allow_module_level=True)
9+
10+
11+
def test_assert_series_equal():
12+
srs1 = rc.Series([1, 2, 3], index=[1, 2, 3])
13+
srs2 = rc.Series([1, 2, 3], index=[1, 2, 3], dropin=blist)
14+
with pytest.raises(AssertionError):
15+
assert_series_equal(srs1, srs2)
16+
17+
18+
def test_default_empty_init():
19+
actual = rc.Series(index=[1, 2, 3], data_name='points', dropin=blist)
20+
assert actual.data == [None, None, None]
21+
assert actual.data_name == 'points'
22+
assert actual.index == [1, 2, 3]
23+
assert actual.index_name == 'index'
24+
assert actual.sort is False
25+
assert isinstance(actual.index, blist)
26+
assert isinstance(actual.data, blist)
27+
28+
29+
def test_use_blist():
30+
def check_blist():
31+
assert isinstance(srs.index, blist)
32+
assert isinstance(srs.data, blist)
33+
34+
srs = rc.Series(dropin=blist)
35+
assert isinstance(srs, rc.Series)
36+
assert srs.data == []
37+
assert srs.index == []
38+
assert srs.sort is True
39+
check_blist()
40+
41+
# add a new row and col
42+
srs.set_cell(1, 1)
43+
check_blist()
44+
45+
# add a new row
46+
srs.set_cell(2, 2)
47+
check_blist()
48+
49+
# add a new col
50+
srs.set_cell(1, 3)
51+
check_blist()
52+
53+
# add a complete new row
54+
srs.set_rows([3], [5])
55+
check_blist()
56+
57+
58+
def test_index_blist():
59+
actual = rc.Series([4, 5, 6], index=['a', 'b', 'c'], dropin=blist)
60+
result = actual.index
61+
assert result == ['a', 'b', 'c']
62+
assert isinstance(result, blist)
63+
64+
# test that a view is returned
65+
result.append('bad')
66+
assert actual.index == ['a', 'b', 'c', 'bad']
67+
68+
actual.index = [9, 10, 11]
69+
assert actual.index == [9, 10, 11]
70+
assert isinstance(result, blist)
71+
72+
# index too long
73+
with pytest.raises(ValueError):
74+
actual.index = [1, 3, 4, 5, 6]
75+
76+
77+
def test_data_blist():
78+
actual = rc.Series([4, 5, 6], index=['a', 'b', 'c'], dropin=blist)
79+
assert actual.data == [4, 5, 6]
80+
assert isinstance(actual.data, blist)
81+
82+
83+
def test_print():
84+
srs = rc.Series([1.0, 2.55, 3.1], data_name='boo', index=['row1', 'row2', 'row3'], dropin=blist)
85+
86+
# __repr__ produces a simple representation
87+
expected = "object id: %s\ndata:\nblist([1.0, 2.55, 3.1])\nindex:\nblist(['row1', 'row2', 'row3'])\n" % id(srs)
88+
actual = srs.__repr__()
89+
assert actual == expected
90+
91+
# __str__ produces the standard table
92+
expected = 'index boo\n------- -----\nrow1 1\nrow2 2.55\nrow3 3.1'
93+
actual = srs.__str__()
94+
assert actual == expected
95+
96+
# print() method will pass along any argument for the tabulate.tabulate function
97+
srs.print()
98+
99+
100+
def test_sort_index():
101+
srs = rc.Series([4, 5, 6], index=[10, 8, 9], sort=False, dropin=blist)
102+
srs.sort_index()
103+
assert isinstance(srs.index, blist)
104+
assert_series_equal(srs, rc.Series([5, 6, 4], index=[8, 9, 10], sort=False, dropin=blist))
105+
106+
107+
def test_select_index():
108+
# simple index, not sort, blist
109+
srs = rc.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f'], dropin=blist)
110+
actual = srs.select_index('c', 'value')
111+
assert actual == ['c']
112+
113+
114+
def test_from_dataframe():
115+
df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 9], dropin=blist)
116+
actual = rc.ViewSeries.from_dataframe(df, 'b')
117+
expected = rc.ViewSeries([4, 5, 6], data_name='b', index=['a', 'b', 9])
118+
assert_series_equal(actual, expected)
119+
120+
121+
def test_from_series():
122+
srs = rc.Series(data=[4, 5, 6], data_name='b', index=['a', 'b', 9], dropin=blist)
123+
actual = rc.ViewSeries.from_series(srs)
124+
expected = rc.ViewSeries([4, 5, 6], data_name='b', index=['a', 'b', 9])
125+
assert_series_equal(actual, expected)

tests/test_series/test_delete.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ def test_delete():
3838

3939

4040
def test_delete_sort():
41-
srs = rc.Series([4, 5, 6], index=['a', 'b', 'c'], sort=True, use_blist=False)
41+
srs = rc.Series([4, 5, 6], index=['a', 'b', 'c'], sort=True)
4242

4343
srs.delete(['a', 'c'])
44-
assert_series_equal(srs, rc.Series([5], index=['b'], sort=True, use_blist=False))
44+
assert_series_equal(srs, rc.Series([5], index=['b'], sort=True))
4545

4646
srs.delete('b')
47-
assert_series_equal(srs, rc.Series(sort=True, use_blist=False))
47+
assert_series_equal(srs, rc.Series(sort=True))
4848

4949
# insert back in data
5050
srs[2] = 9

0 commit comments

Comments
 (0)