Skip to content

Commit ed308bb

Browse files
committed
NA entries and more tests
1 parent 09da7f1 commit ed308bb

File tree

8 files changed

+115
-32
lines changed

8 files changed

+115
-32
lines changed

build/lib/data_algebra/cdata_impl.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def transform(
132132

133133
def compose(self, other):
134134
"""
135-
Compose transforms
135+
Experimental method to compose transforms
136136
(self.compose(other)).transform(data) == self.transform(other.transform(data))
137137
138138
:param other: another data_algebra.cdata_impl.RecordMap
@@ -183,7 +183,7 @@ def __rrshift__(self, other): # override other >> self
183183
if other is None:
184184
return self
185185
if isinstance(other, RecordMap):
186-
# data >> other >> self
186+
# (data >> other) >> self == data >> (other >> self)
187187
return self.compose(other)
188188
return self.transform(other)
189189

build/lib/data_algebra/util.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@ def is_bad(x):
3434

3535
# for testing
3636

37+
def pandas_to_example_str(obj):
    """
    Render a pandas.DataFrame as Python source text that rebuilds it.

    The result is a ``pandas.DataFrame({...})`` expression with one
    ``column: [cells],`` line per column, meant for pasting into tests and
    examples.  Evaluating the text requires ``pandas`` to be in scope.

    :param obj: pandas.DataFrame to render
    :return: string holding a pandas.DataFrame constructor expression
    :raises TypeError: if obj is not a pandas.DataFrame
    """
    if not isinstance(obj, pandas.DataFrame):
        raise TypeError("Expect obj to be pandas.DataFrame")

    def cell_str(v):
        # Every missing value (None, NaN, NaT) is written as None.
        if pandas.isnull(v):
            return "None"
        # repr() of an infinite float is the bare name inf / -inf, which is
        # not a valid expression; spell it as a float() call so the generated
        # text still evaluates.
        if isinstance(v, float) and v in (float("inf"), float("-inf")):
            return "float('inf')" if v > 0 else "float('-inf')"
        return repr(v)

    # Collect the lines and join once instead of growing a string in the loop.
    lines = ["pandas.DataFrame({"]
    for k in obj.columns:
        cells = ", ".join(cell_str(v) for v in obj[k])
        lines.append(" " + repr(k) + ": [" + cells + "],")
    lines.append(" })")
    return "\n".join(lines)
46+
3747

3848
def equivalent_frames(
3949
a,

coverage.txt

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,56 @@
11
============================= test session starts ==============================
2-
platform linux -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
3-
rootdir: /home/john/Documents/work/data_algebra
2+
platform darwin -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
3+
rootdir: /Users/johnmount/Documents/work/data_algebra
44
plugins: cov-2.7.1
5-
collected 24 items
5+
collected 27 items
66

7-
tests/test_R_yaml.py . [ 4%]
8-
tests/test_apply.py . [ 8%]
9-
tests/test_cdata1.py . [ 12%]
10-
tests/test_cdata_example.py .. [ 20%]
11-
tests/test_dask.py .. [ 29%]
12-
tests/test_datatable.py . [ 33%]
13-
tests/test_drop_columns.py . [ 37%]
14-
tests/test_example_data_ops.py . [ 41%]
15-
tests/test_exp.py . [ 45%]
16-
tests/test_export_neg.py . [ 50%]
17-
tests/test_free_expr.py . [ 54%]
18-
tests/test_natural_join.py . [ 58%]
7+
tests/test_R_yaml.py . [ 3%]
8+
tests/test_apply.py . [ 7%]
9+
tests/test_cdata1.py . [ 11%]
10+
tests/test_cdata_example.py .... [ 25%]
11+
tests/test_dask.py .. [ 33%]
12+
tests/test_datatable.py . [ 37%]
13+
tests/test_drop_columns.py . [ 40%]
14+
tests/test_example_data_ops.py . [ 44%]
15+
tests/test_exp.py . [ 48%]
16+
tests/test_export_neg.py . [ 51%]
17+
tests/test_free_expr.py . [ 55%]
18+
tests/test_natural_join.py . [ 59%]
1919
tests/test_neg.py . [ 62%]
2020
tests/test_null_bad.py . [ 66%]
2121
tests/test_parse.py . [ 70%]
22-
tests/test_poject.py . [ 75%]
23-
tests/test_scoring_example.py . [ 79%]
24-
tests/test_simple.py .... [ 95%]
22+
tests/test_poject.py . [ 74%]
23+
tests/test_scoring_example.py . [ 77%]
24+
tests/test_simple.py ..... [ 96%]
2525
tests/test_sqlite.py . [100%]
2626

27-
----------- coverage: platform linux, python 3.6.9-final-0 -----------
27+
---------- coverage: platform darwin, python 3.6.9-final-0 -----------
2828
Name Stmts Miss Cover
2929
-----------------------------------------------------
3030
data_algebra/PostgreSQL.py 21 4 81%
3131
data_algebra/SQLite.py 43 5 88%
3232
data_algebra/SparkSQL.py 21 21 0%
33-
data_algebra/__init__.py 36 6 83%
34-
data_algebra/cdata.py 101 20 80%
35-
data_algebra/cdata_impl.py 151 60 60%
36-
data_algebra/dask_model.py 119 102 14%
33+
data_algebra/__init__.py 36 10 72%
34+
data_algebra/cdata.py 101 19 81%
35+
data_algebra/cdata_impl.py 151 59 61%
36+
data_algebra/dask_model.py 119 23 81%
3737
data_algebra/data_model.py 41 15 63%
38-
data_algebra/data_ops.py 764 212 72%
38+
data_algebra/data_ops.py 764 176 77%
3939
data_algebra/data_pipe.py 170 38 78%
40-
data_algebra/data_types.py 39 22 44%
41-
data_algebra/datatable_model.py 131 107 18%
40+
data_algebra/data_types.py 39 19 51%
41+
data_algebra/datatable_model.py 131 81 38%
4242
data_algebra/db_model.py 353 82 77%
4343
data_algebra/diagram.py 52 52 0%
4444
data_algebra/env.py 46 7 85%
4545
data_algebra/expr.py 21 4 81%
46-
data_algebra/expr_rep.py 306 82 73%
46+
data_algebra/expr_rep.py 306 81 74%
4747
data_algebra/pandas_model.py 135 22 84%
4848
data_algebra/pending_eval.py 34 34 0%
4949
data_algebra/pipe.py 65 19 71%
50-
data_algebra/util.py 72 6 92%
50+
data_algebra/util.py 81 7 91%
5151
data_algebra/yaml.py 113 13 88%
5252
-----------------------------------------------------
53-
TOTAL 2834 933 67%
53+
TOTAL 2843 791 72%
5454

5555

56-
========================== 24 passed in 2.63 seconds ===========================
56+
========================== 27 passed in 6.69 seconds ===========================

data_algebra/util.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@ def is_bad(x):
3434

3535
# for testing
3636

37+
def pandas_to_example_str(obj):
    """
    Render a pandas.DataFrame as Python source text that rebuilds it.

    The result is a ``pandas.DataFrame({...})`` expression with one
    ``column: [cells],`` line per column, meant for pasting into tests and
    examples.  Evaluating the text requires ``pandas`` to be in scope.

    :param obj: pandas.DataFrame to render
    :return: string holding a pandas.DataFrame constructor expression
    :raises TypeError: if obj is not a pandas.DataFrame
    """
    if not isinstance(obj, pandas.DataFrame):
        raise TypeError("Expect obj to be pandas.DataFrame")

    def cell_str(v):
        # Every missing value (None, NaN, NaT) is written as None.
        if pandas.isnull(v):
            return "None"
        # repr() of an infinite float is the bare name inf / -inf, which is
        # not a valid expression; spell it as a float() call so the generated
        # text still evaluates.
        if isinstance(v, float) and v in (float("inf"), float("-inf")):
            return "float('inf')" if v > 0 else "float('-inf')"
        return repr(v)

    # Collect the lines and join once instead of growing a string in the loop.
    lines = ["pandas.DataFrame({"]
    for k in obj.columns:
        cells = ", ".join(cell_str(v) for v in obj[k])
        lines.append(" " + repr(k) + ": [" + cells + "],")
    lines.append(" })")
    return "\n".join(lines)
46+
3747

3848
def equivalent_frames(
3949
a,
160 Bytes
Binary file not shown.

dist/data_algebra-0.1.8.tar.gz

413 Bytes
Binary file not shown.

tests/test_cdata_example.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
import io
33
import re
4+
import numpy
45

56
import pandas
67

@@ -72,6 +73,7 @@ def test_keras_example():
7273
})
7374
assert data_algebra.util.equivalent_frames(res, expect)
7475

76+
7577
def test_cdata_block():
7678
data = pandas.DataFrame({
7779
'record_id': [1, 1, 1, 2, 2, 2],
@@ -115,3 +117,52 @@ def test_cdata_block():
115117
back = inv.transform(res)
116118

117119
assert data_algebra.util.equivalent_frames(data, back)
120+
121+
122+
def test_cdata_missing():
    """
    Transform block records through a RecordMap whose incoming and outgoing
    control tables each contain a missing cell (None and numpy.nan), and
    check the result against the expected frame.
    """
    keys = ['record_id']

    # Control table describing the layout of the incoming records.
    shape_in = pandas.DataFrame({
        'row': ['row1', 'row2', 'row3'],
        'col1': ['v11', 'v21', 'v31'],
        'col2': [None, 'v22', 'v32'],
        'col3': ['v13', 'v23', 'v33'],
    })

    # Control table describing the layout of the outgoing records.
    shape_out = pandas.DataFrame({
        'column_label': ['rec_col1', 'rec_col2', 'rec_col3'],
        'c_row1': ['v11', numpy.nan, 'v13'],
        'c_row2': ['v21', 'v22', 'v23'],
        'c_row3': ['v31', 'v32', 'v33'],
    })

    spec_in = data_algebra.cdata.RecordSpecification(
        control_table=shape_in,
        record_keys=keys
    )
    spec_out = data_algebra.cdata.RecordSpecification(
        control_table=shape_out,
        record_keys=keys
    )
    mapping = data_algebra.cdata_impl.RecordMap(
        blocks_in=spec_in,
        blocks_out=spec_out,
    )

    frame = pandas.DataFrame({
        'record_id': [1, 1, 1, 2, 2, 2],
        'row': ['row1', 'row2', 'row3', 'row1', 'row2', 'row3'],
        'col1': [1, 4, 7, 11, 14, 17],
        'col2': [2, 5, 8, 12, 15, 18],
        'col3': [3, 6, 9, 13, 16, 19],
    })

    expected = pandas.DataFrame({
        'record_id': [1, 1, 1, 2, 2, 2],
        'column_label': ['rec_col1', 'rec_col2', 'rec_col3', 'rec_col1', 'rec_col2', 'rec_col3'],
        'c_row1': [1.0, None, 3.0, 11.0, None, 13.0],
        'c_row2': [4, 5, 6, 14, 15, 16],
        'c_row3': [7, 8, 9, 17, 18, 19],
    })

    result = mapping.transform(frame)

    assert data_algebra.util.equivalent_frames(result, expected)

tests/test_simple.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,15 @@ def test_simple():
7171
res = ops.eval_pandas(data_map={"d": d_local}, eval_env=locals())
7272
expect = pandas.DataFrame({"x": [1, 2], "y": [3, 4], "z": [1.25, 2.25]})
7373
assert data_algebra.util.equivalent_frames(res, expect)
74+
75+
def test_pandas_to_example():
    """
    Check that the text produced by pandas_to_example_str evaluates back to
    a frame equivalent to the original, including missing cells.
    """
    original = pandas.DataFrame({
        'record_id': [1, 1, 1, 2, 2, 2],
        'column_label': ['rec_col1', 'rec_col2', 'rec_col3', 'rec_col1', 'rec_col2', 'rec_col3'],
        'c_row1': [1.0, None, 3.0, 11.0, None, 13.0],
        'c_row2': [4, 5, 6, 14, 15, 16],
        'c_row3': [7, 8, 9, 17, 18, 19],
    })
    # The generated text refers to pandas, which eval resolves from this
    # module's globals.
    source_text = data_algebra.util.pandas_to_example_str(original)
    rebuilt = eval(source_text)
    assert data_algebra.util.equivalent_frames(original, rebuilt)

0 commit comments

Comments
 (0)