Skip to content

Commit c0295cd

Browse files
committed
work on composition
1 parent 8d97d59 commit c0295cd

File tree

2 files changed

+180
-10
lines changed

2 files changed

+180
-10
lines changed
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {
7+
"collapsed": true,
8+
"pycharm": {
9+
"is_executing": false
10+
}
11+
},
12+
"outputs": [
13+
{
14+
"name": "stdout",
15+
"text": [
16+
"TableDescription(table_name='d', column_names=['subjectID', 'surveyCategory', 'assessmentTotal', 'irrelevantCol1', 'irrelevantCol2']) .\\\n extend({'probability': '(assessmentTotal * 0.237).exp()'}) .\\\n extend({'total': 'probability.sum()'}, partition_by=['subjectID']) .\\\n extend({'probability': 'probability / total'}) .\\\n extend({'sort_key': '-probability'}) .\\\n extend({'row_number': '_row_number()'}, partition_by=['subjectID'], order_by=['sort_key']) .\\\n select_rows('row_number == 1') .\\\n select_columns(['subjectID', 'surveyCategory', 'probability']) .\\\n rename_columns({'diagnosis': 'surveyCategory'})\n"
17+
],
18+
"output_type": "stream"
19+
}
20+
],
21+
"source": [
22+
"import pandas\n",
23+
"\n",
24+
"from data_algebra.data_ops import * # https://github.com/WinVector/data_algebra\n",
25+
"from data_algebra.data_pipe import Locum\n",
26+
"\n",
27+
"d_local = pandas.DataFrame({\n",
28+
" 'subjectID':[1, 1, 2, 2],\n",
29+
" 'surveyCategory': [ \"withdrawal behavior\", \"positive re-framing\", \"withdrawal behavior\", \"positive re-framing\"],\n",
30+
" 'assessmentTotal': [5, 2, 3, 4],\n",
31+
" 'irrelevantCol1': ['irrel1']*4,\n",
32+
" 'irrelevantCol2': ['irrel2']*4,\n",
33+
"})\n",
34+
"\n",
35+
"scale = 0.237\n",
36+
"\n",
37+
"with data_algebra.env.Env(locals()) as env:\n",
38+
" ops = data_algebra.data_ops.describe_table(d_local, 'd'). \\\n",
39+
" extend({'probability': '(assessmentTotal * scale).exp()'}). \\\n",
40+
" extend({'total': 'probability.sum()'},\n",
41+
" partition_by='subjectID'). \\\n",
42+
" extend({'probability': 'probability/total'}). \\\n",
43+
" extend({'sort_key': '-probability'}). \\\n",
44+
" extend({'row_number': '_row_number()'},\n",
45+
" partition_by=['subjectID'],\n",
46+
" order_by=['sort_key']). \\\n",
47+
" select_rows('row_number == 1'). \\\n",
48+
" select_columns(['subjectID', 'surveyCategory', 'probability']). \\\n",
49+
" rename_columns({'diagnosis': 'surveyCategory'})\n",
50+
"\n",
51+
"print(ops)"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": 2,
57+
"outputs": [
58+
{
59+
"name": "stdout",
60+
"text": [
61+
"TableDescription(table_name='d', column_names=['subjectID', 'surveyCategory', 'assessmentTotal', 'irrelevantCol1', 'irrelevantCol2']) .\\\n extend({'probability': '(assessmentTotal * 0.237).exp()'}) .\\\n extend({'total': 'probability.sum()'}, partition_by=['subjectID']) .\\\n extend({'probability': 'probability / total'}) .\\\n extend({'sort_key': '-probability'}) .\\\n extend({'row_number': '_row_number()'}, partition_by=['subjectID'], order_by=['sort_key']) .\\\n select_rows('row_number == 1') .\\\n select_columns(['subjectID', 'surveyCategory', 'probability']) .\\\n rename_columns({'diagnosis': 'surveyCategory'})\n"
62+
],
63+
"output_type": "stream"
64+
}
65+
],
66+
"source": [
67+
"prob_caclulation = Locum(). \\\n",
68+
" extend({'probability': '(assessmentTotal * 0.237).exp()'}). \\\n",
69+
" extend({'total': 'probability.sum()'},\n",
70+
" partition_by='subjectID'). \\\n",
71+
" extend({'probability': 'probability/total'})\n",
72+
"\n",
73+
"top_rank = Locum(). \\\n",
74+
" extend({'sort_key': '-probability'}). \\\n",
75+
" extend({'row_number': '_row_number()'},\n",
76+
" partition_by=['subjectID'],\n",
77+
" order_by=['sort_key']). \\\n",
78+
" select_rows('row_number == 1')\n",
79+
"\n",
80+
"clean_up_columns = Locum(). \\\n",
81+
" select_columns(['subjectID', 'surveyCategory', 'probability']). \\\n",
82+
" rename_columns({'diagnosis': 'surveyCategory'})\n",
83+
"\n",
84+
"ops = data_algebra.data_ops.describe_table(d_local, 'd') +\\\n",
85+
" prob_caclulation +\\\n",
86+
" top_rank +\\\n",
87+
" clean_up_columns\n",
88+
"\n",
89+
"print(ops)"
90+
],
91+
"metadata": {
92+
"collapsed": false,
93+
"pycharm": {
94+
"name": "#%%\n",
95+
"is_executing": false
96+
}
97+
}
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": 3,
102+
"outputs": [
103+
{
104+
"data": {
105+
"text/plain": " subjectID diagnosis probability\n0 1 withdrawal behavior 0.670622\n1 2 positive re-framing 0.558974",
106+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>subjectID</th>\n <th>diagnosis</th>\n <th>probability</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>withdrawal behavior</td>\n <td>0.670622</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>positive re-framing</td>\n <td>0.558974</td>\n </tr>\n </tbody>\n</table>\n</div>"
107+
},
108+
"metadata": {},
109+
"output_type": "execute_result",
110+
"execution_count": 3
111+
}
112+
],
113+
"source": [
114+
"d_local >> ops\n"
115+
],
116+
"metadata": {
117+
"collapsed": false,
118+
"pycharm": {
119+
"name": "#%%\n",
120+
"is_executing": false
121+
}
122+
}
123+
}
124+
],
125+
"metadata": {
126+
"kernelspec": {
127+
"display_name": "Python 3",
128+
"language": "python",
129+
"name": "python3"
130+
},
131+
"language_info": {
132+
"codemirror_mode": {
133+
"name": "ipython",
134+
"version": 2
135+
},
136+
"file_extension": ".py",
137+
"mimetype": "text/x-python",
138+
"name": "python",
139+
"nbconvert_exporter": "python",
140+
"pygments_lexer": "ipython2",
141+
"version": "2.7.6"
142+
},
143+
"pycharm": {
144+
"stem_cell": {
145+
"cell_type": "raw",
146+
"source": [],
147+
"metadata": {
148+
"collapsed": false
149+
}
150+
}
151+
}
152+
},
153+
"nbformat": 4,
154+
"nbformat_minor": 0
155+
}

data_algebra/data_pipe.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -213,32 +213,47 @@ def __init__(self):
213213
data_algebra.data_ops.OperatorPlatform.__init__(self)
214214
self.ops = []
215215

216-
# noinspection PyPep8Naming
217-
def realize(self, X):
218-
pipeline = data_algebra.data_ops.describe_table(X, table_name="X")
216+
def apply_to(self, pipeline):
    """Apply the recorded steps, in order, to an existing pipeline.

    :param pipeline: a data_algebra.data_ops.OperatorPlatform to build on.
    :return: the value produced by passing ``pipeline`` through each
        step's ``apply()`` in the order the steps appear in ``self.ops``;
        ``pipeline`` itself when no steps are recorded.
    :raises TypeError: if ``pipeline`` is not an OperatorPlatform.
    """
    if not isinstance(pipeline, data_algebra.data_ops.OperatorPlatform):
        # Name the actual argument so the message matches the signature.
        raise TypeError(
            "Expected pipeline to be a data_algebra.data_ops.OperatorPlatform"
        )
    for s in self.ops:
        # Each step returns the extended pipeline, which feeds the next step.
        pipeline = s.apply(pipeline)
    return pipeline
223223

224+
def append(self, other):
    """Add steps to the end of this Locum, in place.

    :param other: either another Locum (all of its steps are added, in
        order) or a single data_algebra.pipe.PipeStep.
    :return: ``self``, so calls can be chained.
    :raises TypeError: if ``other`` is neither a Locum nor a PipeStep.
    """
    if isinstance(other, Locum):
        # Copy first: when other is self, appending while iterating the
        # same list would never terminate.
        self.ops.extend(list(other.ops))
    elif isinstance(other, data_algebra.pipe.PipeStep):
        self.ops.append(other)
    else:
        raise TypeError("unexpected type for Locum + " + str(type(other)))
    return self
233+
234+
def realize(self, x):
    """Build a concrete pipeline for ``x``.

    Describes ``x`` as a table named "x" and applies the recorded steps
    to that description.

    :param x: object passed to data_algebra.data_ops.describe_table.
    :return: the result of ``self.apply_to`` on the table description.
    """
    return self.apply_to(
        data_algebra.data_ops.describe_table(x, table_name="x")
    )
237+
224238
# noinspection PyPep8Naming
225239
def transform(self, X):
    """Apply the recorded steps to ``X``.

    When ``X`` is an OperatorPlatform the steps are composed onto it and
    the resulting pipeline is returned. Otherwise a pipeline is built
    with ``realize(X)`` and its ``transform`` is called on ``X``.

    :param X: an OperatorPlatform, or an object accepted by ``realize``.
    :return: the composed pipeline, or the realized pipeline's
        ``transform(X)`` result.
    """
    is_pipeline = isinstance(X, data_algebra.data_ops.OperatorPlatform)
    if not is_pipeline:
        return self.realize(X).transform(X)
    return self.apply_to(X)
228244

229245
def __rrshift__(self, other):
    """Reflected ``>>``: evaluate ``other >> self`` as ``self.transform(other)``."""
    result = self.transform(other)
    return result
231247

232-
def __add__(self, other):
233-
if not isinstance(other, Locum):
234-
raise TypeError("Expected other to be of type data_algebra.data_pipe.Locum")
248+
def __add__(self, other):
    """Implement ``self + other`` by building a new Locum.

    The result starts empty, then receives this Locum's steps followed
    by ``other`` through ``append``, which decides what types are
    acceptable for ``other``.

    :return: the newly created Locum.
    """
    combined = Locum()
    for part in (self, other):
        combined.append(part)
    return combined
241253

254+
def __radd__(self, other):
    """Reflected ``+``: evaluate ``other + self`` as ``self.apply_to(other)``."""
    composed = self.apply_to(other)
    return composed
256+
242257
# print
243258

244259
def __repr__(self):

0 commit comments

Comments
 (0)