1+ {
2+ "cells" : [
3+ {
4+ "cell_type" : " code" ,
5+ "execution_count" : 1 ,
6+ "metadata" : {
7+ "collapsed" : true ,
8+ "pycharm" : {
9+ "is_executing" : false
10+ }
11+ },
12+ "outputs" : [
13+ {
14+ "name" : " stdout" ,
15+ "text" : [
16+ " TableDescription(table_name='d', column_names=['subjectID', 'surveyCategory', 'assessmentTotal', 'irrelevantCol1', 'irrelevantCol2']) .\\\n extend({'probability': '(assessmentTotal * 0.237).exp()'}) .\\\n extend({'total': 'probability.sum()'}, partition_by=['subjectID']) .\\\n extend({'probability': 'probability / total'}) .\\\n extend({'sort_key': '-probability'}) .\\\n extend({'row_number': '_row_number()'}, partition_by=['subjectID'], order_by=['sort_key']) .\\\n select_rows('row_number == 1') .\\\n select_columns(['subjectID', 'surveyCategory', 'probability']) .\\\n rename_columns({'diagnosis': 'surveyCategory'})\n "
17+ ],
18+ "output_type" : " stream"
19+ }
20+ ],
21+ "source" : [
22+ " import pandas\n " ,
23+ " import data_algebra.data_ops # https://github.com/WinVector/data_algebra\n " ,
24+ " import data_algebra.env\n " ,
25+ " from data_algebra.data_pipe import Locum\n " ,
26+ " \n " ,
27+ " d_local = pandas.DataFrame({\n " ,
28+ " 'subjectID':[1, 1, 2, 2],\n " ,
29+ " 'surveyCategory': [ \" withdrawal behavior\" , \" positive re-framing\" , \" withdrawal behavior\" , \" positive re-framing\" ],\n " ,
30+ " 'assessmentTotal': [5, 2, 3, 4],\n " ,
31+ " 'irrelevantCol1': ['irrel1']*4,\n " ,
32+ " 'irrelevantCol2': ['irrel2']*4,\n " ,
33+ " })\n " ,
34+ " \n " ,
35+ " scale = 0.237 # multiplier applied to assessmentTotal before exp()\n " ,
36+ " \n " ,
37+ " with data_algebra.env.Env(locals()) as env: # makes local names such as scale available to the expression strings\n " ,
38+ "     ops = data_algebra.data_ops.describe_table(d_local, 'd'). \\\n " ,
39+ "         extend({'probability': '(assessmentTotal * scale).exp()'}). \\\n " ,
40+ "         extend({'total': 'probability.sum()'}, # per-subject sum used to normalize\n " ,
41+ "             partition_by=['subjectID']). \\\n " ,
42+ "         extend({'probability': 'probability/total'}). \\\n " ,
43+ "         extend({'sort_key': '-probability'}). \\\n " ,
44+ "         extend({'row_number': '_row_number()'}, # rank rows within each subject, largest probability first\n " ,
45+ "             partition_by=['subjectID'],\n " ,
46+ "             order_by=['sort_key']). \\\n " ,
47+ "         select_rows('row_number == 1'). \\\n " ,
48+ "         select_columns(['subjectID', 'surveyCategory', 'probability']). \\\n " ,
49+ "         rename_columns({'diagnosis': 'surveyCategory'}) # new name 'diagnosis' <- old name 'surveyCategory'\n " ,
50+ " \n " ,
51+ " print(ops)"
52+ ]
53+ },
54+ {
55+ "cell_type" : " code" ,
56+ "execution_count" : 2 ,
57+ "outputs" : [
58+ {
59+ "name" : " stdout" ,
60+ "text" : [
61+ " TableDescription(table_name='d', column_names=['subjectID', 'surveyCategory', 'assessmentTotal', 'irrelevantCol1', 'irrelevantCol2']) .\\\n extend({'probability': '(assessmentTotal * 0.237).exp()'}) .\\\n extend({'total': 'probability.sum()'}, partition_by=['subjectID']) .\\\n extend({'probability': 'probability / total'}) .\\\n extend({'sort_key': '-probability'}) .\\\n extend({'row_number': '_row_number()'}, partition_by=['subjectID'], order_by=['sort_key']) .\\\n select_rows('row_number == 1') .\\\n select_columns(['subjectID', 'surveyCategory', 'probability']) .\\\n rename_columns({'diagnosis': 'surveyCategory'})\n "
62+ ],
63+ "output_type" : " stream"
64+ }
65+ ],
66+ "source" : [
67+ " prob_calculation = Locum(). \\\n " ,
68+ "     extend({'probability': '(assessmentTotal * 0.237).exp()'}). \\\n " ,
69+ "     extend({'total': 'probability.sum()'}, # per-subject sum used to normalize\n " ,
70+ "         partition_by=['subjectID']). \\\n " ,
71+ "     extend({'probability': 'probability/total'})\n " ,
72+ " \n " ,
73+ " top_rank = Locum(). \\\n " ,
74+ "     extend({'sort_key': '-probability'}). \\\n " ,
75+ "     extend({'row_number': '_row_number()'}, # rank rows within each subject, largest probability first\n " ,
76+ "         partition_by=['subjectID'],\n " ,
77+ "         order_by=['sort_key']). \\\n " ,
78+ "     select_rows('row_number == 1') # keep only the top-ranked row per subject\n " ,
79+ " \n " ,
80+ " clean_up_columns = Locum(). \\\n " ,
81+ "     select_columns(['subjectID', 'surveyCategory', 'probability']). \\\n " ,
82+ "     rename_columns({'diagnosis': 'surveyCategory'}) # new name 'diagnosis' <- old name 'surveyCategory'\n " ,
83+ " \n " ,
84+ " ops = data_algebra.data_ops.describe_table(d_local, 'd') +\\\n " ,
85+ "     prob_calculation +\\\n " ,
86+ "     top_rank +\\\n " ,
87+ "     clean_up_columns # '+' appends each Locum's recorded steps; the printed pipeline matches the single-chain version\n " ,
88+ " \n " ,
89+ " print(ops)"
90+ ],
91+ "metadata" : {
92+ "collapsed" : false ,
93+ "pycharm" : {
94+ "name" : " #%%\n " ,
95+ "is_executing" : false
96+ }
97+ }
98+ },
99+ {
100+ "cell_type" : " code" ,
101+ "execution_count" : 3 ,
102+ "outputs" : [
103+ {
104+ "data" : {
105+ "text/plain" : " subjectID diagnosis probability\n 0 1 withdrawal behavior 0.670622\n 1 2 positive re-framing 0.558974" ,
106+ "text/html" : " <div>\n <style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n </style>\n <table border=\" 1\" class=\" dataframe\" >\n <thead>\n <tr style=\" text-align: right;\" >\n <th></th>\n <th>subjectID</th>\n <th>diagnosis</th>\n <th>probability</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>withdrawal behavior</td>\n <td>0.670622</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>positive re-framing</td>\n <td>0.558974</td>\n </tr>\n </tbody>\n </table>\n </div>"
107+ },
108+ "metadata" : {},
109+ "output_type" : " execute_result" ,
110+ "execution_count" : 3
111+ }
112+ ],
113+ "source" : [
114+ " d_local >> ops # apply the composed pipeline to d_local; the resulting frame is the cell's displayed output\n "
115+ ],
116+ "metadata" : {
117+ "collapsed" : false ,
118+ "pycharm" : {
119+ "name" : " #%%\n " ,
120+ "is_executing" : false
121+ }
122+ }
123+ }
124+ ],
125+ "metadata" : {
126+ "kernelspec" : {
127+ "display_name" : " Python 3" ,
128+ "language" : " python" ,
129+ "name" : " python3"
130+ },
131+ "language_info" : {
132+ "codemirror_mode" : {
133+ "name" : " ipython" ,
134+ "version" : 2
135+ },
136+ "file_extension" : " .py" ,
137+ "mimetype" : " text/x-python" ,
138+ "name" : " python" ,
139+ "nbconvert_exporter" : " python" ,
140+ "pygments_lexer" : " ipython2" ,
141+ "version" : " 2.7.6"
142+ },
143+ "pycharm" : {
144+ "stem_cell" : {
145+ "cell_type" : " raw" ,
146+ "source" : [],
147+ "metadata" : {
148+ "collapsed" : false
149+ }
150+ }
151+ }
152+ },
153+ "nbformat" : 4 ,
154+ "nbformat_minor" : 0
155+ }
0 commit comments