Skip to content

Commit 1edd868

Browse files
committed
Update h2o example and move older to archive
1 parent dbc6ab9 commit 1edd868

File tree

5 files changed

+275
-31
lines changed

5 files changed

+275
-31
lines changed

examples/ARCHIVE/v1_8/pzmmModelImportExampleRegression.ipynb

Lines changed: 274 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,13 @@
2121
},
2222
{
2323
"cell_type": "code",
24-
"execution_count": null,
24+
"execution_count": 1,
2525
"metadata": {
26-
"Collapsed": "false"
26+
"Collapsed": "false",
27+
"ExecuteTime": {
28+
"end_time": "2023-04-03T14:28:44.170919Z",
29+
"start_time": "2023-04-03T14:28:41.650760Z"
30+
}
2731
},
2832
"outputs": [],
2933
"source": [
@@ -60,36 +64,172 @@
6064
},
6165
{
6266
"cell_type": "code",
63-
"execution_count": null,
67+
"execution_count": 5,
6468
"metadata": {
65-
"Collapsed": "false"
69+
"Collapsed": "false",
70+
"ExecuteTime": {
71+
"end_time": "2023-04-03T14:29:22.362807Z",
72+
"start_time": "2023-04-03T14:29:22.227471Z"
73+
}
6674
},
67-
"outputs": [],
75+
"outputs": [
76+
{
77+
"data": {
78+
"text/plain": [
79+
"(5000, 7)"
80+
]
81+
},
82+
"execution_count": 5,
83+
"metadata": {},
84+
"output_type": "execute_result"
85+
}
86+
],
6887
"source": [
69-
"housingData = pd.read_csv('data/USA_Housing.csv',sep= ',')\n",
88+
"housingData = pd.read_csv('../../data/USA_Housing.csv',sep= ',')\n",
7089
"housingData.shape"
7190
]
7291
},
7392
{
7493
"cell_type": "code",
75-
"execution_count": null,
94+
"execution_count": 6,
7695
"metadata": {
77-
"Collapsed": "false"
96+
"Collapsed": "false",
97+
"ExecuteTime": {
98+
"end_time": "2023-04-03T14:29:24.284223Z",
99+
"start_time": "2023-04-03T14:29:24.269273Z"
100+
}
78101
},
79-
"outputs": [],
102+
"outputs": [
103+
{
104+
"data": {
105+
"text/html": [
106+
"<div>\n",
107+
"<style scoped>\n",
108+
" .dataframe tbody tr th:only-of-type {\n",
109+
" vertical-align: middle;\n",
110+
" }\n",
111+
"\n",
112+
" .dataframe tbody tr th {\n",
113+
" vertical-align: top;\n",
114+
" }\n",
115+
"\n",
116+
" .dataframe thead th {\n",
117+
" text-align: right;\n",
118+
" }\n",
119+
"</style>\n",
120+
"<table border=\"1\" class=\"dataframe\">\n",
121+
" <thead>\n",
122+
" <tr style=\"text-align: right;\">\n",
123+
" <th></th>\n",
124+
" <th>Avg_Area_Income</th>\n",
125+
" <th>Avg_Area_House_Age</th>\n",
126+
" <th>Avg_Area_Number_of_Rooms</th>\n",
127+
" <th>Avg_Area_Number_of_Bedrooms</th>\n",
128+
" <th>Area_Population</th>\n",
129+
" <th>Price</th>\n",
130+
" </tr>\n",
131+
" </thead>\n",
132+
" <tbody>\n",
133+
" <tr>\n",
134+
" <th>0</th>\n",
135+
" <td>79545.45857</td>\n",
136+
" <td>5.682861</td>\n",
137+
" <td>7.009188</td>\n",
138+
" <td>4.09</td>\n",
139+
" <td>23086.80050</td>\n",
140+
" <td>1.059034e+06</td>\n",
141+
" </tr>\n",
142+
" <tr>\n",
143+
" <th>1</th>\n",
144+
" <td>79248.64245</td>\n",
145+
" <td>6.002900</td>\n",
146+
" <td>6.730821</td>\n",
147+
" <td>3.09</td>\n",
148+
" <td>40173.07217</td>\n",
149+
" <td>1.505891e+06</td>\n",
150+
" </tr>\n",
151+
" <tr>\n",
152+
" <th>2</th>\n",
153+
" <td>61287.06718</td>\n",
154+
" <td>5.865890</td>\n",
155+
" <td>8.512727</td>\n",
156+
" <td>5.13</td>\n",
157+
" <td>36882.15940</td>\n",
158+
" <td>1.058988e+06</td>\n",
159+
" </tr>\n",
160+
" <tr>\n",
161+
" <th>3</th>\n",
162+
" <td>63345.24005</td>\n",
163+
" <td>7.188236</td>\n",
164+
" <td>5.586729</td>\n",
165+
" <td>3.26</td>\n",
166+
" <td>34310.24283</td>\n",
167+
" <td>1.260617e+06</td>\n",
168+
" </tr>\n",
169+
" <tr>\n",
170+
" <th>4</th>\n",
171+
" <td>59982.19723</td>\n",
172+
" <td>5.040555</td>\n",
173+
" <td>7.839388</td>\n",
174+
" <td>4.23</td>\n",
175+
" <td>26354.10947</td>\n",
176+
" <td>6.309435e+05</td>\n",
177+
" </tr>\n",
178+
" </tbody>\n",
179+
"</table>\n",
180+
"</div>"
181+
],
182+
"text/plain": [
183+
" Avg_Area_Income Avg_Area_House_Age Avg_Area_Number_of_Rooms \\\n",
184+
"0 79545.45857 5.682861 7.009188 \n",
185+
"1 79248.64245 6.002900 6.730821 \n",
186+
"2 61287.06718 5.865890 8.512727 \n",
187+
"3 63345.24005 7.188236 5.586729 \n",
188+
"4 59982.19723 5.040555 7.839388 \n",
189+
"\n",
190+
" Avg_Area_Number_of_Bedrooms Area_Population Price \n",
191+
"0 4.09 23086.80050 1.059034e+06 \n",
192+
"1 3.09 40173.07217 1.505891e+06 \n",
193+
"2 5.13 36882.15940 1.058988e+06 \n",
194+
"3 3.26 34310.24283 1.260617e+06 \n",
195+
"4 4.23 26354.10947 6.309435e+05 "
196+
]
197+
},
198+
"execution_count": 6,
199+
"metadata": {},
200+
"output_type": "execute_result"
201+
}
202+
],
80203
"source": [
81204
"housingData = housingData.drop(['Address'], axis=1)\n",
82205
"housingData.head()"
83206
]
84207
},
85208
{
86209
"cell_type": "code",
87-
"execution_count": null,
210+
"execution_count": 7,
88211
"metadata": {
89212
"Collapsed": "false",
213+
"ExecuteTime": {
214+
"end_time": "2023-04-03T14:29:25.456875Z",
215+
"start_time": "2023-04-03T14:29:25.450876Z"
216+
},
90217
"scrolled": true
91218
},
92-
"outputs": [],
219+
"outputs": [
220+
{
221+
"data": {
222+
"text/plain": [
223+
"Index(['Avg_Area_Income', 'Avg_Area_House_Age', 'Avg_Area_Number_of_Rooms',\n",
224+
" 'Avg_Area_Number_of_Bedrooms', 'Area_Population', 'Price'],\n",
225+
" dtype='object')"
226+
]
227+
},
228+
"execution_count": 7,
229+
"metadata": {},
230+
"output_type": "execute_result"
231+
}
232+
],
93233
"source": [
94234
"housingData.columns"
95235
]
@@ -105,11 +245,24 @@
105245
},
106246
{
107247
"cell_type": "code",
108-
"execution_count": null,
248+
"execution_count": 8,
109249
"metadata": {
110-
"Collapsed": "false"
250+
"Collapsed": "false",
251+
"ExecuteTime": {
252+
"end_time": "2023-04-03T14:29:38.649081Z",
253+
"start_time": "2023-04-03T14:29:38.634078Z"
254+
}
111255
},
112-
"outputs": [],
256+
"outputs": [
257+
{
258+
"name": "stdout",
259+
"output_type": "stream",
260+
"text": [
261+
"(1500, 5)\n",
262+
"(3500, 5)\n"
263+
]
264+
}
265+
],
113266
"source": [
114267
"# Input \n",
115268
"predictorColumns = ['Avg_Area_Income', 'Avg_Area_House_Age', 'Avg_Area_Number_of_Rooms', \n",
@@ -140,11 +293,46 @@
140293
},
141294
{
142295
"cell_type": "code",
143-
"execution_count": null,
296+
"execution_count": 9,
144297
"metadata": {
145-
"Collapsed": "false"
298+
"Collapsed": "false",
299+
"ExecuteTime": {
300+
"end_time": "2023-04-03T14:29:40.804108Z",
301+
"start_time": "2023-04-03T14:29:40.673110Z"
302+
}
146303
},
147-
"outputs": [],
304+
"outputs": [
305+
{
306+
"name": "stderr",
307+
"output_type": "stream",
308+
"text": [
309+
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_base.py:141: FutureWarning: 'normalize' was deprecated in version 1.0 and will be removed in 1.2.\n",
310+
"If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:\n",
311+
"\n",
312+
"from sklearn.pipeline import make_pipeline\n",
313+
"\n",
314+
"model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())\n",
315+
"\n",
316+
"If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:\n",
317+
"\n",
318+
"kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}\n",
319+
"model.fit(X, y, **kwargs)\n",
320+
"\n",
321+
"\n",
322+
" warnings.warn(\n"
323+
]
324+
},
325+
{
326+
"data": {
327+
"text/plain": [
328+
"LinearRegression(normalize=True)"
329+
]
330+
},
331+
"execution_count": 9,
332+
"metadata": {},
333+
"output_type": "execute_result"
334+
}
335+
],
148336
"source": [
149337
"# Linear Regression Training\n",
150338
"from sklearn.linear_model import LinearRegression\n",
@@ -154,11 +342,23 @@
154342
},
155343
{
156344
"cell_type": "code",
157-
"execution_count": null,
345+
"execution_count": 10,
158346
"metadata": {
159-
"Collapsed": "false"
347+
"Collapsed": "false",
348+
"ExecuteTime": {
349+
"end_time": "2023-04-03T14:29:42.800193Z",
350+
"start_time": "2023-04-03T14:29:42.787165Z"
351+
}
160352
},
161-
"outputs": [],
353+
"outputs": [
354+
{
355+
"name": "stdout",
356+
"output_type": "stream",
357+
"text": [
358+
"0.9146818498916266\n"
359+
]
360+
}
361+
],
162362
"source": [
163363
"# Test Predictions\n",
164364
"from sklearn import metrics\n",
@@ -177,25 +377,70 @@
177377
},
178378
{
179379
"cell_type": "code",
180-
"execution_count": null,
380+
"execution_count": 11,
181381
"metadata": {
182-
"Collapsed": "false"
382+
"Collapsed": "false",
383+
"ExecuteTime": {
384+
"end_time": "2023-04-03T14:34:10.912291Z",
385+
"start_time": "2023-04-03T14:34:10.906281Z"
386+
}
183387
},
184-
"outputs": [],
388+
"outputs": [
389+
{
390+
"name": "stdout",
391+
"output_type": "stream",
392+
"text": [
393+
"Model LinearRegression was successfully pickled and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\LinearRegression.pickle.\n"
394+
]
395+
}
396+
],
185397
"source": [
186398
"modelPrefix = 'LinearRegression'\n",
187-
"zipFolder = Path.cwd() / 'data/USAHousingModels/LinearRegression'\n",
399+
"zipFolder = Path.cwd() / 'RegressionModel/'\n",
188400
"\n",
189401
"pzmm.PickleModel.pickle_trained_model(linReg, modelPrefix, zipFolder)"
190402
]
191403
},
192404
{
193405
"cell_type": "code",
194-
"execution_count": null,
406+
"execution_count": 13,
195407
"metadata": {
196-
"Collapsed": "false"
408+
"Collapsed": "false",
409+
"ExecuteTime": {
410+
"end_time": "2023-04-03T14:35:59.447060Z",
411+
"start_time": "2023-04-03T14:35:59.327305Z"
412+
}
197413
},
198-
"outputs": [],
414+
"outputs": [
415+
{
416+
"name": "stdout",
417+
"output_type": "stream",
418+
"text": [
419+
"inputVar.json was successfully written and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\inputVar.json\n",
420+
"outputVar.json was successfully written and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\outputVar.json\n",
421+
"ModelProperties.json was successfully written and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\ModelProperties.json\n",
422+
"fileMetaData.json was successfully written and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\fileMetaData.json\n"
423+
]
424+
},
425+
{
426+
"name": "stderr",
427+
"output_type": "stream",
428+
"text": [
429+
"C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
430+
" outputJSON = outputJSON.append([outputRow], ignore_index=True)\n",
431+
"C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
432+
" outputJSON = outputJSON.append([outputRow], ignore_index=True)\n",
433+
"C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
434+
" outputJSON = outputJSON.append([outputRow], ignore_index=True)\n",
435+
"C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
436+
" outputJSON = outputJSON.append([outputRow], ignore_index=True)\n",
437+
"C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
438+
" outputJSON = outputJSON.append([outputRow], ignore_index=True)\n",
439+
"C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
440+
" outputJSON = outputJSON.append([outputRow], ignore_index=True)\n"
441+
]
442+
}
443+
],
199444
"source": [
200445
"def writeJSONFiles(data, predict, target, zipFolder, modelPrefix):\n",
201446
" J = pzmm.JSONFiles()\n",
@@ -204,8 +449,7 @@
204449
" J.writeVarJSON(data[predict], isInput=True, jPath=zipFolder)\n",
205450
" \n",
206451
" # Set output variables and assign an event threshold, then write output variable mapping\n",
207-
" outputVar = pd.DataFrame(columns=['EM_PREDICTION'])\n",
208-
" outputVar['EM_PREDICTION'].loc[1] = 0.5\n",
452+
" outputVar = pd.DataFrame(data=[[0.5]], columns=['EM_PREDICTION'])\n",
209453
" J.writeVarJSON(outputVar, isInput=False, jPath=zipFolder)\n",
210454
" \n",
211455
" # Write model properties to a json file\n",
@@ -276,7 +520,7 @@
276520
"name": "python",
277521
"nbconvert_exporter": "python",
278522
"pygments_lexer": "ipython3",
279-
"version": "3.8.16"
523+
"version": "3.9.12"
280524
},
281525
"latex_envs": {
282526
"LaTeX_envs_menu_present": true,
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)