|
21 | 21 | },
|
22 | 22 | {
|
23 | 23 | "cell_type": "code",
|
24 |
| - "execution_count": null, |
| 24 | + "execution_count": 1, |
25 | 25 | "metadata": {
|
26 |
| - "Collapsed": "false" |
| 26 | + "Collapsed": "false", |
| 27 | + "ExecuteTime": { |
| 28 | + "end_time": "2023-04-03T14:28:44.170919Z", |
| 29 | + "start_time": "2023-04-03T14:28:41.650760Z" |
| 30 | + } |
27 | 31 | },
|
28 | 32 | "outputs": [],
|
29 | 33 | "source": [
|
|
60 | 64 | },
|
61 | 65 | {
|
62 | 66 | "cell_type": "code",
|
63 |
| - "execution_count": null, |
| 67 | + "execution_count": 5, |
64 | 68 | "metadata": {
|
65 |
| - "Collapsed": "false" |
| 69 | + "Collapsed": "false", |
| 70 | + "ExecuteTime": { |
| 71 | + "end_time": "2023-04-03T14:29:22.362807Z", |
| 72 | + "start_time": "2023-04-03T14:29:22.227471Z" |
| 73 | + } |
66 | 74 | },
|
67 |
| - "outputs": [], |
| 75 | + "outputs": [ |
| 76 | + { |
| 77 | + "data": { |
| 78 | + "text/plain": [ |
| 79 | + "(5000, 7)" |
| 80 | + ] |
| 81 | + }, |
| 82 | + "execution_count": 5, |
| 83 | + "metadata": {}, |
| 84 | + "output_type": "execute_result" |
| 85 | + } |
| 86 | + ], |
68 | 87 | "source": [
|
69 |
| - "housingData = pd.read_csv('data/USA_Housing.csv',sep= ',')\n", |
| 88 | + "housingData = pd.read_csv('../../data/USA_Housing.csv',sep= ',')\n", |
70 | 89 | "housingData.shape"
|
71 | 90 | ]
|
72 | 91 | },
|
73 | 92 | {
|
74 | 93 | "cell_type": "code",
|
75 |
| - "execution_count": null, |
| 94 | + "execution_count": 6, |
76 | 95 | "metadata": {
|
77 |
| - "Collapsed": "false" |
| 96 | + "Collapsed": "false", |
| 97 | + "ExecuteTime": { |
| 98 | + "end_time": "2023-04-03T14:29:24.284223Z", |
| 99 | + "start_time": "2023-04-03T14:29:24.269273Z" |
| 100 | + } |
78 | 101 | },
|
79 |
| - "outputs": [], |
| 102 | + "outputs": [ |
| 103 | + { |
| 104 | + "data": { |
| 105 | + "text/html": [ |
| 106 | + "<div>\n", |
| 107 | + "<style scoped>\n", |
| 108 | + " .dataframe tbody tr th:only-of-type {\n", |
| 109 | + " vertical-align: middle;\n", |
| 110 | + " }\n", |
| 111 | + "\n", |
| 112 | + " .dataframe tbody tr th {\n", |
| 113 | + " vertical-align: top;\n", |
| 114 | + " }\n", |
| 115 | + "\n", |
| 116 | + " .dataframe thead th {\n", |
| 117 | + " text-align: right;\n", |
| 118 | + " }\n", |
| 119 | + "</style>\n", |
| 120 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 121 | + " <thead>\n", |
| 122 | + " <tr style=\"text-align: right;\">\n", |
| 123 | + " <th></th>\n", |
| 124 | + " <th>Avg_Area_Income</th>\n", |
| 125 | + " <th>Avg_Area_House_Age</th>\n", |
| 126 | + " <th>Avg_Area_Number_of_Rooms</th>\n", |
| 127 | + " <th>Avg_Area_Number_of_Bedrooms</th>\n", |
| 128 | + " <th>Area_Population</th>\n", |
| 129 | + " <th>Price</th>\n", |
| 130 | + " </tr>\n", |
| 131 | + " </thead>\n", |
| 132 | + " <tbody>\n", |
| 133 | + " <tr>\n", |
| 134 | + " <th>0</th>\n", |
| 135 | + " <td>79545.45857</td>\n", |
| 136 | + " <td>5.682861</td>\n", |
| 137 | + " <td>7.009188</td>\n", |
| 138 | + " <td>4.09</td>\n", |
| 139 | + " <td>23086.80050</td>\n", |
| 140 | + " <td>1.059034e+06</td>\n", |
| 141 | + " </tr>\n", |
| 142 | + " <tr>\n", |
| 143 | + " <th>1</th>\n", |
| 144 | + " <td>79248.64245</td>\n", |
| 145 | + " <td>6.002900</td>\n", |
| 146 | + " <td>6.730821</td>\n", |
| 147 | + " <td>3.09</td>\n", |
| 148 | + " <td>40173.07217</td>\n", |
| 149 | + " <td>1.505891e+06</td>\n", |
| 150 | + " </tr>\n", |
| 151 | + " <tr>\n", |
| 152 | + " <th>2</th>\n", |
| 153 | + " <td>61287.06718</td>\n", |
| 154 | + " <td>5.865890</td>\n", |
| 155 | + " <td>8.512727</td>\n", |
| 156 | + " <td>5.13</td>\n", |
| 157 | + " <td>36882.15940</td>\n", |
| 158 | + " <td>1.058988e+06</td>\n", |
| 159 | + " </tr>\n", |
| 160 | + " <tr>\n", |
| 161 | + " <th>3</th>\n", |
| 162 | + " <td>63345.24005</td>\n", |
| 163 | + " <td>7.188236</td>\n", |
| 164 | + " <td>5.586729</td>\n", |
| 165 | + " <td>3.26</td>\n", |
| 166 | + " <td>34310.24283</td>\n", |
| 167 | + " <td>1.260617e+06</td>\n", |
| 168 | + " </tr>\n", |
| 169 | + " <tr>\n", |
| 170 | + " <th>4</th>\n", |
| 171 | + " <td>59982.19723</td>\n", |
| 172 | + " <td>5.040555</td>\n", |
| 173 | + " <td>7.839388</td>\n", |
| 174 | + " <td>4.23</td>\n", |
| 175 | + " <td>26354.10947</td>\n", |
| 176 | + " <td>6.309435e+05</td>\n", |
| 177 | + " </tr>\n", |
| 178 | + " </tbody>\n", |
| 179 | + "</table>\n", |
| 180 | + "</div>" |
| 181 | + ], |
| 182 | + "text/plain": [ |
| 183 | + " Avg_Area_Income Avg_Area_House_Age Avg_Area_Number_of_Rooms \\\n", |
| 184 | + "0 79545.45857 5.682861 7.009188 \n", |
| 185 | + "1 79248.64245 6.002900 6.730821 \n", |
| 186 | + "2 61287.06718 5.865890 8.512727 \n", |
| 187 | + "3 63345.24005 7.188236 5.586729 \n", |
| 188 | + "4 59982.19723 5.040555 7.839388 \n", |
| 189 | + "\n", |
| 190 | + " Avg_Area_Number_of_Bedrooms Area_Population Price \n", |
| 191 | + "0 4.09 23086.80050 1.059034e+06 \n", |
| 192 | + "1 3.09 40173.07217 1.505891e+06 \n", |
| 193 | + "2 5.13 36882.15940 1.058988e+06 \n", |
| 194 | + "3 3.26 34310.24283 1.260617e+06 \n", |
| 195 | + "4 4.23 26354.10947 6.309435e+05 " |
| 196 | + ] |
| 197 | + }, |
| 198 | + "execution_count": 6, |
| 199 | + "metadata": {}, |
| 200 | + "output_type": "execute_result" |
| 201 | + } |
| 202 | + ], |
80 | 203 | "source": [
|
81 | 204 | "housingData = housingData.drop(['Address'], axis=1)\n",
|
82 | 205 | "housingData.head()"
|
83 | 206 | ]
|
84 | 207 | },
|
85 | 208 | {
|
86 | 209 | "cell_type": "code",
|
87 |
| - "execution_count": null, |
| 210 | + "execution_count": 7, |
88 | 211 | "metadata": {
|
89 | 212 | "Collapsed": "false",
|
| 213 | + "ExecuteTime": { |
| 214 | + "end_time": "2023-04-03T14:29:25.456875Z", |
| 215 | + "start_time": "2023-04-03T14:29:25.450876Z" |
| 216 | + }, |
90 | 217 | "scrolled": true
|
91 | 218 | },
|
92 |
| - "outputs": [], |
| 219 | + "outputs": [ |
| 220 | + { |
| 221 | + "data": { |
| 222 | + "text/plain": [ |
| 223 | + "Index(['Avg_Area_Income', 'Avg_Area_House_Age', 'Avg_Area_Number_of_Rooms',\n", |
| 224 | + " 'Avg_Area_Number_of_Bedrooms', 'Area_Population', 'Price'],\n", |
| 225 | + " dtype='object')" |
| 226 | + ] |
| 227 | + }, |
| 228 | + "execution_count": 7, |
| 229 | + "metadata": {}, |
| 230 | + "output_type": "execute_result" |
| 231 | + } |
| 232 | + ], |
93 | 233 | "source": [
|
94 | 234 | "housingData.columns"
|
95 | 235 | ]
|
|
105 | 245 | },
|
106 | 246 | {
|
107 | 247 | "cell_type": "code",
|
108 |
| - "execution_count": null, |
| 248 | + "execution_count": 8, |
109 | 249 | "metadata": {
|
110 |
| - "Collapsed": "false" |
| 250 | + "Collapsed": "false", |
| 251 | + "ExecuteTime": { |
| 252 | + "end_time": "2023-04-03T14:29:38.649081Z", |
| 253 | + "start_time": "2023-04-03T14:29:38.634078Z" |
| 254 | + } |
111 | 255 | },
|
112 |
| - "outputs": [], |
| 256 | + "outputs": [ |
| 257 | + { |
| 258 | + "name": "stdout", |
| 259 | + "output_type": "stream", |
| 260 | + "text": [ |
| 261 | + "(1500, 5)\n", |
| 262 | + "(3500, 5)\n" |
| 263 | + ] |
| 264 | + } |
| 265 | + ], |
113 | 266 | "source": [
|
114 | 267 | "# Input \n",
|
115 | 268 | "predictorColumns = ['Avg_Area_Income', 'Avg_Area_House_Age', 'Avg_Area_Number_of_Rooms', \n",
|
|
140 | 293 | },
|
141 | 294 | {
|
142 | 295 | "cell_type": "code",
|
143 |
| - "execution_count": null, |
| 296 | + "execution_count": 9, |
144 | 297 | "metadata": {
|
145 |
| - "Collapsed": "false" |
| 298 | + "Collapsed": "false", |
| 299 | + "ExecuteTime": { |
| 300 | + "end_time": "2023-04-03T14:29:40.804108Z", |
| 301 | + "start_time": "2023-04-03T14:29:40.673110Z" |
| 302 | + } |
146 | 303 | },
|
147 |
| - "outputs": [], |
| 304 | + "outputs": [ |
| 305 | + { |
| 306 | + "name": "stderr", |
| 307 | + "output_type": "stream", |
| 308 | + "text": [ |
| 309 | + "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_base.py:141: FutureWarning: 'normalize' was deprecated in version 1.0 and will be removed in 1.2.\n", |
| 310 | + "If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:\n", |
| 311 | + "\n", |
| 312 | + "from sklearn.pipeline import make_pipeline\n", |
| 313 | + "\n", |
| 314 | + "model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())\n", |
| 315 | + "\n", |
| 316 | + "If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:\n", |
| 317 | + "\n", |
| 318 | + "kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}\n", |
| 319 | + "model.fit(X, y, **kwargs)\n", |
| 320 | + "\n", |
| 321 | + "\n", |
| 322 | + " warnings.warn(\n" |
| 323 | + ] |
| 324 | + }, |
| 325 | + { |
| 326 | + "data": { |
| 327 | + "text/plain": [ |
| 328 | + "LinearRegression(normalize=True)" |
| 329 | + ] |
| 330 | + }, |
| 331 | + "execution_count": 9, |
| 332 | + "metadata": {}, |
| 333 | + "output_type": "execute_result" |
| 334 | + } |
| 335 | + ], |
148 | 336 | "source": [
|
149 | 337 | "# Linear Regression Training\n",
|
150 | 338 | "from sklearn.linear_model import LinearRegression\n",
|
|
154 | 342 | },
|
155 | 343 | {
|
156 | 344 | "cell_type": "code",
|
157 |
| - "execution_count": null, |
| 345 | + "execution_count": 10, |
158 | 346 | "metadata": {
|
159 |
| - "Collapsed": "false" |
| 347 | + "Collapsed": "false", |
| 348 | + "ExecuteTime": { |
| 349 | + "end_time": "2023-04-03T14:29:42.800193Z", |
| 350 | + "start_time": "2023-04-03T14:29:42.787165Z" |
| 351 | + } |
160 | 352 | },
|
161 |
| - "outputs": [], |
| 353 | + "outputs": [ |
| 354 | + { |
| 355 | + "name": "stdout", |
| 356 | + "output_type": "stream", |
| 357 | + "text": [ |
| 358 | + "0.9146818498916266\n" |
| 359 | + ] |
| 360 | + } |
| 361 | + ], |
162 | 362 | "source": [
|
163 | 363 | "# Test Predictions\n",
|
164 | 364 | "from sklearn import metrics\n",
|
|
177 | 377 | },
|
178 | 378 | {
|
179 | 379 | "cell_type": "code",
|
180 |
| - "execution_count": null, |
| 380 | + "execution_count": 11, |
181 | 381 | "metadata": {
|
182 |
| - "Collapsed": "false" |
| 382 | + "Collapsed": "false", |
| 383 | + "ExecuteTime": { |
| 384 | + "end_time": "2023-04-03T14:34:10.912291Z", |
| 385 | + "start_time": "2023-04-03T14:34:10.906281Z" |
| 386 | + } |
183 | 387 | },
|
184 |
| - "outputs": [], |
| 388 | + "outputs": [ |
| 389 | + { |
| 390 | + "name": "stdout", |
| 391 | + "output_type": "stream", |
| 392 | + "text": [ |
| 393 | + "Model LinearRegression was successfully pickled and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\LinearRegression.pickle.\n" |
| 394 | + ] |
| 395 | + } |
| 396 | + ], |
185 | 397 | "source": [
|
186 | 398 | "modelPrefix = 'LinearRegression'\n",
|
187 |
| - "zipFolder = Path.cwd() / 'data/USAHousingModels/LinearRegression'\n", |
| 399 | + "zipFolder = Path.cwd() / 'RegressionModel/'\n", |
188 | 400 | "\n",
|
189 | 401 | "pzmm.PickleModel.pickle_trained_model(linReg, modelPrefix, zipFolder)"
|
190 | 402 | ]
|
191 | 403 | },
|
192 | 404 | {
|
193 | 405 | "cell_type": "code",
|
194 |
| - "execution_count": null, |
| 406 | + "execution_count": 13, |
195 | 407 | "metadata": {
|
196 |
| - "Collapsed": "false" |
| 408 | + "Collapsed": "false", |
| 409 | + "ExecuteTime": { |
| 410 | + "end_time": "2023-04-03T14:35:59.447060Z", |
| 411 | + "start_time": "2023-04-03T14:35:59.327305Z" |
| 412 | + } |
197 | 413 | },
|
198 |
| - "outputs": [], |
| 414 | + "outputs": [ |
| 415 | + { |
| 416 | + "name": "stdout", |
| 417 | + "output_type": "stream", |
| 418 | + "text": [ |
| 419 | + "inputVar.json was successfully written and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\inputVar.json\n", |
| 420 | + "outputVar.json was successfully written and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\outputVar.json\n", |
| 421 | + "ModelProperties.json was successfully written and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\ModelProperties.json\n", |
| 422 | + "fileMetaData.json was successfully written and saved to C:\\Users\\sclind\\Documents\\Python Scripts\\GitHub\\sassoftware\\python-sasctl\\examples\\ARCHIVE\\v1_8\\RegressionModel\\fileMetaData.json\n" |
| 423 | + ] |
| 424 | + }, |
| 425 | + { |
| 426 | + "name": "stderr", |
| 427 | + "output_type": "stream", |
| 428 | + "text": [ |
| 429 | + "C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", |
| 430 | + " outputJSON = outputJSON.append([outputRow], ignore_index=True)\n", |
| 431 | + "C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", |
| 432 | + " outputJSON = outputJSON.append([outputRow], ignore_index=True)\n", |
| 433 | + "C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", |
| 434 | + " outputJSON = outputJSON.append([outputRow], ignore_index=True)\n", |
| 435 | + "C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", |
| 436 | + " outputJSON = outputJSON.append([outputRow], ignore_index=True)\n", |
| 437 | + "C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", |
| 438 | + " outputJSON = outputJSON.append([outputRow], ignore_index=True)\n", |
| 439 | + "C:\\Users\\sclind\\AppData\\Roaming\\Python\\Python39\\site-packages\\sasctl\\pzmm\\write_json_files.py:133: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", |
| 440 | + " outputJSON = outputJSON.append([outputRow], ignore_index=True)\n" |
| 441 | + ] |
| 442 | + } |
| 443 | + ], |
199 | 444 | "source": [
|
200 | 445 | "def writeJSONFiles(data, predict, target, zipFolder, modelPrefix):\n",
|
201 | 446 | " J = pzmm.JSONFiles()\n",
|
|
204 | 449 | " J.writeVarJSON(data[predict], isInput=True, jPath=zipFolder)\n",
|
205 | 450 | " \n",
|
206 | 451 | " # Set output variables and assign an event threshold, then write output variable mapping\n",
|
207 |
| - " outputVar = pd.DataFrame(columns=['EM_PREDICTION'])\n", |
208 |
| - " outputVar['EM_PREDICTION'].loc[1] = 0.5\n", |
| 452 | + " outputVar = pd.DataFrame(data=[[0.5]], columns=['EM_PREDICTION'])\n", |
209 | 453 | " J.writeVarJSON(outputVar, isInput=False, jPath=zipFolder)\n",
|
210 | 454 | " \n",
|
211 | 455 | " # Write model properties to a json file\n",
|
|
276 | 520 | "name": "python",
|
277 | 521 | "nbconvert_exporter": "python",
|
278 | 522 | "pygments_lexer": "ipython3",
|
279 |
| - "version": "3.8.16" |
| 523 | + "version": "3.9.12" |
280 | 524 | },
|
281 | 525 | "latex_envs": {
|
282 | 526 | "LaTeX_envs_menu_present": true,
|
|
0 commit comments