|
25 | 25 | "cell_type": "markdown", |
26 | 26 | "metadata": {}, |
27 | 27 | "source": [ |
28 | | - "### Installing Pandas" |
| 28 | + "### Installing Dependencies" |
29 | 29 | ] |
30 | 30 | }, |
31 | 31 | { |
32 | 32 | "cell_type": "markdown", |
33 | 33 | "metadata": {}, |
34 | 34 | "source": [ |
35 | 35 | "```console\n", |
36 | | - "$ python -m pip install pandas\n", |
| 36 | + "$ python3 -m pip install requests pandas matplotlib\n", |
37 | 37 | "```" |
38 | 38 | ] |
39 | 39 | }, |
|
42 | 42 | "metadata": {}, |
43 | 43 | "source": [ |
44 | 44 | "```console\n", |
45 | | - "$ conda install pandas\n", |
| 45 | + "$ conda install requests pandas matplotlib\n", |
46 | 46 | "```" |
47 | 47 | ] |
48 | 48 | }, |
|
197 | 197 | "outputs": [], |
198 | 198 | "source": [ |
199 | 199 | "import numpy as np\n", |
200 | | - "nba.describe(include=np.object)" |
| 200 | + "nba.describe(include=object)" |
201 | 201 | ] |
202 | 202 | }, |
203 | 203 | { |
|
230 | 230 | "source": [ |
231 | 231 | "nba[\"fran_id\"].value_counts()\n", |
232 | 232 | "# Expected:\n", |
233 | | - "# Name: team_id, Length: 104, dtype: int64\n", |
234 | 233 | "# Lakers 6024\n", |
235 | 234 | "# Celtics 5997\n", |
236 | 235 | "# Knicks 5769\n", |
237 | 236 | "\n", |
238 | | - "# Huskies 60\n", |
| 237 | + "# Falcons 60\n", |
239 | 238 | "# Name: fran_id, dtype: int64" |
240 | 239 | ] |
241 | 240 | }, |
|
258 | 257 | "metadata": {}, |
259 | 258 | "outputs": [], |
260 | 259 | "source": [ |
261 | | - "nba.loc[nba[\"team_id\"] == \"MNL\", \"date_game\"].min()\n", |
| 260 | + "nba[\"date_played\"] = pd.to_datetime(nba[\"date_game\"])" |
| 261 | + ] |
| 262 | + }, |
| 263 | + { |
| 264 | + "cell_type": "code", |
| 265 | + "execution_count": null, |
| 266 | + "metadata": {}, |
| 267 | + "outputs": [], |
| 268 | + "source": [ |
| 269 | + "nba.loc[nba[\"team_id\"] == \"MNL\", \"date_played\"].min()\n", |
262 | 270 | "# Expected:\n", |
263 | | - "# '1/1/1949'" |
| 271 | + "# Timestamp('1948-11-04 00:00:00')" |
264 | 272 | ] |
265 | 273 | }, |
266 | 274 | { |
|
269 | 277 | "metadata": {}, |
270 | 278 | "outputs": [], |
271 | 279 | "source": [ |
272 | | - "nba.loc[nba[\"team_id\"] == \"MNL\", \"date_game\"].max()\n", |
| 280 | + "nba.loc[nba[\"team_id\"] == \"MNL\", \"date_played\"].max()\n", |
273 | 281 | "# Expected:\n", |
274 | | - "# '4/9/1959'" |
| 282 | + "# Timestamp('1960-03-26 00:00:00')" |
275 | 283 | ] |
276 | 284 | }, |
277 | 285 | { |
|
280 | 288 | "metadata": {}, |
281 | 289 | "outputs": [], |
282 | 290 | "source": [ |
283 | | - "nba.loc[nba[\"team_id\"] == \"MNL\", \"date_game\"].agg((\"min\", \"max\"))\n", |
| 291 | + "nba.loc[nba[\"team_id\"] == \"MNL\", \"date_played\"].agg((\"min\", \"max\"))\n", |
284 | 292 | "# Expected:\n", |
285 | | - "# min 1/1/1949\n", |
286 | | - "# max 4/9/1959\n", |
287 | | - "# Name: date_game, dtype: object" |
| 293 | + "# min 1948-11-04\n", |
| 294 | + "# max 1960-03-26\n", |
| 295 | + "# Name: date_played, dtype: datetime64[ns]" |
288 | 296 | ] |
289 | 297 | }, |
290 | 298 | { |
|
475 | 483 | "city_data.values\n", |
476 | 484 | "# Expected:\n", |
477 | 485 | "# array([[4.2e+03, 5.0e+00],\n", |
478 | | - "# [6.5e+03, 8.0e+00],\n", |
479 | | - "# [8.0e+03, nan]])" |
| 486 | + "# [6.5e+03, 8.0e+00],\n", |
| 487 | + "# [8.0e+03, nan]])" |
480 | 488 | ] |
481 | 489 | }, |
482 | 490 | { |
|
913 | 921 | "current_decade = nba[nba[\"year_id\"] > 2010]\n", |
914 | 922 | "current_decade.shape\n", |
915 | 923 | "# Expected:\n", |
916 | | - "# (12658, 23)" |
| 924 | + "# (12658, 24)" |
917 | 925 | ] |
918 | 926 | }, |
919 | 927 | { |
|
925 | 933 | "games_with_notes = nba[nba[\"notes\"].notnull()]\n", |
926 | 934 | "games_with_notes.shape\n", |
927 | 935 | "# Expected:\n", |
928 | | - "# (5424, 23)" |
| 936 | + "# (5424, 24)" |
929 | 937 | ] |
930 | 938 | }, |
931 | 939 | { |
|
937 | 945 | "ers = nba[nba[\"fran_id\"].str.endswith(\"ers\")]\n", |
938 | 946 | "ers.shape\n", |
939 | 947 | "# Expected:\n", |
940 | | - "# (27797, 23)" |
| 948 | + "# (27797, 24)" |
941 | 949 | ] |
942 | 950 | }, |
943 | 951 | { |
|
1102 | 1110 | "df = nba.copy()\n", |
1103 | 1111 | "df.shape\n", |
1104 | 1112 | "# Expected:\n", |
1105 | | - "# (126314, 23)" |
| 1113 | + "# (126314, 24)" |
1106 | 1114 | ] |
1107 | 1115 | }, |
1108 | 1116 | { |
|
1121 | 1129 | "df[\"difference\"] = df.pts - df.opp_pts\n", |
1122 | 1130 | "df.shape\n", |
1123 | 1131 | "# Expected:\n", |
1124 | | - "# (126314, 24)" |
| 1132 | + "# (126314, 25)" |
1125 | 1133 | ] |
1126 | 1134 | }, |
1127 | 1135 | { |
|
1155 | 1163 | "# Expected:\n", |
1156 | 1164 | "# <class 'pandas.core.frame.DataFrame'>\n", |
1157 | 1165 | "# RangeIndex: 126314 entries, 0 to 126313\n", |
1158 | | - "# Data columns (total 24 columns):\n", |
| 1166 | + "# Data columns (total 25 columns):\n", |
1159 | 1167 | "# gameorder 126314 non-null int64\n", |
1160 | 1168 | "\n", |
1161 | 1169 | "# location 126314 non-null object\n", |
1162 | 1170 | "# result 126314 non-null object\n", |
1163 | 1171 | "# forecast 126314 non-null float64\n", |
1164 | 1172 | "# notes 5424 non-null object\n", |
| 1173 | + "# date_played 126314 non-null datetime64[ns]\n", |
1165 | 1174 | "# difference 126314 non-null int64\n", |
1166 | | - "# dtypes: float64(6), int64(8), object(10)\n", |
1167 | | - "# memory usage: 23.1+ MB" |
| 1175 | + "# dtypes: datetime64[ns](1), float64(6), int64(8), object(10)\n", |
| 1176 | + "# memory usage: 24.1+ MB" |
1168 | 1177 | ] |
1169 | 1178 | }, |
1170 | 1179 | { |
|
1182 | 1191 | "source": [ |
1183 | 1192 | "df.shape\n", |
1184 | 1193 | "# Expected:\n", |
1185 | | - "# (126314, 24)" |
| 1194 | + "# (126314, 25)" |
1186 | 1195 | ] |
1187 | 1196 | }, |
1188 | 1197 | { |
|
1195 | 1204 | "df.drop(elo_columns, inplace=True, axis=1)\n", |
1196 | 1205 | "df.shape\n", |
1197 | 1206 | "# Expected:\n", |
1198 | | - "# (126314, 20)" |
| 1207 | + "# (126314, 21)" |
1199 | 1208 | ] |
1200 | 1209 | }, |
1201 | 1210 | { |
|
1301 | 1310 | "rows_without_missing_data = nba.dropna()\n", |
1302 | 1311 | "rows_without_missing_data.shape\n", |
1303 | 1312 | "# Expected:\n", |
1304 | | - "# (5424, 23)" |
| 1313 | + "# (5424, 24)" |
1305 | 1314 | ] |
1306 | 1315 | }, |
1307 | 1316 | { |
|
1313 | 1322 | "data_without_missing_columns = nba.dropna(axis=1)\n", |
1314 | 1323 | "data_without_missing_columns.shape\n", |
1315 | 1324 | "# Expected:\n", |
1316 | | - "# (126314, 22)" |
| 1325 | + "# (126314, 23)" |
1317 | 1326 | ] |
1318 | 1327 | }, |
1319 | 1328 | { |
|
1541 | 1550 | ], |
1542 | 1551 | "metadata": { |
1543 | 1552 | "kernelspec": { |
1544 | | - "argv": [ |
1545 | | - "/home/reka/anaconda3/bin/python", |
1546 | | - "-m", |
1547 | | - "ipykernel_launcher", |
1548 | | - "-f", |
1549 | | - "{connection_file}" |
1550 | | - ], |
1551 | 1553 | "display_name": "Python 3", |
1552 | | - "env": {}, |
1553 | | - "interrupt_mode": "signal", |
1554 | 1554 | "language": "python", |
1555 | | - "metadata": {}, |
1556 | 1555 | "name": "python3" |
1557 | 1556 | }, |
1558 | | - "language": "python" |
| 1557 | + "language": "python", |
| 1558 | + "language_info": { |
| 1559 | + "codemirror_mode": { |
| 1560 | + "name": "ipython", |
| 1561 | + "version": 3 |
| 1562 | + }, |
| 1563 | + "file_extension": ".py", |
| 1564 | + "mimetype": "text/x-python", |
| 1565 | + "name": "python", |
| 1566 | + "nbconvert_exporter": "python", |
| 1567 | + "pygments_lexer": "ipython3", |
| 1568 | + "version": "3.9.2" |
| 1569 | + } |
1559 | 1570 | }, |
1560 | 1571 | "nbformat": 4, |
1561 | | - "nbformat_minor": 2 |
| 1572 | + "nbformat_minor": 4 |
1562 | 1573 | } |
0 commit comments