|
5 | 5 | "colab": { |
6 | 6 | "name": "seaborn.ipynb", |
7 | 7 | "provenance": [], |
8 | | - "collapsed_sections": [] |
| 8 | + "collapsed_sections": [], |
| 9 | + "include_colab_link": true |
9 | 10 | }, |
10 | 11 | "kernelspec": { |
11 | 12 | "name": "python3", |
12 | 13 | "display_name": "Python 3" |
13 | 14 | } |
14 | 15 | }, |
15 | 16 | "cells": [ |
| 17 | + { |
| 18 | + "cell_type": "markdown", |
| 19 | + "metadata": { |
| 20 | + "id": "view-in-github", |
| 21 | + "colab_type": "text" |
| 22 | + }, |
| 23 | + "source": [ |
| 24 | + "<a href=\"https://colab.research.google.com/github/gumdropsteve/intro_to_python/blob/main/day_12/seaborn.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" |
| 25 | + ] |
| 26 | + }, |
16 | 27 | { |
17 | 28 | "cell_type": "code", |
18 | 29 | "metadata": { |
|
24 | 35 | "import matplotlib.pyplot as plt\n", |
25 | 36 | "import numpy as np" |
26 | 37 | ], |
27 | | - "execution_count": 157, |
| 38 | + "execution_count": null, |
28 | 39 | "outputs": [] |
29 | 40 | }, |
30 | 41 | { |
|
42 | 53 | "df = sns.load_dataset(\"titanic\")\n", |
43 | 54 | "df.head()" |
44 | 55 | ], |
45 | | - "execution_count": 158, |
| 56 | + "execution_count": null, |
46 | 57 | "outputs": [ |
47 | 58 | { |
48 | 59 | "output_type": "execute_result", |
|
209 | 220 | "# Look at dtypes and NaN \n", |
210 | 221 | "df.info()" |
211 | 222 | ], |
212 | | - "execution_count": 159, |
| 223 | + "execution_count": null, |
213 | 224 | "outputs": [ |
214 | 225 | { |
215 | 226 | "output_type": "stream", |
|
254 | 265 | "# How many NaN values in each column\n", |
255 | 266 | "df.isnull().sum()" |
256 | 267 | ], |
257 | | - "execution_count": 160, |
| 268 | + "execution_count": null, |
258 | 269 | "outputs": [ |
259 | 270 | { |
260 | 271 | "output_type": "execute_result", |
|
298 | 309 | "# Finding the mean of a column\n", |
299 | 310 | "df[\"age\"].mean()" |
300 | 311 | ], |
301 | | - "execution_count": 161, |
| 312 | + "execution_count": null, |
302 | 313 | "outputs": [ |
303 | 314 | { |
304 | 315 | "output_type": "execute_result", |
|
328 | 339 | "# Describe shows a lot of useful summary statistics\n", |
329 | 340 | "df.describe()" |
330 | 341 | ], |
331 | | - "execution_count": 162, |
| 342 | + "execution_count": null, |
332 | 343 | "outputs": [ |
333 | 344 | { |
334 | 345 | "output_type": "execute_result", |
|
469 | 480 | "# Look at NaN values in deck column\n", |
470 | 481 | "df[\"deck\"]" |
471 | 482 | ], |
472 | | - "execution_count": 163, |
| 483 | + "execution_count": null, |
473 | 484 | "outputs": [ |
474 | 485 | { |
475 | 486 | "output_type": "execute_result", |
|
514 | 525 | "\n", |
515 | 526 | "df[[\"age\"]][:19].fillna(age_med)" |
516 | 527 | ], |
517 | | - "execution_count": 165, |
| 528 | + "execution_count": null, |
518 | 529 | "outputs": [ |
519 | 530 | { |
520 | 531 | "output_type": "stream", |
|
673 | 684 | "# Backfill NaN values\n", |
674 | 685 | "df[[\"age\"]][:19].fillna(method=\"bfill\")" |
675 | 686 | ], |
676 | | - "execution_count": 166, |
| 687 | + "execution_count": null, |
677 | 688 | "outputs": [ |
678 | 689 | { |
679 | 690 | "output_type": "stream", |
|
832 | 843 | "# Forward fill NaN values\n", |
833 | 844 | "df[[\"age\"]][:19].fillna(method=\"ffill\")" |
834 | 845 | ], |
835 | | - "execution_count": 167, |
| 846 | + "execution_count": null, |
836 | 847 | "outputs": [ |
837 | 848 | { |
838 | 849 | "output_type": "execute_result", |
|
986 | 997 | " df[\"age\"]\n", |
987 | 998 | ")" |
988 | 999 | ], |
989 | | - "execution_count": 14, |
| 1000 | + "execution_count": null, |
990 | 1001 | "outputs": [ |
991 | 1002 | { |
992 | 1003 | "output_type": "execute_result", |
|
1031 | 1042 | "\n", |
1032 | 1043 | "sns.histplot(df[\"age\"])" |
1033 | 1044 | ], |
1034 | | - "execution_count": 37, |
| 1045 | + "execution_count": null, |
1035 | 1046 | "outputs": [ |
1036 | 1047 | { |
1037 | 1048 | "output_type": "execute_result", |
|
1087 | 1098 | "df.columns = col_title\n", |
1088 | 1099 | "df.head()" |
1089 | 1100 | ], |
1090 | | - "execution_count": 168, |
| 1101 | + "execution_count": null, |
1091 | 1102 | "outputs": [ |
1092 | 1103 | { |
1093 | 1104 | "output_type": "execute_result", |
|
1232 | 1243 | "# Create a new column and change the datatype\n", |
1233 | 1244 | "df[\"Survived String\"] = df[\"Survived\"].astype(\"str\")" |
1234 | 1245 | ], |
1235 | | - "execution_count": 54, |
| 1246 | + "execution_count": null, |
1236 | 1247 | "outputs": [] |
1237 | 1248 | }, |
1238 | 1249 | { |
|
1261 | 1272 | "g.despine(left=True)\n", |
1262 | 1273 | "plt.title(\"Survival on the Titanic Separated by Class\");" |
1263 | 1274 | ], |
1264 | | - "execution_count": 169, |
| 1275 | + "execution_count": null, |
1265 | 1276 | "outputs": [ |
1266 | 1277 | { |
1267 | 1278 | "output_type": "display_data", |
|
1293 | 1304 | "sns.histplot(df[\"Age\"])\n", |
1294 | 1305 | "sns.histplot(df[\"Fare\"])" |
1295 | 1306 | ], |
1296 | | - "execution_count": 94, |
| 1307 | + "execution_count": null, |
1297 | 1308 | "outputs": [ |
1298 | 1309 | { |
1299 | 1310 | "output_type": "execute_result", |
|
1338 | 1349 | "sns.histplot(df[df[\"Class\"] == \"Second\"][\"Age\"])\n", |
1339 | 1350 | "sns.histplot(df[df[\"Class\"] == \"Third\"][\"Age\"])" |
1340 | 1351 | ], |
1341 | | - "execution_count": 170, |
| 1352 | + "execution_count": null, |
1342 | 1353 | "outputs": [ |
1343 | 1354 | { |
1344 | 1355 | "output_type": "execute_result", |
|
1389 | 1400 | "plt.legend(labels=[\"First Class\", \"Second Class\", \"Third Class\"])\n", |
1390 | 1401 | "plt.title(\"KDE Plot of Fares grouped by class\");" |
1391 | 1402 | ], |
1392 | | - "execution_count": 180, |
| 1403 | + "execution_count": null, |
1393 | 1404 | "outputs": [ |
1394 | 1405 | { |
1395 | 1406 | "output_type": "display_data", |
|
1428 | 1439 | "plt.legend(labels=[\"First Class\", \"Second Class\", \"Third Class\"])\n", |
1429 | 1440 | "plt.title(\"KDE Plot of Fares grouped by class\");" |
1430 | 1441 | ], |
1431 | | - "execution_count": 182, |
| 1442 | + "execution_count": null, |
1432 | 1443 | "outputs": [ |
1433 | 1444 | { |
1434 | 1445 | "output_type": "display_data", |
|
1471 | 1482 | "\n", |
1472 | 1483 | "make_kde(df)" |
1473 | 1484 | ], |
1474 | | - "execution_count": 179, |
| 1485 | + "execution_count": null, |
1475 | 1486 | "outputs": [ |
1476 | 1487 | { |
1477 | 1488 | "output_type": "display_data", |
|
1500 | 1511 | "source": [ |
1501 | 1512 | "df[\"Class\"].value_counts()" |
1502 | 1513 | ], |
1503 | | - "execution_count": 95, |
| 1514 | + "execution_count": null, |
1504 | 1515 | "outputs": [ |
1505 | 1516 | { |
1506 | 1517 | "output_type": "execute_result", |
|
1534 | 1545 | "sns.pairplot(df, hue=\"Class\")\n", |
1535 | 1546 | "plt.title(\"Pairplot of Columns in Titanic Dataset\")" |
1536 | 1547 | ], |
1537 | | - "execution_count": 121, |
| 1548 | + "execution_count": null, |
1538 | 1549 | "outputs": [ |
1539 | 1550 | { |
1540 | 1551 | "output_type": "stream", |
|
1594 | 1605 | "ax.set(ylabel=\"\")\n", |
1595 | 1606 | "plt.title(\"Swarmplot\")" |
1596 | 1607 | ], |
1597 | | - "execution_count": 127, |
| 1608 | + "execution_count": null, |
1598 | 1609 | "outputs": [ |
1599 | 1610 | { |
1600 | 1611 | "output_type": "stream", |
|
1659 | 1670 | "\n", |
1660 | 1671 | "plt.title(\"Survival Violin Plot\");" |
1661 | 1672 | ], |
1662 | | - "execution_count": 130, |
| 1673 | + "execution_count": null, |
1663 | 1674 | "outputs": [ |
1664 | 1675 | { |
1665 | 1676 | "output_type": "display_data", |
|
1689 | 1700 | "# Find min value of fare\n", |
1690 | 1701 | "df[\"Fare\"].min()" |
1691 | 1702 | ], |
1692 | | - "execution_count": 131, |
| 1703 | + "execution_count": null, |
1693 | 1704 | "outputs": [ |
1694 | 1705 | { |
1695 | 1706 | "output_type": "execute_result", |
|
1718 | 1729 | "# Find max value of fare\n", |
1719 | 1730 | "df[[\"Fare\"]].max()" |
1720 | 1731 | ], |
1721 | | - "execution_count": 135, |
| 1732 | + "execution_count": null, |
1722 | 1733 | "outputs": [ |
1723 | 1734 | { |
1724 | 1735 | "output_type": "execute_result", |
|
1748 | 1759 | "# What methods can we use with a pandas dataframe object\n", |
1749 | 1760 | "dir(df[[\"Fare\"]])" |
1750 | 1761 | ], |
1751 | | - "execution_count": 134, |
| 1762 | + "execution_count": null, |
1752 | 1763 | "outputs": [ |
1753 | 1764 | { |
1754 | 1765 | "output_type": "execute_result", |
|
2213 | 2224 | " # multiple=\"fill\", # Look up what fill is doing with a kdeplot with multiple classes\n", |
2214 | 2225 | ")" |
2215 | 2226 | ], |
2216 | | - "execution_count": 155, |
| 2227 | + "execution_count": null, |
2217 | 2228 | "outputs": [ |
2218 | 2229 | { |
2219 | 2230 | "output_type": "execute_result", |
|
2256 | 2267 | "# Seeing which class was represented the most in the data\n", |
2257 | 2268 | "df[\"Class\"].value_counts().plot(kind=\"bar\", figsize=(10,10))" |
2258 | 2269 | ], |
2259 | | - "execution_count": 152, |
| 2270 | + "execution_count": null, |
2260 | 2271 | "outputs": [ |
2261 | 2272 | { |
2262 | 2273 | "output_type": "execute_result", |
|
0 commit comments