|
597 | 597 | }, |
598 | 598 | { |
599 | 599 | "cell_type": "code", |
600 | | - "execution_count": 5, |
| 600 | + "execution_count": 6, |
601 | 601 | "metadata": {}, |
602 | 602 | "outputs": [ |
603 | 603 | { |
|
694 | 694 | "4 0.0 1.0 5.0 0.0 0.0 0.0 0.0" |
695 | 695 | ] |
696 | 696 | }, |
697 | | - "execution_count": 5, |
| 697 | + "execution_count": 6, |
698 | 698 | "metadata": {}, |
699 | 699 | "output_type": "execute_result" |
700 | 700 | } |
|
710 | 710 | }, |
711 | 711 | { |
712 | 712 | "cell_type": "code", |
713 | | - "execution_count": 6, |
| 713 | + "execution_count": 7, |
714 | 714 | "metadata": {}, |
715 | 715 | "outputs": [ |
716 | 716 | { |
|
726 | 726 | "dtype: object" |
727 | 727 | ] |
728 | 728 | }, |
729 | | - "execution_count": 6, |
| 729 | + "execution_count": 7, |
730 | 730 | "metadata": {}, |
731 | 731 | "output_type": "execute_result" |
732 | 732 | } |
|
737 | 737 | }, |
738 | 738 | { |
739 | 739 | "cell_type": "code", |
740 | | - "execution_count": 7, |
| 740 | + "execution_count": null, |
741 | 741 | "metadata": {}, |
742 | 742 | "outputs": [ |
743 | 743 | { |
|
750 | 750 | ], |
751 | 751 | "source": [ |
752 | 752 | "# Create a dataframe from filtered_df except the bias metric column\n", |
753 | | - "df_no_bias_metric = filtered_df.drop(columns=[\"false_positive\"])\n", |
| 753 | + "bias_metric = \"false_positive\"\n", |
| 754 | + "df_no_bias_metric = filtered_df.drop(columns=[bias_metric])\n", |
754 | 755 | "if df_no_bias_metric.dtypes.nunique() == 1:\n", |
755 | 756 | " print('consistent data')\n", |
756 | 757 | "else:\n", |
|
766 | 767 | }, |
767 | 768 | { |
768 | 769 | "cell_type": "code", |
769 | | - "execution_count": 8, |
| 770 | + "execution_count": 9, |
770 | 771 | "metadata": {}, |
771 | 772 | "outputs": [ |
772 | 773 | { |
|
786 | 787 | }, |
787 | 788 | { |
788 | 789 | "cell_type": "code", |
789 | | - "execution_count": 9, |
| 790 | + "execution_count": 10, |
790 | 791 | "metadata": {}, |
791 | 792 | "outputs": [ |
792 | 793 | { |
|
844 | 845 | }, |
845 | 846 | { |
846 | 847 | "cell_type": "code", |
847 | | - "execution_count": 10, |
| 848 | + "execution_count": null, |
848 | 849 | "metadata": {}, |
849 | 850 | "outputs": [ |
850 | 851 | { |
|
858 | 859 | ], |
859 | 860 | "source": [ |
860 | 861 | "# split the data into training and testing sets\n", |
861 | | - "bias_metric = \"false_positive\"\n", |
862 | 862 | "train_df, test_df = train_test_split(filtered_df, test_size=0.2, random_state=42)\n", |
863 | 863 | "X_train = train_df.drop(columns=[bias_metric])\n", |
864 | 864 | "\n", |
|
896 | 896 | }, |
897 | 897 | { |
898 | 898 | "cell_type": "code", |
899 | | - "execution_count": 11, |
| 899 | + "execution_count": 12, |
900 | 900 | "metadata": {}, |
901 | 901 | "outputs": [ |
902 | 902 | { |
|
1331 | 1331 | "BiasAwareHierarchicalKModes(bahc_max_iter=20, bahc_min_cluster_size=57.71)" |
1332 | 1332 | ] |
1333 | 1333 | }, |
1334 | | - "execution_count": 11, |
| 1334 | + "execution_count": 12, |
1335 | 1335 | "metadata": {}, |
1336 | 1336 | "output_type": "execute_result" |
1337 | 1337 | } |
|
1344 | 1344 | }, |
1345 | 1345 | { |
1346 | 1346 | "cell_type": "code", |
1347 | | - "execution_count": 12, |
| 1347 | + "execution_count": 13, |
1348 | 1348 | "metadata": {}, |
1349 | 1349 | "outputs": [ |
1350 | 1350 | { |
|
1353 | 1353 | "5" |
1354 | 1354 | ] |
1355 | 1355 | }, |
1356 | | - "execution_count": 12, |
| 1356 | + "execution_count": 13, |
1357 | 1357 | "metadata": {}, |
1358 | 1358 | "output_type": "execute_result" |
1359 | 1359 | } |
|
1365 | 1365 | }, |
1366 | 1366 | { |
1367 | 1367 | "cell_type": "code", |
1368 | | - "execution_count": 13, |
| 1368 | + "execution_count": 14, |
1369 | 1369 | "metadata": {}, |
1370 | 1370 | "outputs": [ |
1371 | 1371 | { |
|
1374 | 1374 | "array([ 0.05290941, -0.02457072, -0.04827102, -0.04888889, -0.05093596])" |
1375 | 1375 | ] |
1376 | 1376 | }, |
1377 | | - "execution_count": 13, |
| 1377 | + "execution_count": 14, |
1378 | 1378 | "metadata": {}, |
1379 | 1379 | "output_type": "execute_result" |
1380 | 1380 | } |
|
1386 | 1386 | }, |
1387 | 1387 | { |
1388 | 1388 | "cell_type": "code", |
1389 | | - "execution_count": 14, |
| 1389 | + "execution_count": 15, |
1390 | 1390 | "metadata": {}, |
1391 | 1391 | "outputs": [ |
1392 | 1392 | { |
|
1417 | 1417 | }, |
1418 | 1418 | { |
1419 | 1419 | "cell_type": "code", |
1420 | | - "execution_count": 15, |
| 1420 | + "execution_count": 16, |
1421 | 1421 | "metadata": {}, |
1422 | 1422 | "outputs": [ |
1423 | 1423 | { |
|
1426 | 1426 | "array([0, 0, 4, ..., 0, 0, 1], dtype=uint32)" |
1427 | 1427 | ] |
1428 | 1428 | }, |
1429 | | - "execution_count": 15, |
| 1429 | + "execution_count": 16, |
1430 | 1430 | "metadata": {}, |
1431 | 1431 | "output_type": "execute_result" |
1432 | 1432 | } |
|
1439 | 1439 | }, |
1440 | 1440 | { |
1441 | 1441 | "cell_type": "code", |
1442 | | - "execution_count": 16, |
| 1442 | + "execution_count": 17, |
1443 | 1443 | "metadata": {}, |
1444 | 1444 | "outputs": [ |
1445 | 1445 | { |
|
1618 | 1618 | "[1443 rows x 7 columns]" |
1619 | 1619 | ] |
1620 | 1620 | }, |
1621 | | - "execution_count": 16, |
| 1621 | + "execution_count": 17, |
1622 | 1622 | "metadata": {}, |
1623 | 1623 | "output_type": "execute_result" |
1624 | 1624 | } |
|
1642 | 1642 | }, |
1643 | 1643 | { |
1644 | 1644 | "cell_type": "code", |
1645 | | - "execution_count": 17, |
| 1645 | + "execution_count": 18, |
1646 | 1646 | "metadata": {}, |
1647 | 1647 | "outputs": [ |
1648 | 1648 | { |
|
1752 | 1752 | "4 0 0 0 " |
1753 | 1753 | ] |
1754 | 1754 | }, |
1755 | | - "execution_count": 17, |
| 1755 | + "execution_count": 18, |
1756 | 1756 | "metadata": {}, |
1757 | 1757 | "output_type": "execute_result" |
1758 | 1758 | } |
|
1776 | 1776 | }, |
1777 | 1777 | { |
1778 | 1778 | "cell_type": "code", |
1779 | | - "execution_count": 18, |
| 1779 | + "execution_count": 19, |
1780 | 1780 | "metadata": {}, |
1781 | 1781 | "outputs": [], |
1782 | 1782 | "source": [ |
|
1787 | 1787 | }, |
1788 | 1788 | { |
1789 | 1789 | "cell_type": "code", |
1790 | | - "execution_count": 19, |
| 1790 | + "execution_count": 20, |
1791 | 1791 | "metadata": {}, |
1792 | 1792 | "outputs": [ |
1793 | 1793 | { |
|
1842 | 1842 | }, |
1843 | 1843 | { |
1844 | 1844 | "cell_type": "code", |
1845 | | - "execution_count": 20, |
| 1845 | + "execution_count": 21, |
1846 | 1846 | "metadata": {}, |
1847 | 1847 | "outputs": [ |
1848 | 1848 | { |
|
1906 | 1906 | }, |
1907 | 1907 | { |
1908 | 1908 | "cell_type": "code", |
1909 | | - "execution_count": 33, |
| 1909 | + "execution_count": 22, |
1910 | 1910 | "metadata": { |
1911 | 1911 | "vscode": { |
1912 | 1912 | "languageId": "ruby" |
|
1970 | 1970 | }, |
1971 | 1971 | { |
1972 | 1972 | "cell_type": "code", |
1973 | | - "execution_count": 32, |
| 1973 | + "execution_count": 23, |
1974 | 1974 | "metadata": { |
1975 | 1975 | "vscode": { |
1976 | 1976 | "languageId": "ruby" |
|
0 commit comments