|
597 | 597 | }, |
598 | 598 | { |
599 | 599 | "cell_type": "code", |
600 | | - "execution_count": 6, |
| 600 | + "execution_count": 5, |
601 | 601 | "metadata": {}, |
602 | 602 | "outputs": [ |
603 | 603 | { |
|
633 | 633 | " <tbody>\n", |
634 | 634 | " <tr>\n", |
635 | 635 | " <th>0</th>\n", |
636 | | - " <td>1.0</td>\n", |
637 | | - " <td>1.0</td>\n", |
638 | | - " <td>5.0</td>\n", |
639 | | - " <td>0.0</td>\n", |
640 | | - " <td>0.0</td>\n", |
641 | | - " <td>0.0</td>\n", |
642 | | - " <td>0.0</td>\n", |
| 636 | + " <td>1</td>\n", |
| 637 | + " <td>1</td>\n", |
| 638 | + " <td>5</td>\n", |
| 639 | + " <td>0</td>\n", |
| 640 | + " <td>0</td>\n", |
| 641 | + " <td>0</td>\n", |
| 642 | + " <td>0</td>\n", |
643 | 643 | " </tr>\n", |
644 | 644 | " <tr>\n", |
645 | 645 | " <th>1</th>\n", |
646 | | - " <td>0.0</td>\n", |
647 | | - " <td>1.0</td>\n", |
648 | | - " <td>0.0</td>\n", |
649 | | - " <td>0.0</td>\n", |
650 | | - " <td>1.0</td>\n", |
651 | | - " <td>0.0</td>\n", |
652 | | - " <td>0.0</td>\n", |
| 646 | + " <td>0</td>\n", |
| 647 | + " <td>1</td>\n", |
| 648 | + " <td>0</td>\n", |
| 649 | + " <td>0</td>\n", |
| 650 | + " <td>1</td>\n", |
| 651 | + " <td>0</td>\n", |
| 652 | + " <td>0</td>\n", |
653 | 653 | " </tr>\n", |
654 | 654 | " <tr>\n", |
655 | 655 | " <th>2</th>\n", |
656 | | - " <td>2.0</td>\n", |
657 | | - " <td>1.0</td>\n", |
658 | | - " <td>0.0</td>\n", |
659 | | - " <td>0.0</td>\n", |
660 | | - " <td>1.0</td>\n", |
661 | | - " <td>0.0</td>\n", |
662 | | - " <td>0.0</td>\n", |
| 656 | + " <td>2</td>\n", |
| 657 | + " <td>1</td>\n", |
| 658 | + " <td>0</td>\n", |
| 659 | + " <td>0</td>\n", |
| 660 | + " <td>1</td>\n", |
| 661 | + " <td>0</td>\n", |
| 662 | + " <td>0</td>\n", |
663 | 663 | " </tr>\n", |
664 | 664 | " <tr>\n", |
665 | 665 | " <th>3</th>\n", |
666 | | - " <td>2.0</td>\n", |
667 | | - " <td>1.0</td>\n", |
668 | | - " <td>0.0</td>\n", |
669 | | - " <td>0.0</td>\n", |
670 | | - " <td>0.0</td>\n", |
671 | | - " <td>1.0</td>\n", |
672 | | - " <td>1.0</td>\n", |
| 666 | + " <td>2</td>\n", |
| 667 | + " <td>1</td>\n", |
| 668 | + " <td>0</td>\n", |
| 669 | + " <td>0</td>\n", |
| 670 | + " <td>0</td>\n", |
| 671 | + " <td>1</td>\n", |
| 672 | + " <td>1</td>\n", |
673 | 673 | " </tr>\n", |
674 | 674 | " <tr>\n", |
675 | 675 | " <th>4</th>\n", |
676 | | - " <td>0.0</td>\n", |
677 | | - " <td>1.0</td>\n", |
678 | | - " <td>5.0</td>\n", |
679 | | - " <td>0.0</td>\n", |
680 | | - " <td>0.0</td>\n", |
681 | | - " <td>0.0</td>\n", |
682 | | - " <td>0.0</td>\n", |
| 676 | + " <td>0</td>\n", |
| 677 | + " <td>1</td>\n", |
| 678 | + " <td>5</td>\n", |
| 679 | + " <td>0</td>\n", |
| 680 | + " <td>0</td>\n", |
| 681 | + " <td>0</td>\n", |
| 682 | + " <td>0</td>\n", |
683 | 683 | " </tr>\n", |
684 | 684 | " </tbody>\n", |
685 | 685 | "</table>\n", |
686 | 686 | "</div>" |
687 | 687 | ], |
688 | 688 | "text/plain": [ |
689 | 689 | " age_cat sex race c_charge_degree is_recid score_text false_positive\n", |
690 | | - "0 1.0 1.0 5.0 0.0 0.0 0.0 0.0\n", |
691 | | - "1 0.0 1.0 0.0 0.0 1.0 0.0 0.0\n", |
692 | | - "2 2.0 1.0 0.0 0.0 1.0 0.0 0.0\n", |
693 | | - "3 2.0 1.0 0.0 0.0 0.0 1.0 1.0\n", |
694 | | - "4 0.0 1.0 5.0 0.0 0.0 0.0 0.0" |
| 690 | + "0 1 1 5 0 0 0 0\n", |
| 691 | + "1 0 1 0 0 1 0 0\n", |
| 692 | + "2 2 1 0 0 1 0 0\n", |
| 693 | + "3 2 1 0 0 0 1 1\n", |
| 694 | + "4 0 1 5 0 0 0 0" |
695 | 695 | ] |
696 | 696 | }, |
697 | | - "execution_count": 6, |
| 697 | + "execution_count": 5, |
698 | 698 | "metadata": {}, |
699 | 699 | "output_type": "execute_result" |
700 | 700 | } |
701 | 701 | ], |
702 | 702 | "source": [ |
703 | 703 | "# Apply OrdinalEncoder to the categorical columns\n", |
704 | 704 | "encoder = OrdinalEncoder()\n", |
705 | | - "filtered_df[filtered_df.columns] = encoder.fit_transform(filtered_df)\n", |
| 705 | + "filtered_df[filtered_df.columns] = encoder.fit_transform(filtered_df).astype(\"uint32\")\n", |
706 | 706 | "\n", |
707 | 707 | "# Display the transformed DataFrame\n", |
708 | 708 | "filtered_df.head()" |
709 | 709 | ] |
710 | 710 | }, |
711 | 711 | { |
712 | 712 | "cell_type": "code", |
713 | | - "execution_count": 7, |
| 713 | + "execution_count": 6, |
714 | 714 | "metadata": {}, |
715 | 715 | "outputs": [ |
716 | 716 | { |
717 | 717 | "data": { |
718 | 718 | "text/plain": [ |
719 | | - "age_cat float64\n", |
720 | | - "sex float64\n", |
721 | | - "race float64\n", |
722 | | - "c_charge_degree float64\n", |
723 | | - "is_recid float64\n", |
724 | | - "score_text float64\n", |
725 | | - "false_positive float64\n", |
| 719 | + "age_cat uint32\n", |
| 720 | + "sex uint32\n", |
| 721 | + "race uint32\n", |
| 722 | + "c_charge_degree uint32\n", |
| 723 | + "is_recid uint32\n", |
| 724 | + "score_text uint32\n", |
| 725 | + "false_positive uint32\n", |
726 | 726 | "dtype: object" |
727 | 727 | ] |
728 | 728 | }, |
729 | | - "execution_count": 7, |
| 729 | + "execution_count": 6, |
730 | 730 | "metadata": {}, |
731 | 731 | "output_type": "execute_result" |
732 | 732 | } |
|
737 | 737 | }, |
738 | 738 | { |
739 | 739 | "cell_type": "code", |
740 | | - "execution_count": null, |
| 740 | + "execution_count": 7, |
741 | 741 | "metadata": {}, |
742 | 742 | "outputs": [ |
743 | 743 | { |
|
767 | 767 | }, |
768 | 768 | { |
769 | 769 | "cell_type": "code", |
770 | | - "execution_count": 9, |
| 770 | + "execution_count": 8, |
771 | 771 | "metadata": {}, |
772 | 772 | "outputs": [ |
773 | 773 | { |
|
787 | 787 | }, |
788 | 788 | { |
789 | 789 | "cell_type": "code", |
790 | | - "execution_count": 10, |
| 790 | + "execution_count": 9, |
791 | 791 | "metadata": {}, |
792 | 792 | "outputs": [ |
793 | 793 | { |
|
845 | 845 | }, |
846 | 846 | { |
847 | 847 | "cell_type": "code", |
848 | | - "execution_count": null, |
| 848 | + "execution_count": 10, |
849 | 849 | "metadata": {}, |
850 | 850 | "outputs": [ |
851 | 851 | { |
|
896 | 896 | }, |
897 | 897 | { |
898 | 898 | "cell_type": "code", |
899 | | - "execution_count": 12, |
| 899 | + "execution_count": 11, |
900 | 900 | "metadata": {}, |
901 | 901 | "outputs": [ |
902 | 902 | { |
|
1331 | 1331 | "BiasAwareHierarchicalKModes(bahc_max_iter=20, bahc_min_cluster_size=57.71)" |
1332 | 1332 | ] |
1333 | 1333 | }, |
1334 | | - "execution_count": 12, |
| 1334 | + "execution_count": 11, |
1335 | 1335 | "metadata": {}, |
1336 | 1336 | "output_type": "execute_result" |
1337 | 1337 | } |
|
1344 | 1344 | }, |
1345 | 1345 | { |
1346 | 1346 | "cell_type": "code", |
1347 | | - "execution_count": 13, |
| 1347 | + "execution_count": 12, |
1348 | 1348 | "metadata": {}, |
1349 | 1349 | "outputs": [ |
1350 | 1350 | { |
|
1353 | 1353 | "5" |
1354 | 1354 | ] |
1355 | 1355 | }, |
1356 | | - "execution_count": 13, |
| 1356 | + "execution_count": 12, |
1357 | 1357 | "metadata": {}, |
1358 | 1358 | "output_type": "execute_result" |
1359 | 1359 | } |
|
1365 | 1365 | }, |
1366 | 1366 | { |
1367 | 1367 | "cell_type": "code", |
1368 | | - "execution_count": 14, |
| 1368 | + "execution_count": 13, |
1369 | 1369 | "metadata": {}, |
1370 | 1370 | "outputs": [ |
1371 | 1371 | { |
|
1374 | 1374 | "array([ 0.05290941, -0.02457072, -0.04827102, -0.04888889, -0.05093596])" |
1375 | 1375 | ] |
1376 | 1376 | }, |
1377 | | - "execution_count": 14, |
| 1377 | + "execution_count": 13, |
1378 | 1378 | "metadata": {}, |
1379 | 1379 | "output_type": "execute_result" |
1380 | 1380 | } |
|
1386 | 1386 | }, |
1387 | 1387 | { |
1388 | 1388 | "cell_type": "code", |
1389 | | - "execution_count": 15, |
| 1389 | + "execution_count": 14, |
1390 | 1390 | "metadata": {}, |
1391 | 1391 | "outputs": [ |
1392 | 1392 | { |
|
1417 | 1417 | }, |
1418 | 1418 | { |
1419 | 1419 | "cell_type": "code", |
1420 | | - "execution_count": 16, |
| 1420 | + "execution_count": 15, |
1421 | 1421 | "metadata": {}, |
1422 | 1422 | "outputs": [ |
1423 | 1423 | { |
|
1426 | 1426 | "array([0, 0, 4, ..., 0, 0, 1], dtype=uint32)" |
1427 | 1427 | ] |
1428 | 1428 | }, |
1429 | | - "execution_count": 16, |
| 1429 | + "execution_count": 15, |
1430 | 1430 | "metadata": {}, |
1431 | 1431 | "output_type": "execute_result" |
1432 | 1432 | } |
|
1439 | 1439 | }, |
1440 | 1440 | { |
1441 | 1441 | "cell_type": "code", |
1442 | | - "execution_count": 17, |
| 1442 | + "execution_count": 16, |
1443 | 1443 | "metadata": {}, |
1444 | 1444 | "outputs": [ |
1445 | 1445 | { |
|
1618 | 1618 | "[1443 rows x 7 columns]" |
1619 | 1619 | ] |
1620 | 1620 | }, |
1621 | | - "execution_count": 17, |
| 1621 | + "execution_count": 16, |
1622 | 1622 | "metadata": {}, |
1623 | 1623 | "output_type": "execute_result" |
1624 | 1624 | } |
|
1642 | 1642 | }, |
1643 | 1643 | { |
1644 | 1644 | "cell_type": "code", |
1645 | | - "execution_count": 18, |
| 1645 | + "execution_count": 17, |
1646 | 1646 | "metadata": {}, |
1647 | 1647 | "outputs": [ |
1648 | 1648 | { |
|
1752 | 1752 | "4 0 0 0 " |
1753 | 1753 | ] |
1754 | 1754 | }, |
1755 | | - "execution_count": 18, |
| 1755 | + "execution_count": 17, |
1756 | 1756 | "metadata": {}, |
1757 | 1757 | "output_type": "execute_result" |
1758 | 1758 | } |
|
1776 | 1776 | }, |
1777 | 1777 | { |
1778 | 1778 | "cell_type": "code", |
1779 | | - "execution_count": 19, |
| 1779 | + "execution_count": 18, |
1780 | 1780 | "metadata": {}, |
1781 | 1781 | "outputs": [], |
1782 | 1782 | "source": [ |
|
1787 | 1787 | }, |
1788 | 1788 | { |
1789 | 1789 | "cell_type": "code", |
1790 | | - "execution_count": 20, |
| 1790 | + "execution_count": 19, |
1791 | 1791 | "metadata": {}, |
1792 | 1792 | "outputs": [ |
1793 | 1793 | { |
|
1842 | 1842 | }, |
1843 | 1843 | { |
1844 | 1844 | "cell_type": "code", |
1845 | | - "execution_count": 21, |
| 1845 | + "execution_count": 20, |
1846 | 1846 | "metadata": {}, |
1847 | 1847 | "outputs": [ |
1848 | 1848 | { |
|
1906 | 1906 | }, |
1907 | 1907 | { |
1908 | 1908 | "cell_type": "code", |
1909 | | - "execution_count": 22, |
| 1909 | + "execution_count": 21, |
1910 | 1910 | "metadata": { |
1911 | 1911 | "vscode": { |
1912 | 1912 | "languageId": "ruby" |
|
1970 | 1970 | }, |
1971 | 1971 | { |
1972 | 1972 | "cell_type": "code", |
1973 | | - "execution_count": 23, |
| 1973 | + "execution_count": 22, |
1974 | 1974 | "metadata": { |
1975 | 1975 | "vscode": { |
1976 | 1976 | "languageId": "ruby" |
|
0 commit comments