|
553 | 553 | }, |
554 | 554 | { |
555 | 555 | "cell_type": "code", |
556 | | - "execution_count": 14, |
| 556 | + "execution_count": 11, |
557 | 557 | "metadata": {}, |
558 | | - "outputs": [ |
559 | | - { |
560 | | - "ename": "ValueError", |
561 | | - "evalue": "All objects passed were None", |
562 | | - "output_type": "error", |
563 | | - "traceback": [ |
564 | | - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
565 | | - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", |
566 | | - "Cell \u001b[0;32mIn[14], line 22\u001b[0m\n\u001b[1;32m 20\u001b[0m encoders[col] \u001b[38;5;241m=\u001b[39m encoder\n\u001b[1;32m 21\u001b[0m data\u001b[38;5;241m.\u001b[39mdrop(columns\u001b[38;5;241m=\u001b[39m[col], inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 22\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreset_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtransformed_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreset_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m dtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumerical\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 25\u001b[0m scaler \u001b[38;5;241m=\u001b[39m StandardScaler(with_mean\u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, with_std\u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m)\n", |
567 | | - "File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/reshape/concat.py:382\u001b[0m, in \u001b[0;36mconcat\u001b[0;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m copy \u001b[38;5;129;01mand\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 380\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m--> 382\u001b[0m op \u001b[38;5;241m=\u001b[39m \u001b[43m_Concatenator\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 384\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 385\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mnames\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnames\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43mverify_integrity\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverify_integrity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 395\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m op\u001b[38;5;241m.\u001b[39mget_result()\n", |
568 | | - "File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/reshape/concat.py:445\u001b[0m, in \u001b[0;36m_Concatenator.__init__\u001b[0;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverify_integrity \u001b[38;5;241m=\u001b[39m verify_integrity\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy \u001b[38;5;241m=\u001b[39m copy\n\u001b[0;32m--> 445\u001b[0m objs, keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_clean_keys_and_objs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# figure out what our result ndim is going to be\u001b[39;00m\n\u001b[1;32m 448\u001b[0m ndims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ndims(objs)\n", |
569 | | - "File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/reshape/concat.py:541\u001b[0m, in \u001b[0;36m_Concatenator._clean_keys_and_objs\u001b[0;34m(self, objs, keys)\u001b[0m\n\u001b[1;32m 538\u001b[0m keys \u001b[38;5;241m=\u001b[39m Index(clean_keys, name\u001b[38;5;241m=\u001b[39mname, dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mgetattr\u001b[39m(keys, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 540\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(objs_list) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 541\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll objects passed were None\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 543\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m objs_list, keys\n", |
570 | | - "\u001b[0;31mValueError\u001b[0m: All objects passed were None" |
571 | | - ] |
572 | | - } |
573 | | - ], |
| 558 | + "outputs": [], |
574 | 559 | "source": [ |
575 | 560 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler\n", |
576 | 561 | "\n", |
|
593 | 578 | " transformed_data = _encode_categorical(data[col], encoder)\n", |
594 | 579 | " encoders[col] = encoder\n", |
595 | 580 | " data.drop(columns=[col], inplace=True)\n", |
596 | | - " data = pd.concat([data.reset_index(drop=True, inplace=True), transformed_data.reset_index(drop=True, inplace=True)], axis=1)\n", |
| 581 | + " data = pd.concat([data.reset_index(drop=True), transformed_data.reset_index(drop=True)], axis=1)\n", |
597 | 582 | "\n", |
598 | 583 | " elif dtype == \"numerical\":\n", |
599 | 584 | " scaler = StandardScaler(with_mean= False, with_std= False)\n", |
600 | 585 | " data[col] = scaler.fit_transform(data[[col]])\n", |
601 | | - " scalers[col] = scaler\n" |
| 586 | + " scalers[col] = scaler" |
602 | 587 | ] |
603 | 588 | }, |
604 | 589 | { |
605 | 590 | "cell_type": "code", |
606 | | - "execution_count": 13, |
| 591 | + "execution_count": 12, |
607 | 592 | "metadata": {}, |
608 | 593 | "outputs": [ |
609 | 594 | { |
610 | 595 | "name": "stdout", |
611 | 596 | "output_type": "stream", |
612 | 597 | "text": [ |
613 | 598 | "Missing values in data:\n", |
614 | | - "ugpa 20\n", |
615 | | - "sex 20\n", |
616 | | - "race1 0\n", |
617 | | - "bar 0\n", |
| 599 | + "ugpa 0\n", |
| 600 | + "sex 0\n", |
| 601 | + "race1 0\n", |
| 602 | + "bar 0\n", |
618 | 603 | "dtype: int64\n", |
619 | 604 | "\n", |
620 | | - "Total missing values: 40\n", |
621 | | - "Data shape: (22407, 4)\n" |
| 605 | + "Total missing values: 0\n", |
| 606 | + "Data shape: (22387, 4)\n" |
622 | 607 | ] |
623 | 608 | } |
624 | 609 | ], |
|
631 | 616 | }, |
632 | 617 | { |
633 | 618 | "cell_type": "code", |
634 | | - "execution_count": null, |
| 619 | + "execution_count": 13, |
635 | 620 | "metadata": {}, |
636 | 621 | "outputs": [ |
637 | 622 | { |
|
652 | 637 | }, |
653 | 638 | { |
654 | 639 | "cell_type": "code", |
655 | | - "execution_count": null, |
| 640 | + "execution_count": 14, |
656 | 641 | "metadata": {}, |
657 | 642 | "outputs": [ |
658 | 643 | { |
|
678 | 663 | }, |
679 | 664 | { |
680 | 665 | "cell_type": "code", |
681 | | - "execution_count": null, |
| 666 | + "execution_count": 15, |
682 | 667 | "metadata": {}, |
683 | 668 | "outputs": [ |
684 | 669 | { |
|
0 commit comments