|
35 | 35 | "source": [ |
36 | 36 | "import pandas as pd\n", |
37 | 37 | "\n", |
38 | | - "james_bond_data = pd.read_csv(\"james_bond_data.csv\").convert_dtypes()\n", |
39 | | - " " |
| 38 | + "james_bond_data = pd.read_csv(\"james_bond_data.csv\").convert_dtypes()" |
40 | 39 | ] |
41 | 40 | }, |
42 | 41 | { |
|
134 | 133 | "source": [ |
135 | 134 | "import pandas as pd\n", |
136 | 135 | "\n", |
137 | | - "james_bond_data = pd.read_parquet(\n", |
138 | | - " \"james_bond_data.parquet\"\n", |
139 | | - ").convert_dtypes()\n", |
| 136 | + "james_bond_data = pd.read_parquet(\"james_bond_data.parquet\").convert_dtypes()\n", |
140 | 137 | "\n", |
141 | 138 | "james_bond_data" |
142 | 139 | ] |
|
261 | 258 | "outputs": [], |
262 | 259 | "source": [ |
263 | 260 | "data = james_bond_data.rename(columns=new_column_names).combine_first(\n", |
264 | | - " pd.DataFrame(\n", |
265 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
266 | | - " )\n", |
| 261 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
267 | 262 | ")" |
268 | 263 | ] |
269 | 264 | }, |
|
292 | 287 | "metadata": {}, |
293 | 288 | "outputs": [], |
294 | 289 | "source": [ |
295 | | - "data[\n", |
296 | | - " [\"income_usa\", \"income_world\", \"movie_budget\", \"film_length\"]\n", |
297 | | - "].head()" |
| 290 | + "data[[\"income_usa\", \"income_world\", \"movie_budget\", \"film_length\"]].head()" |
298 | 291 | ] |
299 | 292 | }, |
300 | 293 | { |
|
307 | 300 | "data = (\n", |
308 | 301 | " james_bond_data.rename(columns=new_column_names)\n", |
309 | 302 | " .combine_first(\n", |
310 | | - " pd.DataFrame(\n", |
311 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
312 | | - " )\n", |
| 303 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
313 | 304 | " )\n", |
314 | 305 | " .assign(\n", |
315 | 306 | " income_usa=lambda data: (\n", |
|
331 | 322 | "data = (\n", |
332 | 323 | " james_bond_data.rename(columns=new_column_names)\n", |
333 | 324 | " .combine_first(\n", |
334 | | - " pd.DataFrame(\n", |
335 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
336 | | - " )\n", |
| 325 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
337 | 326 | " )\n", |
338 | 327 | " .assign(\n", |
339 | 328 | " income_usa=lambda data: (\n", |
|
373 | 362 | "data = (\n", |
374 | 363 | " james_bond_data.rename(columns=new_column_names)\n", |
375 | 364 | " .combine_first(\n", |
376 | | - " pd.DataFrame(\n", |
377 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
378 | | - " )\n", |
| 365 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
379 | 366 | " )\n", |
380 | 367 | " .assign(\n", |
381 | 368 | " income_usa=lambda data: (\n", |
|
394 | 381 | " .astype(\"Float64\")\n", |
395 | 382 | " ),\n", |
396 | 383 | " film_length=lambda data: (\n", |
397 | | - " data[\"film_length\"]\n", |
398 | | - " .str.removesuffix(\"mins\")\n", |
399 | | - " .astype(\"Int64\")\n", |
| 384 | + " data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n", |
400 | 385 | " ),\n", |
401 | 386 | " )\n", |
402 | 387 | ")" |
|
409 | 394 | "metadata": {}, |
410 | 395 | "outputs": [], |
411 | 396 | "source": [ |
412 | | - "data[\n", |
413 | | - " [\"income_usa\", \"income_world\", \"movie_budget\", \"film_length\"]\n", |
414 | | - "].info()" |
| 397 | + "data[[\"income_usa\", \"income_world\", \"movie_budget\", \"film_length\"]].info()" |
415 | 398 | ] |
416 | 399 | }, |
417 | 400 | { |
|
421 | 404 | "metadata": {}, |
422 | 405 | "outputs": [], |
423 | 406 | "source": [ |
424 | | - "data[\n", |
425 | | - " [\"income_usa\", \"income_world\", \"movie_budget\", \"film_length\"]\n", |
426 | | - "].head()" |
| 407 | + "data[[\"income_usa\", \"income_world\", \"movie_budget\", \"film_length\"]].head()" |
427 | 408 | ] |
428 | 409 | }, |
429 | 410 | { |
|
456 | 437 | "data = (\n", |
457 | 438 | " james_bond_data.rename(columns=new_column_names)\n", |
458 | 439 | " .combine_first(\n", |
459 | | - " pd.DataFrame(\n", |
460 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
461 | | - " )\n", |
| 440 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
462 | 441 | " )\n", |
463 | 442 | " .assign(\n", |
464 | 443 | " income_usa=lambda data: (\n", |
|
477 | 456 | " .astype(\"Float64\")\n", |
478 | 457 | " ),\n", |
479 | 458 | " film_length=lambda data: (\n", |
480 | | - " data[\"film_length\"]\n", |
481 | | - " .str.removesuffix(\"mins\")\n", |
482 | | - " .astype(\"Int64\")\n", |
| 459 | + " data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n", |
483 | 460 | " ),\n", |
484 | 461 | " release_date=lambda data: pd.to_datetime(\n", |
485 | 462 | " data[\"release_date\"], format=\"%B, %Y\"\n", |
486 | 463 | " ),\n", |
487 | | - " release_year=lambda data: data[\"release_date\"]\n", |
488 | | - " .dt.year\n", |
489 | | - " .astype(\"Int64\"),\n", |
| 464 | + " release_year=lambda data: data[\"release_date\"].dt.year.astype(\"Int64\"),\n", |
490 | 465 | " )\n", |
491 | 466 | ")" |
492 | 467 | ] |
|
549 | 524 | "data = (\n", |
550 | 525 | " james_bond_data.rename(columns=new_column_names)\n", |
551 | 526 | " .combine_first(\n", |
552 | | - " pd.DataFrame(\n", |
553 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
554 | | - " )\n", |
| 527 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
555 | 528 | " )\n", |
556 | 529 | " .assign(\n", |
557 | 530 | " income_usa=lambda data: (\n", |
|
571 | 544 | " * 1000\n", |
572 | 545 | " ),\n", |
573 | 546 | " film_length=lambda data: (\n", |
574 | | - " data[\"film_length\"]\n", |
575 | | - " .str.removesuffix(\"mins\")\n", |
576 | | - " .astype(\"Int64\")\n", |
| 547 | + " data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n", |
577 | 548 | " ),\n", |
578 | 549 | " release_date=lambda data: pd.to_datetime(\n", |
579 | 550 | " data[\"release_date\"], format=\"%B, %Y\"\n", |
580 | 551 | " ),\n", |
581 | | - " release_year=lambda data: data[\"release_date\"]\n", |
582 | | - " .dt.year\n", |
583 | | - " .astype(\"Int64\"),\n", |
| 552 | + " release_year=lambda data: data[\"release_date\"].dt.year.astype(\"Int64\"),\n", |
584 | 553 | " )\n", |
585 | 554 | ")" |
586 | 555 | ] |
|
623 | 592 | "data = (\n", |
624 | 593 | " james_bond_data.rename(columns=new_column_names)\n", |
625 | 594 | " .combine_first(\n", |
626 | | - " pd.DataFrame(\n", |
627 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
628 | | - " )\n", |
| 595 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
629 | 596 | " )\n", |
630 | 597 | " .assign(\n", |
631 | 598 | " income_usa=lambda data: (\n", |
|
645 | 612 | " * 1000\n", |
646 | 613 | " ),\n", |
647 | 614 | " film_length=lambda data: (\n", |
648 | | - " data[\"film_length\"]\n", |
649 | | - " .str.removesuffix(\"mins\")\n", |
650 | | - " .astype(\"Int64\")\n", |
| 615 | + " data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n", |
651 | 616 | " ),\n", |
652 | 617 | " release_date=lambda data: pd.to_datetime(\n", |
653 | 618 | " data[\"release_date\"], format=\"%B, %Y\"\n", |
654 | 619 | " ),\n", |
655 | | - " release_year=lambda data: data[\"release_date\"]\n", |
656 | | - " .dt.year\n", |
657 | | - " .astype(\"Int64\"),\n", |
| 620 | + " release_year=lambda data: data[\"release_date\"].dt.year.astype(\"Int64\"),\n", |
658 | 621 | " bond_actor=lambda data: (\n", |
659 | 622 | " data[\"bond_actor\"]\n", |
660 | 623 | " .str.replace(\"Shawn\", \"Sean\")\n", |
|
694 | 657 | "data = (\n", |
695 | 658 | " james_bond_data.rename(columns=new_column_names)\n", |
696 | 659 | " .combine_first(\n", |
697 | | - " pd.DataFrame(\n", |
698 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
699 | | - " )\n", |
| 660 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
700 | 661 | " )\n", |
701 | 662 | " .assign(\n", |
702 | 663 | " income_usa=lambda data: (\n", |
|
716 | 677 | " * 1000\n", |
717 | 678 | " ),\n", |
718 | 679 | " film_length=lambda data: (\n", |
719 | | - " data[\"film_length\"]\n", |
720 | | - " .str.removesuffix(\"mins\")\n", |
721 | | - " .astype(\"Int64\")\n", |
| 680 | + " data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n", |
722 | 681 | " ),\n", |
723 | 682 | " release_date=lambda data: pd.to_datetime(\n", |
724 | 683 | " data[\"release_date\"], format=\"%B, %Y\"\n", |
725 | 684 | " ),\n", |
726 | | - " release_year=lambda data: data[\"release_date\"]\n", |
727 | | - " .dt.year\n", |
728 | | - " .astype(\"Int64\"),\n", |
| 685 | + " release_year=lambda data: data[\"release_date\"].dt.year.astype(\"Int64\"),\n", |
729 | 686 | " bond_actor=lambda data: (\n", |
730 | 687 | " data[\"bond_actor\"]\n", |
731 | 688 | " .str.replace(\"Shawn\", \"Sean\")\n", |
732 | 689 | " .str.replace(\"MOORE\", \"Moore\")\n", |
733 | 690 | " ),\n", |
734 | | - " car_manufacturer=lambda data: data[\"car_manufacturer\"]\n", |
735 | | - " .str.replace(\"Astin\", \"Aston\"),\n", |
| 691 | + " car_manufacturer=lambda data: data[\"car_manufacturer\"].str.replace(\n", |
| 692 | + " \"Astin\", \"Aston\"\n", |
| 693 | + " ),\n", |
736 | 694 | " )\n", |
737 | 695 | ")" |
738 | 696 | ] |
|
775 | 733 | "data = (\n", |
776 | 734 | " james_bond_data.rename(columns=new_column_names)\n", |
777 | 735 | " .combine_first(\n", |
778 | | - " pd.DataFrame(\n", |
779 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
780 | | - " )\n", |
| 736 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
781 | 737 | " )\n", |
782 | 738 | " .assign(\n", |
783 | 739 | " income_usa=lambda data: (\n", |
|
791 | 747 | " .astype(\"Float64\")\n", |
792 | 748 | " ),\n", |
793 | 749 | " movie_budget=lambda data: (\n", |
794 | | - " data[\"movie_budget\"].replace(\"[$,]\", \"\", regex=True)\n", |
| 750 | + " data[\"movie_budget\"]\n", |
| 751 | + " .replace(\"[$,]\", \"\", regex=True)\n", |
795 | 752 | " .astype(\"Float64\")\n", |
796 | 753 | " * 1000\n", |
797 | 754 | " ),\n", |
|
804 | 761 | " release_date=lambda data: pd.to_datetime(\n", |
805 | 762 | " data[\"release_date\"], format=\"%B, %Y\"\n", |
806 | 763 | " ),\n", |
807 | | - " release_year=lambda data: data[\"release_date\"]\n", |
808 | | - " .dt.year\n", |
809 | | - " .astype(\"Int64\"),\n", |
| 764 | + " release_year=lambda data: data[\"release_date\"].dt.year.astype(\"Int64\"),\n", |
810 | 765 | " bond_actor=lambda data: (\n", |
811 | 766 | " data[\"bond_actor\"]\n", |
812 | 767 | " .str.replace(\"Shawn\", \"Sean\")\n", |
813 | 768 | " .str.replace(\"MOORE\", \"Moore\")\n", |
814 | 769 | " ),\n", |
815 | | - " car_manufacturer=lambda data: data[\"car_manufacturer\"]\n", |
816 | | - " .str.replace(\"Astin\", \"Aston\"),\n", |
817 | | - " martinis_consumed=lambda data: data[\"martinis_consumed\"]\n", |
818 | | - " .replace(-6, 6),\n", |
| 770 | + " car_manufacturer=lambda data: data[\"car_manufacturer\"].str.replace(\n", |
| 771 | + " \"Astin\", \"Aston\"\n", |
| 772 | + " ),\n", |
| 773 | + " martinis_consumed=lambda data: data[\"martinis_consumed\"].replace(\n", |
| 774 | + " -6, 6\n", |
| 775 | + " ),\n", |
819 | 776 | " )\n", |
820 | 777 | ")" |
821 | 778 | ] |
|
858 | 815 | "data = (\n", |
859 | 816 | " james_bond_data.rename(columns=new_column_names)\n", |
860 | 817 | " .combine_first(\n", |
861 | | - " pd.DataFrame(\n", |
862 | | - " {\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}}\n", |
863 | | - " )\n", |
| 818 | + " pd.DataFrame({\"imdb\": {10: 7.1}, \"rotten_tomatoes\": {10: 6.8}})\n", |
864 | 819 | " )\n", |
865 | 820 | " .assign(\n", |
866 | 821 | " income_usa=lambda data: (\n", |
|
888 | 843 | " release_date=lambda data: pd.to_datetime(\n", |
889 | 844 | " data[\"release_date\"], format=\"%B, %Y\"\n", |
890 | 845 | " ),\n", |
891 | | - " release_year=lambda data: data[\"release_date\"]\n", |
892 | | - " .dt.year.astype(\"Int64\"),\n", |
| 846 | + " release_year=lambda data: data[\"release_date\"].dt.year.astype(\"Int64\"),\n", |
893 | 847 | " bond_actor=lambda data: (\n", |
894 | 848 | " data[\"bond_actor\"]\n", |
895 | 849 | " .str.replace(\"Shawn\", \"Sean\")\n", |
896 | 850 | " .str.replace(\"MOORE\", \"Moore\")\n", |
897 | 851 | " ),\n", |
898 | | - " car_manufacturer=lambda data: data[\"car_manufacturer\"]\n", |
899 | | - " .str.replace(\"Astin\", \"Aston\"),\n", |
900 | | - " martinis_consumed=lambda data: data[\"martinis_consumed\"]\n", |
901 | | - " .replace(-6, 6),\n", |
| 852 | + " car_manufacturer=lambda data: data[\"car_manufacturer\"].str.replace(\n", |
| 853 | + " \"Astin\", \"Aston\"\n", |
| 854 | + " ),\n", |
| 855 | + " martinis_consumed=lambda data: data[\"martinis_consumed\"].replace(\n", |
| 856 | + " -6, 6\n", |
| 857 | + " ),\n", |
902 | 858 | " )\n", |
903 | 859 | " .drop_duplicates(ignore_index=True)\n", |
904 | 860 | ")" |
|
931 | 887 | "metadata": {}, |
932 | 888 | "outputs": [], |
933 | 889 | "source": [ |
934 | | - " data[\"bond_actor\"].value_counts()" |
| 890 | + "data[\"bond_actor\"].value_counts()" |
935 | 891 | ] |
936 | 892 | }, |
937 | 893 | { |
|
1085 | 1041 | "ax.set_title(\"Scatter Plot of Kills vs Ratings\")\n", |
1086 | 1042 | "ax.set_xlabel(\"Average IMDb Rating\")\n", |
1087 | 1043 | "ax.set_ylabel(\"Kills by Bond\")\n", |
1088 | | - "#fig.show()" |
| 1044 | + "# fig.show()" |
1089 | 1045 | ] |
1090 | 1046 | } |
1091 | 1047 | ], |
|
0 commit comments