|
783 | 783 | }, |
784 | 784 | { |
785 | 785 | "cell_type": "code", |
786 | | - "execution_count": 19, |
| 786 | + "execution_count": 1, |
787 | 787 | "id": "327695ae-b376-4d5b-9293-6e89ff272790", |
788 | 788 | "metadata": {}, |
789 | | - "outputs": [], |
| 789 | + "outputs": [ |
| 790 | + { |
| 791 | + "ename": "NameError", |
| 792 | + "evalue": "name 'html' is not defined", |
| 793 | + "output_type": "error", |
| 794 | + "traceback": [ |
| 795 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 796 | + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", |
| 797 | + "Cell \u001b[0;32mIn [1], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbs4\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BeautifulSoup\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Create soup\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m soup \u001b[38;5;241m=\u001b[39m BeautifulSoup(\u001b[43mhtml\u001b[49m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhtml.parser\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", |
| 798 | + "\u001b[0;31mNameError\u001b[0m: name 'html' is not defined" |
| 799 | + ] |
| 800 | + } |
| 801 | + ], |
790 | 802 | "source": [ |
791 | 803 | "# Import beautiful soup module\n", |
792 | 804 | "from bs4 import BeautifulSoup\n", |
|
887 | 899 | }, |
888 | 900 | { |
889 | 901 | "cell_type": "code", |
890 | | - "execution_count": 23, |
| 902 | + "execution_count": 5, |
891 | 903 | "id": "80a8dcd1-ba04-45de-840e-aa014c19a75c", |
892 | 904 | "metadata": {}, |
893 | | - "outputs": [], |
| 905 | + "outputs": [ |
| 906 | + { |
| 907 | + "name": "stdout", |
| 908 | + "output_type": "stream", |
| 909 | + "text": [ |
| 910 | + "Make request 0\n" |
| 911 | + ] |
| 912 | + }, |
| 913 | + { |
| 914 | + "ename": "NameError", |
| 915 | + "evalue": "name 'requests' is not defined", |
| 916 | + "output_type": "error", |
| 917 | + "traceback": [ |
| 918 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 919 | + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", |
| 920 | + "Cell \u001b[0;32mIn [5], line 15\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMake request \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# Make a request\u001b[39;00m\n\u001b[0;32m---> 15\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241m.\u001b[39mget(url)\n\u001b[1;32m 17\u001b[0m \u001b[38;5;66;03m# Decode to JSON\u001b[39;00m\n\u001b[1;32m 18\u001b[0m response_json \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n", |
| 921 | + "\u001b[0;31mNameError\u001b[0m: name 'requests' is not defined" |
| 922 | + ] |
| 923 | + } |
| 924 | + ], |
894 | 925 | "source": [ |
895 | 926 | "# Import\n", |
| 927 | + "import requests\n", |
896 | 928 | "import jsonlines\n", |
897 | 929 | "import time\n", |
898 | 930 | "\n", |
899 | | - "\n", |
900 | 931 | "# URL\n", |
901 | 932 | "url = 'https://catfact.ninja/fact'\n", |
902 | 933 | "\n", |
903 | 934 | "# Make three requests in a loop and write each response JSON object to the jsonlines file\n", |
904 | | - "response_jsons = []\n", |
905 | 935 | "for i in range(3):\n", |
906 | 936 | "\n", |
907 | 937 | " # Logging\n", |
|
912 | 942 | " \n", |
913 | 943 | " # Decode to JSON\n", |
914 | 944 | " response_json = response.json()\n", |
| 945 | + " \n", |
| 946 | + " # Open a jsonlines writer in 'append' mode so each response is added as a new line\n", |
| 947 | + " with jsonlines.open('catfacts.jsonl', mode='a') as writer:\n", |
| 948 | + "\n", |
| 949 | + " # Write\n", |
| 950 | + " writer.write(response_json)\n", |
915 | 951 | " \n", |
916 | | - " # Append to list\n", |
917 | | - " response_jsons.append(response_json)\n", |
918 | | - " \n", |
919 | 952 | " # Sleep for one second between requests\n", |
920 | | - " time.sleep(1)\n", |
921 | | - " \n", |
922 | | - "# Open a jsonlines writer\n", |
923 | | - "with jsonlines.open('catfacts.jsonl', mode='w') as writer:\n", |
924 | | - "\n", |
925 | | - " # Write\n", |
926 | | - " writer.write(response_jsons)" |
| 953 | + " time.sleep(1)\n" |
927 | 954 | ] |
928 | 955 | }, |
929 | 956 | { |
|
936 | 963 | }, |
937 | 964 | { |
938 | 965 | "cell_type": "code", |
939 | | - "execution_count": 23, |
| 966 | + "execution_count": 4, |
940 | 967 | "id": "2d424c23-bdf3-47f7-a643-4312a07c955a", |
941 | 968 | "metadata": {}, |
942 | 969 | "outputs": [ |
943 | 970 | { |
944 | | - "name": "stdout", |
945 | | - "output_type": "stream", |
946 | | - "text": [ |
947 | | - "Make request 0\n", |
948 | | - "Make request 1\n", |
949 | | - "Make request 2\n" |
950 | | - ] |
| 971 | + "data": { |
| 972 | + "text/plain": [ |
| 973 | + "{'fact': 'In the 1930s, two Russian biologists discovered that color change in Siamese kittens depend on their body temperature. Siamese cats carry albino genes that work only when the body temperature is above 98° F. If these kittens are left in a very warm room, their points won’t darken and they will stay a creamy white.',\n", |
| 974 | + " 'length': 315}" |
| 975 | + ] |
| 976 | + }, |
| 977 | + "metadata": {}, |
| 978 | + "output_type": "display_data" |
951 | 979 | }, |
952 | 980 | { |
953 | 981 | "data": { |
954 | 982 | "text/plain": [ |
955 | | - "[{'fact': 'A cat has 230 bones in its body. A human has 206. A cat has no collarbone, so it can fit through any opening the size of its head.',\n", |
956 | | - " 'length': 130},\n", |
957 | | - " {'fact': 'The most popular pedigreed cat is the Persian cat, followed by the Main Coon cat and the Siamese cat.',\n", |
958 | | - " 'length': 101},\n", |
959 | | - " {'fact': 'The average cat food meal is the equivalent to about five mice.',\n", |
960 | | - " 'length': 63}]" |
| 983 | + "{'fact': \"The Maine Coon cat is America's only natural breed of domestic feline. It is 4 to 5 times larger than the Singapura, the smallest breed of cat.\",\n", |
| 984 | + " 'length': 143}" |
| 985 | + ] |
| 986 | + }, |
| 987 | + "metadata": {}, |
| 988 | + "output_type": "display_data" |
| 989 | + }, |
| 990 | + { |
| 991 | + "data": { |
| 992 | + "text/plain": [ |
| 993 | + "{'fact': \"Cats must have fat in their diet because they can't produce it on their own.\",\n", |
| 994 | + " 'length': 76}" |
961 | 995 | ] |
962 | 996 | }, |
963 | 997 | "metadata": {}, |
|
968 | 1002 | "# Open a jsonlines reader\n", |
969 | 1003 | "with jsonlines.open('catfacts.jsonl', mode='r') as reader:\n", |
970 | 1004 | " \n", |
971 | | - " # Read\n", |
972 | | - " jsons_from_file = reader.read()\n", |
973 | | - " \n", |
974 | | - "# Test that the written and read object lists are the same\n", |
975 | | - "assert response_jsons == jsons_from_file\n", |
976 | | - "\n", |
977 | | - "# Display the JSON objects read from file\n", |
978 | | - "display(jsons_from_file)" |
| 1005 | + " # Read and display\n", |
| 1006 | + " for obj in reader:\n", |
| 1007 | + " display(obj)" |
979 | 1008 | ] |
980 | 1009 | }, |
981 | 1010 | { |
|
0 commit comments