|
45 | 45 | }, |
46 | 46 | { |
47 | 47 | "cell_type": "code", |
48 | | - "execution_count": 2, |
| 48 | + "execution_count": 3, |
49 | 49 | "metadata": {}, |
50 | 50 | "outputs": [ |
51 | 51 | { |
|
73 | 73 | }, |
74 | 74 | { |
75 | 75 | "cell_type": "code", |
76 | | - "execution_count": 3, |
| 76 | + "execution_count": 4, |
77 | 77 | "metadata": {}, |
78 | 78 | "outputs": [ |
79 | 79 | { |
|
251 | 251 | "[29240017 rows x 8 columns]" |
252 | 252 | ] |
253 | 253 | }, |
254 | | - "execution_count": 3, |
| 254 | + "execution_count": 4, |
255 | 255 | "metadata": {}, |
256 | 256 | "output_type": "execute_result" |
257 | 257 | } |
|
269 | 269 | }, |
270 | 270 | { |
271 | 271 | "cell_type": "code", |
272 | | - "execution_count": 4, |
| 272 | + "execution_count": 5, |
273 | 273 | "metadata": {}, |
274 | 274 | "outputs": [], |
275 | 275 | "source": [ |
|
285 | 285 | }, |
286 | 286 | { |
287 | 287 | "cell_type": "code", |
288 | | - "execution_count": 5, |
| 288 | + "execution_count": 6, |
289 | 289 | "metadata": {}, |
290 | 290 | "outputs": [ |
291 | 291 | { |
|
388 | 388 | "7 obs_time string False " |
389 | 389 | ] |
390 | 390 | }, |
391 | | - "execution_count": 5, |
| 391 | + "execution_count": 6, |
392 | 392 | "metadata": {}, |
393 | 393 | "output_type": "execute_result" |
394 | 394 | } |
|
809 | 809 | "wr.athena.read_sql_query(\"SELECT * FROM noaa\", database=\"awswrangler_test\", ctas_approach=False)" |
810 | 810 | ] |
811 | 811 | }, |
| 812 | + { |
| 813 | + "cell_type": "markdown", |
| 814 | + "metadata": {}, |
| 815 | + "source": [ |
| 816 | + "## Using categories to speed up and save memory!" |
| 817 | + ] |
| 818 | + }, |
| 819 | + { |
| 820 | + "cell_type": "code", |
| 821 | + "execution_count": 7, |
| 822 | + "metadata": {}, |
| 823 | + "outputs": [ |
| 824 | + { |
| 825 | + "name": "stdout", |
| 826 | + "output_type": "stream", |
| 827 | + "text": [ |
| 828 | + "CPU times: user 3.84 s, sys: 2.01 s, total: 5.85 s\n", |
| 829 | + "Wall time: 30.2 s\n" |
| 830 | + ] |
| 831 | + }, |
| 832 | + { |
| 833 | + "data": { |
| 834 | + "text/html": [ |
| 835 | + "<div>\n", |
| 836 | + "<style scoped>\n", |
| 837 | + " .dataframe tbody tr th:only-of-type {\n", |
| 838 | + " vertical-align: middle;\n", |
| 839 | + " }\n", |
| 840 | + "\n", |
| 841 | + " .dataframe tbody tr th {\n", |
| 842 | + " vertical-align: top;\n", |
| 843 | + " }\n", |
| 844 | + "\n", |
| 845 | + " .dataframe thead th {\n", |
| 846 | + " text-align: right;\n", |
| 847 | + " }\n", |
| 848 | + "</style>\n", |
| 849 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 850 | + " <thead>\n", |
| 851 | + " <tr style=\"text-align: right;\">\n", |
| 852 | + " <th></th>\n", |
| 853 | + " <th>id</th>\n", |
| 854 | + " <th>dt</th>\n", |
| 855 | + " <th>element</th>\n", |
| 856 | + " <th>value</th>\n", |
| 857 | + " <th>m_flag</th>\n", |
| 858 | + " <th>q_flag</th>\n", |
| 859 | + " <th>s_flag</th>\n", |
| 860 | + " <th>obs_time</th>\n", |
| 861 | + " </tr>\n", |
| 862 | + " </thead>\n", |
| 863 | + " <tbody>\n", |
| 864 | + " <tr>\n", |
| 865 | + " <th>0</th>\n", |
| 866 | + " <td>SF001465880</td>\n", |
| 867 | + " <td>1890-01-02</td>\n", |
| 868 | + " <td>PRCP</td>\n", |
| 869 | + " <td>0</td>\n", |
| 870 | + " <td>NaN</td>\n", |
| 871 | + " <td>NaN</td>\n", |
| 872 | + " <td>I</td>\n", |
| 873 | + " <td>NaN</td>\n", |
| 874 | + " </tr>\n", |
| 875 | + " <tr>\n", |
| 876 | + " <th>1</th>\n", |
| 877 | + " <td>ASN00074068</td>\n", |
| 878 | + " <td>1890-01-02</td>\n", |
| 879 | + " <td>PRCP</td>\n", |
| 880 | + " <td>0</td>\n", |
| 881 | + " <td>NaN</td>\n", |
| 882 | + " <td>NaN</td>\n", |
| 883 | + " <td>a</td>\n", |
| 884 | + " <td>NaN</td>\n", |
| 885 | + " </tr>\n", |
| 886 | + " <tr>\n", |
| 887 | + " <th>2</th>\n", |
| 888 | + " <td>ASN00083029</td>\n", |
| 889 | + " <td>1890-01-02</td>\n", |
| 890 | + " <td>PRCP</td>\n", |
| 891 | + " <td>25</td>\n", |
| 892 | + " <td>NaN</td>\n", |
| 893 | + " <td>NaN</td>\n", |
| 894 | + " <td>a</td>\n", |
| 895 | + " <td>NaN</td>\n", |
| 896 | + " </tr>\n", |
| 897 | + " <tr>\n", |
| 898 | + " <th>3</th>\n", |
| 899 | + " <td>ASN00064021</td>\n", |
| 900 | + " <td>1890-01-02</td>\n", |
| 901 | + " <td>PRCP</td>\n", |
| 902 | + " <td>0</td>\n", |
| 903 | + " <td>NaN</td>\n", |
| 904 | + " <td>NaN</td>\n", |
| 905 | + " <td>a</td>\n", |
| 906 | + " <td>NaN</td>\n", |
| 907 | + " </tr>\n", |
| 908 | + " <tr>\n", |
| 909 | + " <th>4</th>\n", |
| 910 | + " <td>ASN00077022</td>\n", |
| 911 | + " <td>1890-01-02</td>\n", |
| 912 | + " <td>PRCP</td>\n", |
| 913 | + " <td>0</td>\n", |
| 914 | + " <td>NaN</td>\n", |
| 915 | + " <td>NaN</td>\n", |
| 916 | + " <td>a</td>\n", |
| 917 | + " <td>NaN</td>\n", |
| 918 | + " </tr>\n", |
| 919 | + " <tr>\n", |
| 920 | + " <th>...</th>\n", |
| 921 | + " <td>...</td>\n", |
| 922 | + " <td>...</td>\n", |
| 923 | + " <td>...</td>\n", |
| 924 | + " <td>...</td>\n", |
| 925 | + " <td>...</td>\n", |
| 926 | + " <td>...</td>\n", |
| 927 | + " <td>...</td>\n", |
| 928 | + " <td>...</td>\n", |
| 929 | + " </tr>\n", |
| 930 | + " <tr>\n", |
| 931 | + " <th>29240012</th>\n", |
| 932 | + " <td>USC00395481</td>\n", |
| 933 | + " <td>1899-12-31</td>\n", |
| 934 | + " <td>SNOW</td>\n", |
| 935 | + " <td>0</td>\n", |
| 936 | + " <td>NaN</td>\n", |
| 937 | + " <td>NaN</td>\n", |
| 938 | + " <td>6</td>\n", |
| 939 | + " <td>NaN</td>\n", |
| 940 | + " </tr>\n", |
| 941 | + " <tr>\n", |
| 942 | + " <th>29240013</th>\n", |
| 943 | + " <td>ASN00063055</td>\n", |
| 944 | + " <td>1899-12-31</td>\n", |
| 945 | + " <td>PRCP</td>\n", |
| 946 | + " <td>0</td>\n", |
| 947 | + " <td>NaN</td>\n", |
| 948 | + " <td>NaN</td>\n", |
| 949 | + " <td>a</td>\n", |
| 950 | + " <td>NaN</td>\n", |
| 951 | + " </tr>\n", |
| 952 | + " <tr>\n", |
| 953 | + " <th>29240014</th>\n", |
| 954 | + " <td>USC00357814</td>\n", |
| 955 | + " <td>1899-12-31</td>\n", |
| 956 | + " <td>TMAX</td>\n", |
| 957 | + " <td>78</td>\n", |
| 958 | + " <td>NaN</td>\n", |
| 959 | + " <td>NaN</td>\n", |
| 960 | + " <td>6</td>\n", |
| 961 | + " <td>NaN</td>\n", |
| 962 | + " </tr>\n", |
| 963 | + " <tr>\n", |
| 964 | + " <th>29240015</th>\n", |
| 965 | + " <td>USC00357814</td>\n", |
| 966 | + " <td>1899-12-31</td>\n", |
| 967 | + " <td>TMIN</td>\n", |
| 968 | + " <td>0</td>\n", |
| 969 | + " <td>NaN</td>\n", |
| 970 | + " <td>NaN</td>\n", |
| 971 | + " <td>6</td>\n", |
| 972 | + " <td>NaN</td>\n", |
| 973 | + " </tr>\n", |
| 974 | + " <tr>\n", |
| 975 | + " <th>29240016</th>\n", |
| 976 | + " <td>USC00357814</td>\n", |
| 977 | + " <td>1899-12-31</td>\n", |
| 978 | + " <td>PRCP</td>\n", |
| 979 | + " <td>102</td>\n", |
| 980 | + " <td>NaN</td>\n", |
| 981 | + " <td>NaN</td>\n", |
| 982 | + " <td>6</td>\n", |
| 983 | + " <td>NaN</td>\n", |
| 984 | + " </tr>\n", |
| 985 | + " </tbody>\n", |
| 986 | + "</table>\n", |
| 987 | + "<p>29240017 rows × 8 columns</p>\n", |
| 988 | + "</div>" |
| 989 | + ], |
| 990 | + "text/plain": [ |
| 991 | + " id dt element value m_flag q_flag s_flag obs_time\n", |
| 992 | + "0 SF001465880 1890-01-02 PRCP 0 NaN NaN I NaN\n", |
| 993 | + "1 ASN00074068 1890-01-02 PRCP 0 NaN NaN a NaN\n", |
| 994 | + "2 ASN00083029 1890-01-02 PRCP 25 NaN NaN a NaN\n", |
| 995 | + "3 ASN00064021 1890-01-02 PRCP 0 NaN NaN a NaN\n", |
| 996 | + "4 ASN00077022 1890-01-02 PRCP 0 NaN NaN a NaN\n", |
| 997 | + "... ... ... ... ... ... ... ... ...\n", |
| 998 | + "29240012 USC00395481 1899-12-31 SNOW 0 NaN NaN 6 NaN\n", |
| 999 | + "29240013 ASN00063055 1899-12-31 PRCP 0 NaN NaN a NaN\n", |
| 1000 | + "29240014 USC00357814 1899-12-31 TMAX 78 NaN NaN 6 NaN\n", |
| 1001 | + "29240015 USC00357814 1899-12-31 TMIN 0 NaN NaN 6 NaN\n", |
| 1002 | + "29240016 USC00357814 1899-12-31 PRCP 102 NaN NaN 6 NaN\n", |
| 1003 | + "\n", |
| 1004 | + "[29240017 rows x 8 columns]" |
| 1005 | + ] |
| 1006 | + }, |
| 1007 | + "execution_count": 7, |
| 1008 | + "metadata": {}, |
| 1009 | + "output_type": "execute_result" |
| 1010 | + } |
| 1011 | + ], |
| 1012 | + "source": [ |
| 1013 | + "%%time\n", |
| 1014 | + "\n", |
| 1015 | + "wr.athena.read_sql_query(\"SELECT * FROM noaa\", database=\"awswrangler_test\", categories=[\"id\", \"dt\", \"element\", \"value\", \"m_flag\", \"q_flag\", \"s_flag\", \"obs_time\"])" |
| 1016 | + ] |
| 1017 | + }, |
812 | 1018 | { |
813 | 1019 | "cell_type": "markdown", |
814 | 1020 | "metadata": {}, |
|
0 commit comments