Skip to content

Commit 6b24a5d

Browse files
committed
Add categories example to tutorial 6 #160
1 parent c9435f1 commit 6b24a5d

File tree

1 file changed

+212
-6
lines changed

1 file changed

+212
-6
lines changed

tutorials/06 - Amazon Athena.ipynb

Lines changed: 212 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
},
4646
{
4747
"cell_type": "code",
48-
"execution_count": 2,
48+
"execution_count": 3,
4949
"metadata": {},
5050
"outputs": [
5151
{
@@ -73,7 +73,7 @@
7373
},
7474
{
7575
"cell_type": "code",
76-
"execution_count": 3,
76+
"execution_count": 4,
7777
"metadata": {},
7878
"outputs": [
7979
{
@@ -251,7 +251,7 @@
251251
"[29240017 rows x 8 columns]"
252252
]
253253
},
254-
"execution_count": 3,
254+
"execution_count": 4,
255255
"metadata": {},
256256
"output_type": "execute_result"
257257
}
@@ -269,7 +269,7 @@
269269
},
270270
{
271271
"cell_type": "code",
272-
"execution_count": 4,
272+
"execution_count": 5,
273273
"metadata": {},
274274
"outputs": [],
275275
"source": [
@@ -285,7 +285,7 @@
285285
},
286286
{
287287
"cell_type": "code",
288-
"execution_count": 5,
288+
"execution_count": 6,
289289
"metadata": {},
290290
"outputs": [
291291
{
@@ -388,7 +388,7 @@
388388
"7 obs_time string False "
389389
]
390390
},
391-
"execution_count": 5,
391+
"execution_count": 6,
392392
"metadata": {},
393393
"output_type": "execute_result"
394394
}
@@ -809,6 +809,212 @@
809809
"wr.athena.read_sql_query(\"SELECT * FROM noaa\", database=\"awswrangler_test\", ctas_approach=False)"
810810
]
811811
},
812+
{
813+
"cell_type": "markdown",
814+
"metadata": {},
815+
"source": [
816+
"## Using categories to speed up and save memory!"
817+
]
818+
},
819+
{
820+
"cell_type": "code",
821+
"execution_count": 7,
822+
"metadata": {},
823+
"outputs": [
824+
{
825+
"name": "stdout",
826+
"output_type": "stream",
827+
"text": [
828+
"CPU times: user 3.84 s, sys: 2.01 s, total: 5.85 s\n",
829+
"Wall time: 30.2 s\n"
830+
]
831+
},
832+
{
833+
"data": {
834+
"text/html": [
835+
"<div>\n",
836+
"<style scoped>\n",
837+
" .dataframe tbody tr th:only-of-type {\n",
838+
" vertical-align: middle;\n",
839+
" }\n",
840+
"\n",
841+
" .dataframe tbody tr th {\n",
842+
" vertical-align: top;\n",
843+
" }\n",
844+
"\n",
845+
" .dataframe thead th {\n",
846+
" text-align: right;\n",
847+
" }\n",
848+
"</style>\n",
849+
"<table border=\"1\" class=\"dataframe\">\n",
850+
" <thead>\n",
851+
" <tr style=\"text-align: right;\">\n",
852+
" <th></th>\n",
853+
" <th>id</th>\n",
854+
" <th>dt</th>\n",
855+
" <th>element</th>\n",
856+
" <th>value</th>\n",
857+
" <th>m_flag</th>\n",
858+
" <th>q_flag</th>\n",
859+
" <th>s_flag</th>\n",
860+
" <th>obs_time</th>\n",
861+
" </tr>\n",
862+
" </thead>\n",
863+
" <tbody>\n",
864+
" <tr>\n",
865+
" <th>0</th>\n",
866+
" <td>SF001465880</td>\n",
867+
" <td>1890-01-02</td>\n",
868+
" <td>PRCP</td>\n",
869+
" <td>0</td>\n",
870+
" <td>NaN</td>\n",
871+
" <td>NaN</td>\n",
872+
" <td>I</td>\n",
873+
" <td>NaN</td>\n",
874+
" </tr>\n",
875+
" <tr>\n",
876+
" <th>1</th>\n",
877+
" <td>ASN00074068</td>\n",
878+
" <td>1890-01-02</td>\n",
879+
" <td>PRCP</td>\n",
880+
" <td>0</td>\n",
881+
" <td>NaN</td>\n",
882+
" <td>NaN</td>\n",
883+
" <td>a</td>\n",
884+
" <td>NaN</td>\n",
885+
" </tr>\n",
886+
" <tr>\n",
887+
" <th>2</th>\n",
888+
" <td>ASN00083029</td>\n",
889+
" <td>1890-01-02</td>\n",
890+
" <td>PRCP</td>\n",
891+
" <td>25</td>\n",
892+
" <td>NaN</td>\n",
893+
" <td>NaN</td>\n",
894+
" <td>a</td>\n",
895+
" <td>NaN</td>\n",
896+
" </tr>\n",
897+
" <tr>\n",
898+
" <th>3</th>\n",
899+
" <td>ASN00064021</td>\n",
900+
" <td>1890-01-02</td>\n",
901+
" <td>PRCP</td>\n",
902+
" <td>0</td>\n",
903+
" <td>NaN</td>\n",
904+
" <td>NaN</td>\n",
905+
" <td>a</td>\n",
906+
" <td>NaN</td>\n",
907+
" </tr>\n",
908+
" <tr>\n",
909+
" <th>4</th>\n",
910+
" <td>ASN00077022</td>\n",
911+
" <td>1890-01-02</td>\n",
912+
" <td>PRCP</td>\n",
913+
" <td>0</td>\n",
914+
" <td>NaN</td>\n",
915+
" <td>NaN</td>\n",
916+
" <td>a</td>\n",
917+
" <td>NaN</td>\n",
918+
" </tr>\n",
919+
" <tr>\n",
920+
" <th>...</th>\n",
921+
" <td>...</td>\n",
922+
" <td>...</td>\n",
923+
" <td>...</td>\n",
924+
" <td>...</td>\n",
925+
" <td>...</td>\n",
926+
" <td>...</td>\n",
927+
" <td>...</td>\n",
928+
" <td>...</td>\n",
929+
" </tr>\n",
930+
" <tr>\n",
931+
" <th>29240012</th>\n",
932+
" <td>USC00395481</td>\n",
933+
" <td>1899-12-31</td>\n",
934+
" <td>SNOW</td>\n",
935+
" <td>0</td>\n",
936+
" <td>NaN</td>\n",
937+
" <td>NaN</td>\n",
938+
" <td>6</td>\n",
939+
" <td>NaN</td>\n",
940+
" </tr>\n",
941+
" <tr>\n",
942+
" <th>29240013</th>\n",
943+
" <td>ASN00063055</td>\n",
944+
" <td>1899-12-31</td>\n",
945+
" <td>PRCP</td>\n",
946+
" <td>0</td>\n",
947+
" <td>NaN</td>\n",
948+
" <td>NaN</td>\n",
949+
" <td>a</td>\n",
950+
" <td>NaN</td>\n",
951+
" </tr>\n",
952+
" <tr>\n",
953+
" <th>29240014</th>\n",
954+
" <td>USC00357814</td>\n",
955+
" <td>1899-12-31</td>\n",
956+
" <td>TMAX</td>\n",
957+
" <td>78</td>\n",
958+
" <td>NaN</td>\n",
959+
" <td>NaN</td>\n",
960+
" <td>6</td>\n",
961+
" <td>NaN</td>\n",
962+
" </tr>\n",
963+
" <tr>\n",
964+
" <th>29240015</th>\n",
965+
" <td>USC00357814</td>\n",
966+
" <td>1899-12-31</td>\n",
967+
" <td>TMIN</td>\n",
968+
" <td>0</td>\n",
969+
" <td>NaN</td>\n",
970+
" <td>NaN</td>\n",
971+
" <td>6</td>\n",
972+
" <td>NaN</td>\n",
973+
" </tr>\n",
974+
" <tr>\n",
975+
" <th>29240016</th>\n",
976+
" <td>USC00357814</td>\n",
977+
" <td>1899-12-31</td>\n",
978+
" <td>PRCP</td>\n",
979+
" <td>102</td>\n",
980+
" <td>NaN</td>\n",
981+
" <td>NaN</td>\n",
982+
" <td>6</td>\n",
983+
" <td>NaN</td>\n",
984+
" </tr>\n",
985+
" </tbody>\n",
986+
"</table>\n",
987+
"<p>29240017 rows × 8 columns</p>\n",
988+
"</div>"
989+
],
990+
"text/plain": [
991+
" id dt element value m_flag q_flag s_flag obs_time\n",
992+
"0 SF001465880 1890-01-02 PRCP 0 NaN NaN I NaN\n",
993+
"1 ASN00074068 1890-01-02 PRCP 0 NaN NaN a NaN\n",
994+
"2 ASN00083029 1890-01-02 PRCP 25 NaN NaN a NaN\n",
995+
"3 ASN00064021 1890-01-02 PRCP 0 NaN NaN a NaN\n",
996+
"4 ASN00077022 1890-01-02 PRCP 0 NaN NaN a NaN\n",
997+
"... ... ... ... ... ... ... ... ...\n",
998+
"29240012 USC00395481 1899-12-31 SNOW 0 NaN NaN 6 NaN\n",
999+
"29240013 ASN00063055 1899-12-31 PRCP 0 NaN NaN a NaN\n",
1000+
"29240014 USC00357814 1899-12-31 TMAX 78 NaN NaN 6 NaN\n",
1001+
"29240015 USC00357814 1899-12-31 TMIN 0 NaN NaN 6 NaN\n",
1002+
"29240016 USC00357814 1899-12-31 PRCP 102 NaN NaN 6 NaN\n",
1003+
"\n",
1004+
"[29240017 rows x 8 columns]"
1005+
]
1006+
},
1007+
"execution_count": 7,
1008+
"metadata": {},
1009+
"output_type": "execute_result"
1010+
}
1011+
],
1012+
"source": [
1013+
"%%time\n",
1014+
"\n",
1015+
"wr.athena.read_sql_query(\"SELECT * FROM noaa\", database=\"awswrangler_test\", categories=[\"id\", \"dt\", \"element\", \"value\", \"m_flag\", \"q_flag\", \"s_flag\", \"obs_time\"])"
1016+
]
1017+
},
8121018
{
8131019
"cell_type": "markdown",
8141020
"metadata": {},

0 commit comments

Comments
 (0)