Skip to content

Commit d28acd1

Browse files
explored global shapefiles
1 parent fa425ba commit d28acd1

File tree

1 file changed

+314
-11
lines changed

1 file changed

+314
-11
lines changed

notes/global_eda.ipynb

Lines changed: 314 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,25 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 6,
5+
"execution_count": 1,
66
"metadata": {},
77
"outputs": [],
88
"source": [
99
"from utils.convert_gpkg_to_shp import convert_gpkg_to_shp\n",
10-
"from utils.plot_shp import plot_shapefiles"
10+
"from utils.plot_shp import plot_shapefiles\n",
11+
"import geopandas as gpd"
1112
]
1213
},
1314
{
1415
"cell_type": "code",
15-
"execution_count": 8,
16+
"execution_count": 2,
1617
"metadata": {},
1718
"outputs": [
1819
{
1920
"name": "stderr",
2021
"output_type": "stream",
2122
"text": [
22-
"/Users/mia694/Github/nsaph-data-processing/climate_types_raster2polygon/michelle_0/michelle/utils/convert_gpkg_to_shp.py:21: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
23+
"/Users/mia694/Github/nsaph-data-processing/climate_types_raster2polygon/michelle/utils/convert_gpkg_to_shp.py:21: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
2324
" layer_gdf.to_file(layer_output_path, driver='ESRI Shapefile')\n"
2425
]
2526
},
@@ -34,7 +35,7 @@
3435
"name": "stderr",
3536
"output_type": "stream",
3637
"text": [
37-
"/Users/mia694/Github/nsaph-data-processing/climate_types_raster2polygon/michelle_0/michelle/utils/convert_gpkg_to_shp.py:21: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
38+
"/Users/mia694/Github/nsaph-data-processing/climate_types_raster2polygon/michelle/utils/convert_gpkg_to_shp.py:21: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
3839
" layer_gdf.to_file(layer_output_path, driver='ESRI Shapefile')\n"
3940
]
4041
},
@@ -49,7 +50,7 @@
4950
"name": "stderr",
5051
"output_type": "stream",
5152
"text": [
52-
"/Users/mia694/Github/nsaph-data-processing/climate_types_raster2polygon/michelle_0/michelle/utils/convert_gpkg_to_shp.py:21: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
53+
"/Users/mia694/Github/nsaph-data-processing/climate_types_raster2polygon/michelle/utils/convert_gpkg_to_shp.py:21: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
5354
" layer_gdf.to_file(layer_output_path, driver='ESRI Shapefile')\n"
5455
]
5556
},
@@ -62,20 +63,20 @@
6263
}
6364
],
6465
"source": [
65-
"# temporarily stored the sample files in data/input\n",
66-
"convert_gpkg_to_shp('data/input', 'data/input')"
66+
"# temporarily stored the sample files in data/aux\n",
67+
"convert_gpkg_to_shp('data/aux', 'data/aux')"
6768
]
6869
},
6970
{
7071
"cell_type": "code",
71-
"execution_count": 9,
72+
"execution_count": 4,
7273
"metadata": {},
7374
"outputs": [
7475
{
7576
"name": "stderr",
7677
"output_type": "stream",
7778
"text": [
78-
"/Users/mia694/Github/nsaph-data-processing/climate_types_raster2polygon/michelle_0/michelle/utils/plot_shp.py:16: UserWarning: Legend does not support handles for PatchCollection instances.\n",
79+
"/Users/mia694/Github/nsaph-data-processing/climate_types_raster2polygon/michelle/utils/plot_shp.py:16: UserWarning: Legend does not support handles for PatchCollection instances.\n",
7980
"See: https://matplotlib.org/stable/tutorials/intermediate/legend_guide.html#implementing-a-custom-legend-handler\n",
8081
" plt.legend()\n",
8182
"No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
@@ -93,7 +94,309 @@
9394
}
9495
],
9596
"source": [
96-
"plot_shapefiles('data/input')"
97+
"plot_shapefiles('data/aux')"
98+
]
99+
},
100+
{
101+
"cell_type": "code",
102+
"execution_count": 14,
103+
"metadata": {},
104+
"outputs": [
105+
{
106+
"data": {
107+
"text/plain": [
108+
"Index(['shapeID_3', 'shapeName_', 'shapeISO_3', 'shapeType_', 'ABYear_3',\n",
109+
" 'ABType_3', 'shapeID_2', 'shapeNam_1', 'shapeISO_2', 'shapeTyp_1',\n",
110+
" 'ABYear_2', 'ABType_2', 'shapeID_1', 'shapeNam_2', 'shapeISO_1',\n",
111+
" 'shapeTyp_2', 'ABYear_1', 'ABType_1', 'shapeNam_3', 'shapeISO_0',\n",
112+
" 'shapeTyp_3', 'shapeID_0', 'canonicalN', 'ABYear_0', 'geometry'],\n",
113+
" dtype='object')"
114+
]
115+
},
116+
"execution_count": 14,
117+
"metadata": {},
118+
"output_type": "execute_result"
119+
}
120+
],
121+
"source": [
122+
"# Read the shapefile\n",
123+
"shapefile = gpd.read_file('data/aux/CAN_ADM3all_20240716/CAN_ADM3all_20240716.shp')\n",
124+
"shapefile.columns"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": 15,
130+
"metadata": {},
131+
"outputs": [
132+
{
133+
"data": {
134+
"text/html": [
135+
"<div>\n",
136+
"<style scoped>\n",
137+
" .dataframe tbody tr th:only-of-type {\n",
138+
" vertical-align: middle;\n",
139+
" }\n",
140+
"\n",
141+
" .dataframe tbody tr th {\n",
142+
" vertical-align: top;\n",
143+
" }\n",
144+
"\n",
145+
" .dataframe thead th {\n",
146+
" text-align: right;\n",
147+
" }\n",
148+
"</style>\n",
149+
"<table border=\"1\" class=\"dataframe\">\n",
150+
" <thead>\n",
151+
" <tr style=\"text-align: right;\">\n",
152+
" <th></th>\n",
153+
" <th>shapeID_3</th>\n",
154+
" <th>shapeName_</th>\n",
155+
" <th>shapeISO_3</th>\n",
156+
" <th>shapeType_</th>\n",
157+
" <th>ABYear_3</th>\n",
158+
" <th>ABType_3</th>\n",
159+
" <th>shapeID_2</th>\n",
160+
" <th>shapeNam_1</th>\n",
161+
" <th>shapeISO_2</th>\n",
162+
" <th>shapeTyp_1</th>\n",
163+
" <th>...</th>\n",
164+
" <th>shapeTyp_2</th>\n",
165+
" <th>ABYear_1</th>\n",
166+
" <th>ABType_1</th>\n",
167+
" <th>shapeNam_3</th>\n",
168+
" <th>shapeISO_0</th>\n",
169+
" <th>shapeTyp_3</th>\n",
170+
" <th>shapeID_0</th>\n",
171+
" <th>canonicalN</th>\n",
172+
" <th>ABYear_0</th>\n",
173+
" <th>geometry</th>\n",
174+
" </tr>\n",
175+
" </thead>\n",
176+
" <tbody>\n",
177+
" <tr>\n",
178+
" <th>0</th>\n",
179+
" <td>43193130B20270165052470</td>\n",
180+
" <td>Alberta Beach</td>\n",
181+
" <td>None</td>\n",
182+
" <td>ADM3</td>\n",
183+
" <td>2016</td>\n",
184+
" <td>Unknown</td>\n",
185+
" <td>811477B52238301703097</td>\n",
186+
" <td>Athabasca--Grande Prairie--Pe*</td>\n",
187+
" <td>None</td>\n",
188+
" <td>ADM2</td>\n",
189+
" <td>...</td>\n",
190+
" <td>ADM1</td>\n",
191+
" <td>2023</td>\n",
192+
" <td>Province</td>\n",
193+
" <td>Canada</td>\n",
194+
" <td>CAN</td>\n",
195+
" <td>ADM0</td>\n",
196+
" <td>50000805B18704692826824</td>\n",
197+
" <td>Unknown</td>\n",
198+
" <td>2020</td>\n",
199+
" <td>POLYGON ((-114.37004 53.66962, -114.36082 53.6...</td>\n",
200+
" </tr>\n",
201+
" <tr>\n",
202+
" <th>1</th>\n",
203+
" <td>43193130B95992773086</td>\n",
204+
" <td>Alexis 133</td>\n",
205+
" <td>None</td>\n",
206+
" <td>ADM3</td>\n",
207+
" <td>2016</td>\n",
208+
" <td>Unknown</td>\n",
209+
" <td>811477B52238301703097</td>\n",
210+
" <td>Athabasca--Grande Prairie--Pe*</td>\n",
211+
" <td>None</td>\n",
212+
" <td>ADM2</td>\n",
213+
" <td>...</td>\n",
214+
" <td>ADM1</td>\n",
215+
" <td>2023</td>\n",
216+
" <td>Province</td>\n",
217+
" <td>Canada</td>\n",
218+
" <td>CAN</td>\n",
219+
" <td>ADM0</td>\n",
220+
" <td>50000805B18704692826824</td>\n",
221+
" <td>Unknown</td>\n",
222+
" <td>2020</td>\n",
223+
" <td>MULTIPOLYGON (((-114.47612 53.73089, -114.4677...</td>\n",
224+
" </tr>\n",
225+
" <tr>\n",
226+
" <th>2</th>\n",
227+
" <td>43193130B95534760768636</td>\n",
228+
" <td>Athabasca</td>\n",
229+
" <td>None</td>\n",
230+
" <td>ADM3</td>\n",
231+
" <td>2016</td>\n",
232+
" <td>Unknown</td>\n",
233+
" <td>811477B52238301703097</td>\n",
234+
" <td>Athabasca--Grande Prairie--Pe*</td>\n",
235+
" <td>None</td>\n",
236+
" <td>ADM2</td>\n",
237+
" <td>...</td>\n",
238+
" <td>ADM1</td>\n",
239+
" <td>2023</td>\n",
240+
" <td>Province</td>\n",
241+
" <td>Canada</td>\n",
242+
" <td>CAN</td>\n",
243+
" <td>ADM0</td>\n",
244+
" <td>50000805B18704692826824</td>\n",
245+
" <td>Unknown</td>\n",
246+
" <td>2020</td>\n",
247+
" <td>POLYGON ((-113.29633 54.74075, -113.28491 54.7...</td>\n",
248+
" </tr>\n",
249+
" <tr>\n",
250+
" <th>3</th>\n",
251+
" <td>43193130B72459799875126</td>\n",
252+
" <td>Athabasca County</td>\n",
253+
" <td>None</td>\n",
254+
" <td>ADM3</td>\n",
255+
" <td>2016</td>\n",
256+
" <td>Unknown</td>\n",
257+
" <td>811477B52238301703097</td>\n",
258+
" <td>Athabasca--Grande Prairie--Pe*</td>\n",
259+
" <td>None</td>\n",
260+
" <td>ADM2</td>\n",
261+
" <td>...</td>\n",
262+
" <td>ADM1</td>\n",
263+
" <td>2023</td>\n",
264+
" <td>Province</td>\n",
265+
" <td>Canada</td>\n",
266+
" <td>CAN</td>\n",
267+
" <td>ADM0</td>\n",
268+
" <td>50000805B18704692826824</td>\n",
269+
" <td>Unknown</td>\n",
270+
" <td>2020</td>\n",
271+
" <td>POLYGON ((-112.44282 54.58736, -112.48257 54.5...</td>\n",
272+
" </tr>\n",
273+
" <tr>\n",
274+
" <th>4</th>\n",
275+
" <td>43193130B81229491876744</td>\n",
276+
" <td>Barrhead</td>\n",
277+
" <td>None</td>\n",
278+
" <td>ADM3</td>\n",
279+
" <td>2016</td>\n",
280+
" <td>Unknown</td>\n",
281+
" <td>811477B52238301703097</td>\n",
282+
" <td>Athabasca--Grande Prairie--Pe*</td>\n",
283+
" <td>None</td>\n",
284+
" <td>ADM2</td>\n",
285+
" <td>...</td>\n",
286+
" <td>ADM1</td>\n",
287+
" <td>2023</td>\n",
288+
" <td>Province</td>\n",
289+
" <td>Canada</td>\n",
290+
" <td>CAN</td>\n",
291+
" <td>ADM0</td>\n",
292+
" <td>50000805B18704692826824</td>\n",
293+
" <td>Unknown</td>\n",
294+
" <td>2020</td>\n",
295+
" <td>POLYGON ((-114.41499 54.13684, -114.41499 54.1...</td>\n",
296+
" </tr>\n",
297+
" </tbody>\n",
298+
"</table>\n",
299+
"<p>5 rows × 25 columns</p>\n",
300+
"</div>"
301+
],
302+
"text/plain": [
303+
" shapeID_3 shapeName_ shapeISO_3 shapeType_ ABYear_3 \\\n",
304+
"0 43193130B20270165052470 Alberta Beach None ADM3 2016 \n",
305+
"1 43193130B95992773086 Alexis 133 None ADM3 2016 \n",
306+
"2 43193130B95534760768636 Athabasca None ADM3 2016 \n",
307+
"3 43193130B72459799875126 Athabasca County None ADM3 2016 \n",
308+
"4 43193130B81229491876744 Barrhead None ADM3 2016 \n",
309+
"\n",
310+
" ABType_3 shapeID_2 shapeNam_1 shapeISO_2 \\\n",
311+
"0 Unknown 811477B52238301703097 Athabasca--Grande Prairie--Pe* None \n",
312+
"1 Unknown 811477B52238301703097 Athabasca--Grande Prairie--Pe* None \n",
313+
"2 Unknown 811477B52238301703097 Athabasca--Grande Prairie--Pe* None \n",
314+
"3 Unknown 811477B52238301703097 Athabasca--Grande Prairie--Pe* None \n",
315+
"4 Unknown 811477B52238301703097 Athabasca--Grande Prairie--Pe* None \n",
316+
"\n",
317+
" shapeTyp_1 ... shapeTyp_2 ABYear_1 ABType_1 shapeNam_3 shapeISO_0 \\\n",
318+
"0 ADM2 ... ADM1 2023 Province Canada CAN \n",
319+
"1 ADM2 ... ADM1 2023 Province Canada CAN \n",
320+
"2 ADM2 ... ADM1 2023 Province Canada CAN \n",
321+
"3 ADM2 ... ADM1 2023 Province Canada CAN \n",
322+
"4 ADM2 ... ADM1 2023 Province Canada CAN \n",
323+
"\n",
324+
" shapeTyp_3 shapeID_0 canonicalN ABYear_0 \\\n",
325+
"0 ADM0 50000805B18704692826824 Unknown 2020 \n",
326+
"1 ADM0 50000805B18704692826824 Unknown 2020 \n",
327+
"2 ADM0 50000805B18704692826824 Unknown 2020 \n",
328+
"3 ADM0 50000805B18704692826824 Unknown 2020 \n",
329+
"4 ADM0 50000805B18704692826824 Unknown 2020 \n",
330+
"\n",
331+
" geometry \n",
332+
"0 POLYGON ((-114.37004 53.66962, -114.36082 53.6... \n",
333+
"1 MULTIPOLYGON (((-114.47612 53.73089, -114.4677... \n",
334+
"2 POLYGON ((-113.29633 54.74075, -113.28491 54.7... \n",
335+
"3 POLYGON ((-112.44282 54.58736, -112.48257 54.5... \n",
336+
"4 POLYGON ((-114.41499 54.13684, -114.41499 54.1... \n",
337+
"\n",
338+
"[5 rows x 25 columns]"
339+
]
340+
},
341+
"execution_count": 15,
342+
"metadata": {},
343+
"output_type": "execute_result"
344+
}
345+
],
346+
"source": [
347+
"shapefile.head()"
348+
]
349+
},
350+
{
351+
"cell_type": "code",
352+
"execution_count": 12,
353+
"metadata": {},
354+
"outputs": [
355+
{
356+
"data": {
357+
"text/plain": [
358+
"Index(['shapeID_2', 'shapeName_', 'shapeISO_2', 'shapeType_', 'ABYear_2',\n",
359+
" 'ABType_2', 'shapeID_1', 'shapeNam_1', 'shapeISO_1', 'shapeTyp_1',\n",
360+
" 'ABYear_1', 'ABType_1', 'shapeNam_2', 'shapeISO_0', 'shapeTyp_2',\n",
361+
" 'shapeID_0', 'canonicalN', 'ABYear_0', 'geometry'],\n",
362+
" dtype='object')"
363+
]
364+
},
365+
"execution_count": 12,
366+
"metadata": {},
367+
"output_type": "execute_result"
368+
}
369+
],
370+
"source": [
371+
"# Read the shapefile\n",
372+
"shapefile = gpd.read_file('data/aux/MEX_ADM2all_20240716/MEX_ADM2all_20240716.shp')\n",
373+
"shapefile.columns"
374+
]
375+
},
376+
{
377+
"cell_type": "code",
378+
"execution_count": 13,
379+
"metadata": {},
380+
"outputs": [
381+
{
382+
"data": {
383+
"text/plain": [
384+
"Index(['shapeID_2', 'shapeName_', 'shapeISO_2', 'shapeType_', 'ABYear_2',\n",
385+
" 'ABType_2', 'shapeID_1', 'shapeNam_1', 'shapeISO_1', 'shapeTyp_1',\n",
386+
" 'ABYear_1', 'ABType_1', 'shapeNam_2', 'shapeISO_0', 'shapeTyp_2',\n",
387+
" 'shapeID_0', 'canonicalN', 'ABYear_0', 'geometry'],\n",
388+
" dtype='object')"
389+
]
390+
},
391+
"execution_count": 13,
392+
"metadata": {},
393+
"output_type": "execute_result"
394+
}
395+
],
396+
"source": [
397+
"# Read the shapefile\n",
398+
"shapefile = gpd.read_file('data/aux/USA_ADM2all_20240716/USA_ADM2all_20240716.shp')\n",
399+
"shapefile.columns"
97400
]
98401
}
99402
],

0 commit comments

Comments
 (0)