Skip to content

Commit d5b07e7

Browse files
gilmourjaudiracmichelle
authored and committed
code to download country shapefiles from geoboundaries
1 parent 05ce577 commit d5b07e7

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed
Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,96 @@
1+
import requests
2+
import json
3+
import os
4+
import pycountry
5+
from datetime import datetime
6+
import geopandas as gpd
7+
from shapely import wkt
8+
import argparse
9+
10+
11+
def get_global_shapefiles(output_dir):
    """Download geoBoundaries records and save their geometries as GeoPackages.

    The function works in two phases:

    1. For each selected country and each admin level ADM0-ADM5, request the
       geoBoundaries ``gbOpen`` API record and store the response as
       ``<ISO3>_<ADMn>_<YYYYMM>.txt`` inside ``output_dir``. Files that
       already exist are not downloaded again.
    2. For every ``*<YYYYMM>.txt`` file found in ``output_dir``, download the
       GeoJSON referenced by its ``gjDownloadURL`` entry, suffix every
       non-geometry column with the admin level, add an ``ABYear_<level>``
       column holding ``boundaryYearRepresented``, drop the ``shapeGroup``
       columns and write the result next to the metadata file as ``.gpkg``.

    Progress and download errors are written to
    ``logfile_jsonfiles_<YYYYMM>.txt`` and ``logfile_shapefiles_<YYYYMM>.txt``
    in the current working directory.

    Args:
        output_dir: Directory receiving the metadata and GeoPackage files.
            It is created when missing.
    """
    # Set directories
    out_dir = output_dir
    os.makedirs(out_dir, exist_ok=True)

    # Get the year and month; used as a suffix for every file written
    date = datetime.today().strftime('%Y%m')

    # Define the baseline URL for the API
    URL = "https://www.geoboundaries.org/api/current/"

    # Release type
    RELEASE_TYPE = "gbOpen/"

    # Boundary types
    BOUNDARY_TYPE = ["/ADM0/", "/ADM1/", "/ADM2/", "/ADM3/", "/ADM4/", "/ADM5/"]

    # Seconds to wait on the server before giving up; without a timeout a
    # stalled connection would block the whole run.
    REQUEST_TIMEOUT = 60

    # Get the country ISO codes
    # NOTE(review): only the 2nd and 3rd pycountry entries are processed here,
    # which looks like a test subset - confirm before a global run.
    countries_test = list(pycountry.countries)[1:3]

    # Phase 1: download the API metadata records.
    with open(f"logfile_jsonfiles_{date}.txt", "w", encoding="utf-8") as log_file:
        for i, c in enumerate(countries_test):
            log_file.write("---------------------------------------------------------------------\n")
            # find on ISO code
            ISO_CODE3 = c.alpha_3
            log_file.write(
                f"Beginning download of {c.name} — ISO Code {ISO_CODE3} "
                f"({i + 1} of {len(countries_test)})\n"
            )

            for boundary in BOUNDARY_TYPE:
                dl_link = f"{URL}{RELEASE_TYPE}{ISO_CODE3}{boundary}"
                # os.path.join keeps the file inside out_dir even when the
                # directory was given without a trailing separator.
                dl_file = os.path.join(
                    out_dir, f"{ISO_CODE3}_{boundary.strip('/')}_{date}.txt"
                )

                if os.path.exists(dl_file):
                    log_file.write("File already downloaded. Proceeding to next. \n")
                    continue

                try:
                    response = requests.get(dl_link, timeout=REQUEST_TIMEOUT)
                    response.raise_for_status()
                except requests.RequestException as e:
                    log_file.write(
                        f"ERROR! {ISO_CODE3} {boundary} file did not download successfully ({e}) \n"
                    )
                    continue

                with open(dl_file, "wb") as file:
                    file.write(response.content)
                log_file.write(":) file downloaded successfully \n")

    # Phase 2: read in the JSON files to download the actual geometry features
    all_json = [f for f in os.listdir(out_dir) if f.endswith(f"{date}.txt")]
    # The first three characters of each file name are the ISO3 code.
    unique_country_json = sorted({f[:3] for f in all_json})

    with open(f"logfile_shapefiles_{date}.txt", "w", encoding="utf-8") as log_file:
        for i, country_code in enumerate(unique_country_json):
            log_file.write("----------------------------------------------------------------------\n")
            log_file.write(f"Processing country {i + 1} of {len(unique_country_json)} - {country_code}\n")

            files = [f for f in all_json if f.startswith(country_code)]
            # File names look like "<ISO3>_ADM<n>_<date>.txt", so index 7 is
            # the admin level digit.
            all_admin_levels = sorted({int(f[7]) for f in files})
            log_file.write(f".....Highest admin. level: {max(all_admin_levels)}\n")
            log_file.write(f".....Total levels: {len(all_admin_levels)}\n")

            for outlevel in all_admin_levels:
                prefix = f"{country_code}_ADM{outlevel}"
                file = next(f for f in files if f.startswith(prefix))

                with open(os.path.join(out_dir, file), encoding="utf-8") as json_file:
                    input_json = json.load(json_file)

                year_boundary = input_json['boundaryYearRepresented']
                # 'boundaryType' is read as "ADM<n>"; keep only the digit.
                admin_level = input_json['boundaryType'][3]

                try:
                    response = requests.get(
                        input_json['gjDownloadURL'], timeout=REQUEST_TIMEOUT
                    )
                    # Fail here rather than handing an error page to geopandas.
                    response.raise_for_status()
                except requests.RequestException as e:
                    log_file.write(
                        f"ERROR! {country_code} ADM{outlevel} geometry did not download successfully ({e}) \n"
                    )
                    continue

                input_geojson = gpd.read_file(response.text)

                # NOTE(review): DataFrame.apply hands whole columns to the
                # lambda, so the isinstance(x, str) branch is presumably never
                # taken; kept to preserve behaviour - confirm and remove.
                input_geojson = input_geojson.apply(
                    lambda x: wkt.loads(x) if isinstance(x, str) else x
                )
                # Suffix attribute columns with the admin level.
                input_geojson = input_geojson.rename(
                    columns={
                        col: col + "_" + admin_level
                        for col in input_geojson.columns
                        if col != 'geometry'
                    }
                )

                input_geojson[f"ABYear_{admin_level}"] = year_boundary
                final = input_geojson.drop(
                    columns=[col for col in input_geojson.columns if 'shapeGroup' in col]
                )

                outfilename = file.replace(".txt", ".gpkg")
                final.to_file(os.path.join(out_dir, outfilename), driver='GPKG')
                log_file.write(f".....Saved {outfilename}\n")
89+
90+
91+
if __name__ == "__main__":
    # Command-line entry point: the single positional argument is the
    # directory passed to get_global_shapefiles as its output directory.
    parser = argparse.ArgumentParser(
        description="Download country boundaries from geoBoundaries and save them as GeoPackages"
    )
    parser.add_argument("output_dir", help="Output directory")
    args = parser.parse_args()

    get_global_shapefiles(args.output_dir)

0 commit comments

Comments
 (0)