Skip to content

Commit f1ef15c

Browse files
Merge pull request #35 from NSAPH-Data-Processing/download_shapefiles
code to download country shapefiles from geoboundaries
2 parents 05ce577 + 5c05cd3 commit f1ef15c

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

requirements.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ dependencies:
1616
- snakemake==8.1.2
1717
- wget==3.2
1818
- ipykernel==6.29.4
19+
- pycountry
1920

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import requests
2+
import json
3+
import os
4+
import pycountry
5+
from datetime import datetime
6+
import geopandas as gpd
7+
from shapely import wkt
8+
import argparse
9+
10+
11+
# geoBoundaries API pieces; a request URL is
# ``<_API_URL><_RELEASE_TYPE><ISO3><boundary type>``.
_API_URL = "https://www.geoboundaries.org/api/current/"
_RELEASE_TYPE = "gbOpen/"
_BOUNDARY_TYPES = ["/ADM0/", "/ADM1/", "/ADM2/", "/ADM3/", "/ADM4/", "/ADM5/"]

# Seconds to wait on the server before a request is abandoned, so a stalled
# connection cannot hang the whole run.
_REQUEST_TIMEOUT = 60

_LOG_SEPARATOR = "-" * 69 + "\n"


def _parse_metadata_filename(filename, date):
    """Split ``<ISO3>_ADM<level>_<date>.txt`` into ``(iso3, level)``.

    Returns ``None`` for any name that does not follow that pattern, so
    unrelated files in the output directory (for example this script's own
    ``logfile_*_<date>.txt`` when it is run from that directory) are skipped
    instead of breaking the admin-level parsing.
    """
    stem, ext = os.path.splitext(filename)
    parts = stem.split("_")
    if ext != ".txt" or len(parts) != 3:
        return None
    iso_code, boundary, file_date = parts
    if len(iso_code) != 3 or file_date != date:
        return None
    level = boundary[3:]
    if not boundary.startswith("ADM") or not level.isdecimal():
        return None
    return iso_code, int(level)


def _download_boundary_metadata(out_dir, date):
    """Save the API's JSON response for every country and admin level.

    Progress and failures are written to ``logfile_jsonfiles_<date>.txt`` in
    the current working directory. A failed request is logged and skipped.
    """
    countries = list(pycountry.countries)
    total = len(countries)

    log_name = f"logfile_jsonfiles_{date}.txt"
    with open(log_name, "w", encoding="utf-8") as log_file:
        for position, country in enumerate(countries, start=1):
            iso_code = country.alpha_3
            log_file.write(_LOG_SEPARATOR)
            log_file.write(
                f"Beginning download of {country.name} — ISO Code {iso_code}"
                f" — {position} of {total}\n"
            )

            for boundary in _BOUNDARY_TYPES:
                dl_link = f"{_API_URL}{_RELEASE_TYPE}{iso_code}{boundary}"
                # os.path.join keeps the file inside out_dir whether or not
                # the caller passed a trailing path separator.
                dl_file = os.path.join(
                    out_dir, f"{iso_code}_{boundary.strip('/')}_{date}.txt"
                )

                if os.path.exists(dl_file):
                    log_file.write("File already downloaded. Proceeding to next. \n")
                    continue

                try:
                    response = requests.get(dl_link, timeout=_REQUEST_TIMEOUT)
                    response.raise_for_status()
                except requests.RequestException as err:
                    log_file.write(
                        f"ERROR! {iso_code} {boundary} file did not download"
                        f" successfully ({err})\n"
                    )
                    continue

                with open(dl_file, "wb") as metadata_file:
                    metadata_file.write(response.content)
                log_file.write(":) file downloaded successfully \n")


def _build_geopackages(out_dir, date):
    """Download the geometry behind each saved JSON file and write a GeoPackage.

    Progress and failures are written to ``logfile_shapefiles_<date>.txt`` in
    the current working directory. A failed geometry download is logged and
    skipped so the remaining countries are still processed.
    """
    import io  # local import: the file's import block is left untouched

    # iso3 -> {admin level -> metadata file name}
    metadata_by_country = {}
    for filename in os.listdir(out_dir):
        parsed = _parse_metadata_filename(filename, date)
        if parsed is None:
            continue
        iso_code, level = parsed
        metadata_by_country.setdefault(iso_code, {})[level] = filename

    total = len(metadata_by_country)
    log_name = f"logfile_shapefiles_{date}.txt"
    with open(log_name, "w", encoding="utf-8") as log_file:
        for position, iso_code in enumerate(sorted(metadata_by_country), start=1):
            levels = metadata_by_country[iso_code]
            log_file.write("-" * 70 + "\n")
            log_file.write(f"Processing country {position} of {total} - {iso_code}\n")
            log_file.write(f".....Highest admin. level: {max(levels)}\n")
            log_file.write(f".....Total levels: {len(levels)}\n")

            for level in sorted(levels):
                filename = levels[level]
                metadata_path = os.path.join(out_dir, filename)
                with open(metadata_path, encoding="utf-8") as json_file:
                    input_json = json.load(json_file)

                year_boundary = input_json['boundaryYearRepresented']
                # Fourth character of 'boundaryType' (e.g. the "0" of "ADM0");
                # used as the column suffix below.
                admin_level = input_json['boundaryType'][3]

                try:
                    response = requests.get(
                        input_json['gjDownloadURL'], timeout=_REQUEST_TIMEOUT
                    )
                    response.raise_for_status()
                except requests.RequestException as err:
                    log_file.write(
                        f"ERROR! {filename} geometry did not download"
                        f" successfully ({err})\n"
                    )
                    continue

                # Hand geopandas the raw bytes as a file-like object rather
                # than passing the whole GeoJSON text where a path is expected.
                input_geojson = gpd.read_file(io.BytesIO(response.content))

                # The original also ran DataFrame.apply with a WKT-parsing
                # lambda here. apply() hands the function whole columns, never
                # strings, so that branch never fired and the call was dropped.
                input_geojson = input_geojson.rename(
                    columns={
                        col: f"{col}_{admin_level}"
                        for col in input_geojson.columns
                        if col != 'geometry'
                    }
                )

                input_geojson[f"ABYear_{admin_level}"] = year_boundary
                final = input_geojson.drop(
                    columns=[col for col in input_geojson.columns if 'shapeGroup' in col]
                )

                outfilename = filename.replace(".txt", ".gpkg")
                final.to_file(os.path.join(out_dir, outfilename), driver='GPKG')


def get_global_shapefiles(output_dir):
    """Download geoBoundaries boundaries for every pycountry country.

    Runs in two passes:

    1. For each country and each admin level ADM0-ADM5, request the gbOpen
       API entry and save the response as ``<ISO3>_ADM<n>_<YYYYMM>.txt`` in
       ``output_dir``. Files that already exist are not requested again.
    2. For each saved file from the current month, download the GeoJSON it
       points to (``gjDownloadURL``), suffix every non-geometry column with
       the admin level, add an ``ABYear_<n>`` column holding
       ``boundaryYearRepresented``, drop the ``shapeGroup`` columns and write
       ``<ISO3>_ADM<n>_<YYYYMM>.gpkg`` next to the ``.txt`` file.

    Two log files, ``logfile_jsonfiles_<YYYYMM>.txt`` and
    ``logfile_shapefiles_<YYYYMM>.txt``, are written to the current working
    directory. Failed downloads are logged there and skipped.

    Args:
        output_dir: Directory that receives the ``.txt`` and ``.gpkg`` files;
            created if it does not exist. A trailing separator is optional.

    Raises:
        KeyError: If a saved JSON file lacks one of the expected keys.
        ValueError: If a saved JSON file is not valid JSON.
    """
    out_dir = output_dir
    os.makedirs(out_dir, exist_ok=True)

    # Year and month tag shared by this run's file names.
    date = datetime.today().strftime('%Y%m')

    _download_boundary_metadata(out_dir, date)
    _build_geopackages(out_dir, date)
84+
85+
86+
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the output
    # directory, and passes it to get_global_shapefiles.
    parser = argparse.ArgumentParser(
        description="Download country boundary shapefiles from geoBoundaries"
    )
    parser.add_argument("output_dir", help="Output directory")
    args = parser.parse_args()

    get_global_shapefiles(args.output_dir)

0 commit comments

Comments
 (0)