import requests
import json
import os
import io
import pycountry
from datetime import datetime
import geopandas as gpd
from shapely import wkt
import argparse

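# Workflow implemented below, in two passes over the geoBoundaries API
# (https://www.geoboundaries.org/api/current/):
#   1. For every country known to pycountry and every admin level ADM0-ADM5,
#      download the boundary metadata (JSON) and cache it locally as a .txt file.
#   2. Re-read the cached metadata, fetch the GeoJSON referenced by its
#      'gjDownloadURL' field, and write each admin level out as a GeoPackage.
# Progress for both passes is written to dated log files in the working directory.
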
def get_global_shapefiles(output_dir):
    """Download geoBoundaries administrative boundaries for every country and
    save them as GeoPackage files in output_dir."""
    # Set directories
    out_dir = output_dir
    os.makedirs(out_dir, exist_ok=True)

    # Get the year and month (used to tag downloads and log files)
    date = datetime.today().strftime('%Y%m')

    # Define the baseline URL for the API
    URL = "https://www.geoboundaries.org/api/current/"

    # Release type
    RELEASE_TYPE = "gbOpen/"

    # Boundary types
    BOUNDARY_TYPE = ["/ADM0/", "/ADM1/", "/ADM2/", "/ADM3/", "/ADM4/", "/ADM5/"]

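    # The pieces above are concatenated into one metadata request per country and
    # level, e.g. (illustrative ISO code):
    #   https://www.geoboundaries.org/api/current/gbOpen/USA/ADM1/
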
    with open(f"logfile_jsonfiles_{date}.txt", "w") as log_file:
        for i, c in enumerate(pycountry.countries):
            log_file.write("---------------------------------------------------------------------\n")
            ISO_CODE3 = c.alpha_3
            log_file.write(f"Beginning download of {c.name} - ISO Code {ISO_CODE3} - {i + 1} of {len(pycountry.countries)}\n")

            for boundary in BOUNDARY_TYPE:
                dl_link = f"{URL}{RELEASE_TYPE}{ISO_CODE3}{boundary}"
                dl_file = os.path.join(out_dir, f"{ISO_CODE3}_{boundary.strip('/')}_{date}.txt")

                if os.path.exists(dl_file):
                    log_file.write("File already downloaded. Proceeding to next.\n")
                else:
                    try:
                        response = requests.get(dl_link)
                        response.raise_for_status()
                        with open(dl_file, "wb") as file:
                            file.write(response.content)
                        log_file.write(":) file downloaded successfully\n")
                    except requests.RequestException as e:
                        log_file.write(f"ERROR! {ISO_CODE3}{boundary} file did not download successfully: {e}\n")

    # Read in the JSON files to download the actual geometry features
    all_json = [f for f in os.listdir(out_dir) if f.endswith(f"{date}.txt")]
    unique_country_json = sorted(set(f[:3] for f in all_json))

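    # Cached metadata files follow the pattern <ISO3>_ADM<level>_<YYYYMM>.txt
    # (e.g. the illustrative USA_ADM1_202401.txt), so the ISO code is f[:3] and
    # the admin level digit sits at index 7.
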
    with open(f"logfile_shapefiles_{date}.txt", "w") as log_file:
        for i, country_code in enumerate(unique_country_json):
            log_file.write("----------------------------------------------------------------------\n")
            log_file.write(f"Processing country {i + 1} of {len(unique_country_json)} - {country_code}\n")

            files = [f for f in all_json if f.startswith(country_code)]
            all_admin_levels = sorted(set(int(f[7]) for f in files))
            log_file.write(f".....Highest admin. level: {max(all_admin_levels)}\n")
            log_file.write(f".....Total levels: {len(all_admin_levels)}\n")

            for outlevel in all_admin_levels:
                file = [f for f in files if f.startswith(f"{country_code}_ADM{outlevel}")][0]

                with open(os.path.join(out_dir, file)) as json_file:
                    input_json = json.load(json_file)

                year_boundary = input_json['boundaryYearRepresented']
                admin_level = input_json['boundaryType'][3]  # e.g. "ADM2" -> "2"

                # Fetch the GeoJSON referenced by the metadata and load it into a GeoDataFrame
                response = requests.get(input_json['gjDownloadURL'])
                response.raise_for_status()
                input_geojson = gpd.read_file(io.BytesIO(response.content))

                # Parse any geometries that arrived as WKT strings rather than shapely objects
                input_geojson['geometry'] = input_geojson['geometry'].apply(
                    lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom
                )
                # Suffix every attribute column with its admin level so levels stay distinguishable
                input_geojson.rename(
                    columns={c: c + "_" + admin_level for c in input_geojson.columns if c not in ['geometry']},
                    inplace=True
                )

                input_geojson[f"ABYear_{admin_level}"] = year_boundary
                final = input_geojson.drop(columns=[col for col in input_geojson.columns if 'shapeGroup' in col])

                outfilename = file.replace(".txt", ".gpkg")
                final.to_file(os.path.join(out_dir, outfilename), driver='GPKG')


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Download global geoBoundaries layers and convert them to GeoPackage files")
    parser.add_argument("output_dir", help="Output directory")
    args = parser.parse_args()

    get_global_shapefiles(args.output_dir)
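
# Example invocation (the script filename is illustrative; use whatever this file is saved as):
#   python get_global_shapefiles.py ./boundaries
# This caches the geoBoundaries metadata for every country in ./boundaries and
# writes one .gpkg per country and admin level alongside it.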