88import pandas as pd
99import geopandas as gpd
1010
# Module-level logger for this script.  basicConfig only takes effect when
# logging has not been configured yet, so it simply guarantees that INFO
# records are emitted when this file is the entry point.
LOGGER = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
1115@hydra .main (config_path = "../conf" , config_name = "config" , version_base = None )
1216def main (cfg ):
13- # log statistics (transform, crs, nodata)
17+ LOGGER .info ("""
18+ # Extract transform, crs, nodata from raster
19+ """ )
1420 raster_path = f"data/input/climate_types/{ cfg .climate_types_file } "
15- logging .info (f"Reading raster { raster_path } " )
21+ LOGGER .info (f"Reading raster { raster_path } " )
1622 with rasterio .open (raster_path ) as src :
1723 transform = src .transform
1824 crs = src .crs
@@ -25,9 +31,7 @@ def main(cfg):
2531 # plt.imshow(layer == 0) #modify layer value to see different climate types
2632 # plt.show()
2733
28-
29- logging .info (
30- "Read file with characteristics:\n "
34+ LOGGER .info ("Read file with characteristics:\n "
3135 f"Transform:\n { transform } \n "
3236 f"CRS: { crs } \n "
3337 f"NoData: { nodata } \n "
@@ -38,13 +42,13 @@ def main(cfg):
3842 # read shapefile
3943 idvar = cfg .shapefiles [cfg .shapefile_polygon_name ][cfg .shapefile_year ].idvar
4044 shp_path = f"data/input/shapefiles/shapefile_{ cfg .shapefile_polygon_name } _{ cfg .shapefile_year } /shapefile.shp"
41- logging .info (f"Reading shapefile { shp_path } " )
45+ LOGGER .info (f"Reading shapefile { shp_path } " )
4246 shp = gpd .read_file (shp_path )
43- logging .info (f"Read shapefile with head\n : { shp .drop (columns = 'geometry' ).head ()} " )
47+ LOGGER .info (f"Read shapefile with head\n : { shp .drop (columns = 'geometry' ).head ()} " )
4448 ids = shp [idvar ]
4549
4650 # compute zonal stats
47- logging .info (f"Computing zonal stats" )
51+ LOGGER .info (f"Computing zonal stats" )
4852 stats = rasterstats .zonal_stats (
4953 shp_path ,
5054 raster_path ,
@@ -54,13 +58,22 @@ def main(cfg):
5458 categorical = True ,
5559 nodata = nodata ,
5660 )
57- logging .info (f"Done." )
61+ LOGGER .info (f"Done." )
5862
5963 # for each entry in stats count the unique values
6064 avs = {}
6165 for i , s in enumerate (stats ):
62- n = s ["count" ] - s .get (0 , 0 )
63- avs [ids [i ]] = {k : v / n for k , v in s .items () if k != "count" }
66+ n0 = s .get (0 , 0 )
67+ n = s ["count" ] - n0
68+ if n > 0 :
69+ avs [ids [i ]] = {k : v / n for k , v in s .items () if k != "count" }
70+ else :
71+ # log polygon id if it has no climate intersection,
72+ # if the number of pixels with climate type 0 is equal to the total number of pixels
73+ # this is the case for some polygons with area intersecting raster grids classified as 0 only
74+ # climate class 0 is body of water
75+ LOGGER .info (f"Polygon { ids [i ]} has { n0 } pixels with climate type 0" )
76+ avs [ids [i ]] = {0 : 1.0 }
6477
6578 # log some statistics of what ran
6679 m0 = np .mean ([len (m ) == 1 for m in avs .values ()])
@@ -73,10 +86,10 @@ def main(cfg):
7386 if x [0 ] == x [1 ]:
7487 frac_ties += 1 / len (avs )
7588
76- logging .info (f"Fraction of locations with only one climate: { 100 * m0 :.2f} %" )
77- logging .info (f"Fraction of locations more than one climate: { 100 * m1 :.2f} %" )
78- logging .info (f"Fraction of locations more than two climates: { 100 * m2 :.2f} %" )
79- logging .info (f"Fraction of locations with ties: { 100 * frac_ties :.2f} %" )
89+ LOGGER .info (f"Fraction of locations with only one climate: { 100 * m0 :.2f} %" )
90+ LOGGER .info (f"Fraction of locations more than one climate: { 100 * m1 :.2f} %" )
91+ LOGGER .info (f"Fraction of locations more than two climates: { 100 * m2 :.2f} %" )
92+ LOGGER .info (f"Fraction of locations with ties: { 100 * frac_ties :.2f} %" )
8093
8194 # save files
8295 intermediate_dir = f"data/intermediate/climate_pcts/climate_pcts_{ cfg .shapefile_polygon_name } _{ cfg .shapefile_year } "
@@ -85,7 +98,7 @@ def main(cfg):
8598 class_file = f"{ intermediate_dir } /class_file.csv"
8699 output_file = f"data/output/climate_types_raster2polygon/climate_types_{ cfg .shapefile_polygon_name } _{ cfg .shapefile_year } .csv"
87100
88- logging .info (f"Saving pcts to { pcts_file } " )
101+ LOGGER .info (f"Saving pcts to { pcts_file } " )
89102 with open (pcts_file , "w" ) as f :
90103 json .dump (avs , f )
91104
@@ -99,7 +112,7 @@ def main(cfg):
99112 class_df ["climate_type_short" ] = class_df ["climate_type_num" ].map (codedict_short )
100113 class_df ["climate_type_long" ] = class_df ["climate_type_num" ].map (codedict_long )
101114 class_df = class_df .drop (columns = "climate_type_num" )
102- logging .info (f"Saving classification to { class_file } " )
115+ LOGGER .info (f"Saving classification to { class_file } " )
103116 class_df .to_csv (class_file , index = False )
104117
105118 # transform the percentages into a sparse dataframe
@@ -114,7 +127,7 @@ def main(cfg):
114127
115128 output_df = pd .merge (class_df , output_df , on = "id" )
116129
117- logging .info (f"Saving output into data/output/climate_types_zip_zcta \n { output_df .head ()} " )
130+ LOGGER .info (f"Saving output into data/output/climate_types_zip_zcta \n { output_df .head ()} " )
118131 output_df .to_csv (output_file , index = False )
119132
120133if __name__ == "__main__" :
0 commit comments