Commit d4ca04e

handling of no climate intersections
1 parent 6d2ac57 commit d4ca04e

File tree

1 file changed: +31 -18 lines changed

src/aggregate_climate_types.py

Lines changed: 31 additions & 18 deletions
@@ -8,11 +8,17 @@
 import pandas as pd
 import geopandas as gpd

+# configure logger to print at info level
+logging.basicConfig(level=logging.INFO)
+LOGGER = logging.getLogger(__name__)
+
 @hydra.main(config_path="../conf", config_name="config", version_base=None)
 def main(cfg):
-    # log statistics (transform, crs, nodata)
+    LOGGER.info("""
+    # Extract transform, crs, nodata from raster
+    """)
     raster_path = f"data/input/climate_types/{cfg.climate_types_file}"
-    logging.info(f"Reading raster {raster_path}")
+    LOGGER.info(f"Reading raster {raster_path}")
     with rasterio.open(raster_path) as src:
         transform = src.transform
         crs = src.crs
@@ -25,9 +31,7 @@ def main(cfg):
     # plt.imshow(layer == 0) #modify layer value to see different climate types
     # plt.show()

-
-    logging.info(
-        "Read file with characteristics:\n"
+    LOGGER.info("Read file with characteristics:\n"
         f"Transform:\n{transform}\n"
         f"CRS: {crs}\n"
         f"NoData: {nodata}\n"
@@ -38,13 +42,13 @@ def main(cfg):
     # read shapefile
     idvar = cfg.shapefiles[cfg.shapefile_polygon_name][cfg.shapefile_year].idvar
     shp_path = f"data/input/shapefiles/shapefile_{cfg.shapefile_polygon_name}_{cfg.shapefile_year}/shapefile.shp"
-    logging.info(f"Reading shapefile {shp_path}")
+    LOGGER.info(f"Reading shapefile {shp_path}")
     shp = gpd.read_file(shp_path)
-    logging.info(f"Read shapefile with head\n: {shp.drop(columns='geometry').head()}")
+    LOGGER.info(f"Read shapefile with head\n: {shp.drop(columns='geometry').head()}")
     ids = shp[idvar]

     # compute zonal stats
-    logging.info(f"Computing zonal stats")
+    LOGGER.info(f"Computing zonal stats")
     stats = rasterstats.zonal_stats(
         shp_path,
         raster_path,
@@ -54,13 +58,22 @@ def main(cfg):
         categorical=True,
         nodata=nodata,
     )
-    logging.info(f"Done.")
+    LOGGER.info(f"Done.")

     # for each entry in stats count the unique values
     avs = {}
     for i, s in enumerate(stats):
-        n = s["count"] - s.get(0, 0)
-        avs[ids[i]] = {k: v / n for k, v in s.items() if k != "count"}
+        n0 = s.get(0, 0)
+        n = s["count"] - n0
+        if n > 0:
+            avs[ids[i]] = {k: v / n for k, v in s.items() if k != "count"}
+        else:
+            # log polygon id if it has no climate intersection,
+            # if the number of pixels with climate type 0 is equal to the total number of pixels
+            # this is the case for some polygons with area intersecting raster grids classified as 0 only
+            # climate class 0 is body of water
+            LOGGER.info(f"Polygon {ids[i]} has {n0} pixels with climate type 0")
+            avs[ids[i]] = {0: 1.0}

     # log some statistics of what ran
     m0 = np.mean([len(m) == 1 for m in avs.values()])
@@ -73,10 +86,10 @@ def main(cfg):
         if x[0] == x[1]:
             frac_ties += 1 / len(avs)

-    logging.info(f"Fraction of locations with only one climate: {100 * m0:.2f}%")
-    logging.info(f"Fraction of locations more than one climate: {100 * m1:.2f}%")
-    logging.info(f"Fraction of locations more than two climates: {100 * m2:.2f}%")
-    logging.info(f"Fraction of locations with ties: {100 * frac_ties:.2f}%")
+    LOGGER.info(f"Fraction of locations with only one climate: {100 * m0:.2f}%")
+    LOGGER.info(f"Fraction of locations more than one climate: {100 * m1:.2f}%")
+    LOGGER.info(f"Fraction of locations more than two climates: {100 * m2:.2f}%")
+    LOGGER.info(f"Fraction of locations with ties: {100 * frac_ties:.2f}%")

     # save files
     intermediate_dir = f"data/intermediate/climate_pcts/climate_pcts_{cfg.shapefile_polygon_name}_{cfg.shapefile_year}"
@@ -85,7 +98,7 @@ def main(cfg):
     class_file = f"{intermediate_dir}/class_file.csv"
     output_file = f"data/output/climate_types_raster2polygon/climate_types_{cfg.shapefile_polygon_name}_{cfg.shapefile_year}.csv"

-    logging.info(f"Saving pcts to {pcts_file}")
+    LOGGER.info(f"Saving pcts to {pcts_file}")
     with open(pcts_file, "w") as f:
         json.dump(avs, f)

@@ -99,7 +112,7 @@ def main(cfg):
     class_df["climate_type_short"] = class_df["climate_type_num"].map(codedict_short)
     class_df["climate_type_long"] = class_df["climate_type_num"].map(codedict_long)
     class_df = class_df.drop(columns="climate_type_num")
-    logging.info(f"Saving classification to {class_file}")
+    LOGGER.info(f"Saving classification to {class_file}")
     class_df.to_csv(class_file, index=False)

     # transform the percentages into a sparse dataframe
@@ -114,7 +127,7 @@ def main(cfg):

     output_df = pd.merge(class_df, output_df, on="id")

-    logging.info(f"Saving output into data/output/climate_types_zip_zcta \n {output_df.head()}")
+    LOGGER.info(f"Saving output into data/output/climate_types_zip_zcta \n {output_df.head()}")
     output_df.to_csv(output_file , index=False)

 if __name__ == "__main__":
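
Note on the core change: the fix is the n > 0 guard in the per-polygon loop, which avoids a division by zero when every pixel in a polygon is class 0 (body of water). Below is a minimal, self-contained sketch of that normalization, assuming each zonal-stats entry maps climate class codes to pixel counts and carries a total "count" key (the shape the loop consumes); example_stats and example_ids are made-up inputs for illustration, not project data.

# Sketch only: mimics the new fallback for polygons whose pixels are all class 0 (water).
example_stats = [
    {"count": 10, 4: 7, 14: 3},  # mixed polygon: 7 pixels of class 4, 3 of class 14
    {"count": 5, 0: 5},          # every pixel is class 0, so there is no climate intersection
]
example_ids = ["36061", "25025"]  # hypothetical polygon ids

avs = {}
for i, s in enumerate(example_stats):
    n0 = s.get(0, 0)         # pixels classified as 0 (body of water)
    n = s["count"] - n0      # pixels carrying a real climate class
    if n > 0:
        # share of each class relative to non-water pixels
        avs[example_ids[i]] = {k: v / n for k, v in s.items() if k != "count"}
    else:
        # previously this path divided by zero; now the polygon is recorded as 100% class 0
        avs[example_ids[i]] = {0: 1.0}

print(avs)  # {'36061': {4: 0.7, 14: 0.3}, '25025': {0: 1.0}}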
