Skip to content

Commit 1f3edf8

Browse files
Merge pull request #42 from NSAPH-Data-Processing/audiracmichelle/issue40
Audiracmichelle/issue40
2 parents f1ef15c + f84940f commit 1f3edf8

36 files changed

+691
-511
lines changed

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ RUN apt-get install -y libxrender-dev
1818
# Create paths to data placeholders
1919
RUN python utils/create_dir_paths.py datapaths=datapaths.yaml
2020

21-
# snakemake --configfile conf/config.yaml --cores 4 -C shapefile_polygon_name=zcta
21+
# snakemake --configfile conf/config.yaml --cores 4
2222
ENTRYPOINT ["snakemake"]
23-
CMD ["--cores", "1", "-C", "shapefile_tag=2015", "shapefile_polygon_name=county"]
23+
CMD ["--cores", "1", "--configfile", "conf/config.yaml"]

README.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,7 @@ python src/aggregate_climate_types.py
101101
or run the pipeline:
102102

103103
```bash
104-
# to generate county aggregations
105104
snakemake --cores 1
106-
# to generate zcta aggregations
107-
snakemake --cores 1 -C shapefile_polygon_name=zcta
108105
```
109106

110107
## Dockerized Pipeline
@@ -122,7 +119,7 @@ docker pull nsaph/climate_types_raster2polygon
122119
# to generate county aggregations
123120
docker run -v <test_path>:/app/data/output/climate_types_raster2polygon -t nsaph/climate_types_raster2polygon
124121
# to generate zcta aggregations
125-
docker run -v <test_path>:/app/data/output/climate_types_raster2polygon -t nsaph/climate_types_raster2polygon --cores 1 -C shapefile_polygon_name=zcta
122+
docker run -v <test_path>:/app/data/output/climate_types_raster2polygon -t nsaph/climate_types_raster2polygon --cores 1
126123
```
127124

128125
### If you want to build your own image use

Snakefile

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,20 @@ import yaml
33
conda: "requirements.yaml"
44
configfile: "conf/config.yaml"
55

6-
tag=config["shapefile_tag"]
7-
polygon_name=config["shapefile_polygon_name"]
6+
# == Load configuration ==
7+
8+
# dynamic config files
9+
defaults_dict = {key: value for d in config['defaults'] if isinstance(d, dict) for key, value in d.items()}
10+
shapefiles_cfg = yaml.safe_load(open(f"conf/shapefiles/{defaults_dict['shapefiles']}.yaml", 'r'))
11+
# == Define variables ==
12+
shapefile_list = shapefiles_cfg.keys()
13+
print(shapefile_list)
814

915
rule all:
1016
input:
11-
f"data/output/climate_types_raster2polygon/climate_types_{polygon_name}_{tag}.parquet"
17+
expand(f"data/output/climate_types_raster2polygon/climate_types_{{shapefile_name}}.parquet",
18+
shapefile_name=shapefile_list
19+
)
1220

1321
rule download_climate_types:
1422
output:
@@ -19,17 +27,17 @@ rule download_climate_types:
1927
# temporarily removing download step in snakemake
2028
# rule download_shapefiles:
2129
# output:
22-
# f"data/input/shapefiles/shapefile_{polygon_name}_{tag}/shapefile.shp" #ext = ["shp", "shx", "dbf", "prj", "cpg", "xml"]
30+
# f"data/input/shapefiles/{{shapefile_name}}/{{shapefile_name}}.shp" #ext = ["shp", "shx", "dbf", "prj", "cpg", "xml"]
2331
# shell:
24-
# f"python src/download_shapefile.py shapefile_tag={tag} shapefile_polygon_name={polygon_name}"
32+
# f"python src/download_us_shapefile.py"
2533

2634
rule aggregate_climate_types:
2735
input:
2836
f"data/input/climate_types/{config['climate_types_file']}",
29-
f"data/input/shapefiles/shapefile_{polygon_name}_{tag}/shapefile.shp"
37+
f"data/input/shapefiles/{{shapefile_name}}/{{shapefile_name}}.shp"
3038
output:
31-
f"data/output/climate_types_raster2polygon/climate_types_{polygon_name}_{tag}.parquet",
32-
f"data/intermediate/climate_pcts/climate_pcts_{polygon_name}_{tag}.json",
33-
f"data/intermediate/climate_pcts/climate_types_{polygon_name}_{tag}.csv"
39+
f"data/output/climate_types_raster2polygon/climate_types_{{shapefile_name}}.parquet",
40+
f"data/intermediate/climate_pcts/climate_pcts_{{shapefile_name}}.json",
41+
f"data/intermediate/climate_pcts/climate_types_{{shapefile_name}}.csv"
3442
shell:
35-
f"python src/aggregate_climate_types.py shapefile_tag={tag} shapefile_polygon_name={polygon_name}"
43+
f"python src/aggregate_climate_types.py"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#@package climate_keys
12
1: ["Af", "Tropical, rainforest"]
23
2: ["Am", "Tropical, monsoon"]
34
3: ["Aw", "Tropical, savannah"]

conf/config.yaml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
defaults:
22
- _self_
33
- datapaths: datapaths
4-
- shapefiles: global_shapefiles
5-
- climate_keys: climate_keys
4+
- shapefiles: county
5+
- climate_keys.yaml
66

77
# Climate types raw raster information
88
climate_types_url: https://figshare.com/ndownloader/files/12407516
@@ -13,10 +13,6 @@ climate_types_file: Beck_KG_V1_present_0p0083.tif # it's the one at highest res
1313
# - Beck_KG_V1_present_conf_0p083.tif
1414
# - Beck_KG_V1_present_conf_0p5.tif
1515

16-
# Shapefiles information
17-
shapefile_tag: ADM2all #to be matched with cfg.shapefiles
18-
shapefile_polygon_name: MEX #to be matched with cfg.shapefiles, column name of id in output file
19-
2016
hydra:
2117
run:
2218
dir: logs/${now:%Y-%m-%d}/${now:%H-%M-%S}

conf/datapaths/example.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# if files are stored within the local copy of the repository, then use null:
2+
input:
3+
climate_types: null
4+
shapefiles: data/example/
5+
6+
intermediate:
7+
climate_pcts: null
8+
9+
output:
10+
climate_types_raster2polygon: null

conf/example_config.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
defaults:
2+
- _self_
3+
- datapaths: example
4+
- shapefiles: example
5+
- climate_keys.yaml
6+
7+
# Climate types raw raster information
8+
climate_types_url: https://figshare.com/ndownloader/files/12407516
9+
climate_types_file: Beck_KG_V1_present_0p0083.tif # it's the one at highest resolution
10+
# - Beck_KG_V1_present_0p083.tif
11+
# - Beck_KG_V1_present_0p5.tif
12+
# - Beck_KG_V1_present_conf_0p0083.tif # this is uncertainty
13+
# - Beck_KG_V1_present_conf_0p083.tif
14+
# - Beck_KG_V1_present_conf_0p5.tif
15+
16+
hydra:
17+
run:
18+
dir: logs/${now:%Y-%m-%d}/${now:%H-%M-%S}

conf/shapefiles/county.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#County shapefiles (cartographic boundaries) https://www.census.gov/programs-surveys/geography/guidance/tiger-data-products-guide.html
2+
county_2015:
3+
url: https://www2.census.gov/geo/tiger/GENZ2015/shp/cb_2015_us_county_20m.zip
4+
idvar: GEOID
5+
output_idvar: county

conf/shapefiles/example.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
CAN_ADM2:
2+
url: null
3+
idvar: shapeID
4+
output_idvar: id
5+
MEX_ADM2:
6+
url: null
7+
idvar: shapeID
8+
output_idvar: id
9+
USA_ADM2:
10+
url: null
11+
idvar: shapeID
12+
output_idvar: id
Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1-
# Catalog of shapefiles
2-
# cfg.shapefile_tag and cfg.shapefile_polygon_name are used to match the shapefile to use
1+
#TODO: is it possible to have a single shapefile for all countries?
32

4-
CAN:
5-
ADM3all:
3+
global_ADM1:
64
url: null
7-
idvar: shapeID_3
8-
MEX:
9-
ADM2all:
5+
idvar: shapeID
6+
output_idvar: id
7+
global_ADM2:
108
url: null
11-
idvar: shapeID_2
12-
USA:
13-
ADM2all:
9+
idvar: shapeID
10+
output_idvar: id
11+
global_ADM3:
1412
url: null
15-
idvar: shapeID_2
13+
idvar: shapeID
14+
output_idvar: id

0 commit comments

Comments
 (0)