Skip to content

Commit b4a74b9

Browse files
author
Kezia Irene
committed
refactor codes, edit paths, add readme
1 parent b2bfe52 commit b4a74b9

16 files changed

+125
-380
lines changed

README.md

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,39 @@
11
# smoke_aggregation
2-
Repository for computing the standardized weight across different zipcode given the 10km grids
2+
Repository for computing standardized weights for each zipcode over the 10 km grid, then calculating the smoke values per zipcode for 2006-2016
3+
34
* Input:
4-
* # 10km_grid/10km_grid_wgs84/: This is a folder that contains the shapefile for the 10 km grid.
5+
* 10km_grid/10km_grid_wgs84/: This is a folder that contains the shapefile for the 10 km grid.
56
* 10km_grid/smokePM2pt5_predictions_daily_10km_20060101-20201231.rds: This is a file that contains a data frame with the final set of daily smoke PM2.5 predictions on smoke days at 10 km resolution from January 1, 2006 to December 31, 2020 for the contiguous US. The 'grid_id_10km' column in this file corresponds to the 'ID' column in the 10 km grid shapefile.
6-
* Yearly zipcode grid shapefile: ./data/input/Zipcode_Info/polygon/ESRI", year, "USZIP5_POLY_WGS84.shp
7+
* Yearly zipcode shapefile: `./data/input/Zipcode_Info/polygon/ESRI<yy>USZIP5_POLY_WGS84.shp`, where `<yy>` is the year (e.g. `ESRI06USZIP5_POLY_WGS84.shp` for 2006)
78

8-
* Output: Rds file containing zip_id, grid_id, and weight
9+
* Output: one CSV file per year containing zip, date, and smoke
910

1011
Example output:
1112
```
12-
zip_id grid_id w
13-
1 03281 104504 0.120474045
14-
2 03281 104505 0.489675352
15-
3 03281 104506 0.001825444
16-
4 03281 105019 0.080026705
17-
5 03281 105020 0.307998349
13+
zip date smoke
14+
<chr> <date> <dbl>
15+
1 00012 2016-01-01 0
16+
2 00012 2016-01-02 0
17+
3 00012 2016-01-03 0
18+
4 00012 2016-01-04 0
19+
5 00012 2016-01-05 0
20+
6 00012 2016-01-06 0
1821
```
1922

2023
To run example weights:
2124
```
22-
mkdir $HOME/singularity_images
23-
cd $HOME/singularity_images
24-
singularity pull docker://nsaph/r_exposures:v0
2525
cd code
2626
Rscript 01_yearly_grid_mean.R
2727
Rscript 02_match_cts_extent_list.R
28-
Rscript test_get_weights.R
29-
sbatch 03_run_get_weights_par.sbatch
28+
Rscript 03_run_get_weights_par_script.R
29+
Rscript 04_daily_aggregation.R
30+
3031
```
32+
## Notebook EDA
33+
This is a high-level explanation of what each notebook in the notebooks folder does:
34+
1. 01_eda_preds.Rmd: Identify grids with smoke per day, distribution of smoke values
35+
2. 02_yearly_grid_mean.Rmd: Aggregation to obtain grid_id, year, and smoke values
36+
3. 03_match_cts_extent.Rmd: Match the crs and extents of sf objects, create bounding box, plot the mapping between zipcode and grid_id
37+
4. 04_test_grid_to_zip.Rmd: Join grid with smoke values with 'over' function
38+
5. 07_get_weights_par_test.Rmd: Unit test for get_weights_par. Tests whether the weights for a single zipcode sum to 1
39+
6. 08_zip_code_EDA.Rmd: ZIP code EDA to see the coverage of the smoke aggregation files

code/03_run_get_weights_par.R

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ library(parallel)
88
library(argparse)
99
print("This is the sf package version we are using:")
1010
print(packageVersion("sf"))
11-
11+
print(Sys.time())
1212
## define parser arguments ----
1313
parser <- ArgumentParser()
1414
parser$add_argument("-y", "--year", default=2006,
@@ -22,7 +22,7 @@ print("use R script get_weights_par")
2222
# args$cores = 24
2323

2424
## read functions ----
25-
source("../../lib/get_weights_par.R")
25+
source("../lib/get_weights_par.R")
2626

2727
print("load data")
2828
## Load grid and zip sf objects ----
@@ -50,8 +50,8 @@ zip_weights_df$year <- args$year
5050
## save output ----
5151
write_csv(
5252
zip_weights_df,
53-
paste0("../data/output/scratch/",
54-
"zip_weights_df_test_par", as.character(args$year), ".csv")
53+
paste0("../data/output/",
54+
"zip_weights_df_", as.character(args$year), ".csv")
5555
)
56-
56+
print(Sys.time())
5757
print("completed")

code/03_run_get_weights_par.sbatch

Lines changed: 0 additions & 9 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
source("03_run_get_weights_par.R")
2+
3+
4+
# Define the range of years to loop through
5+
years <- 2006:2016
6+
7+
# Loop through each year and call the 03_run_get_weights_par.R script with the appropriate -y parameter
8+
for (year in years) {
9+
command <- paste("Rscript 03_run_get_weights_par.R -y", year, "-c 40", sep=" ")
10+
system(command)
11+
}

code/04_daily_aggregation.R

Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ grid_10km = read_sf("../data/input/local_data/10km_grid_wgs84/10km_grid_wgs84.sh
1313
)
1414

1515
# Load smokePM predictions on smoke days
16-
preds = read_csv("/net/rcstorenfs02/ifs/rc_labs/dominici_lab/lab/data/exposures/smoke/smokePM2pt5_predictions_daily_10km_20060101-20201231.csv") %>%
16+
preds = read_csv("../data/input/local_data/smokePM2pt5_predictions_daily_10km_20060101-20201231.csv") %>%
1717
mutate(
1818
date = ymd(date)
1919
) %>%
@@ -28,9 +28,10 @@ zip_smoke_df = data.frame(zip = character(),
2828
smoke = double(),
2929
stringsAsFactors = FALSE)
3030

31-
for(y_ in 2006:2007){
32-
y_ <- 2006
31+
for(y_ in 2016:2016){
3332
# Load full set of dates
33+
print("Now Processing year:")
34+
print(y_)
3435
dates = seq.Date(ymd(paste0(y_, "0101")),
3536
ymd(paste0(y_, "1231")),
3637
by = "day")
@@ -59,38 +60,11 @@ for(y_ in 2006:2007){
5960
group_by(zip, date) %>%
6061
summarise(smoke = weighted.mean(smoke, w=w))
6162

62-
zip_smoke_df = rbind(zip_smoke_df, zip_smoke_df_y)
63-
64-
}
65-
66-
zip_sf = read_rds("../data/intermediate/scratch/zip_sf_list.rds")
67-
68-
save(zip_smoke_df, file = "../data/output/smoke/daily_zip_test.RData")
69-
70-
71-
72-
# Load required libraries
73-
library(tidyverse)
74-
library(sf)
75-
76-
# Read in the shapefile for zipcodes
77-
zip_sf <- read_sf("../data/input/local_data/Zipcode_Info/polygon/ESRI06USZIP5_POLY_WGS84.shp")
78-
79-
zip_smoke_df$date <- as.Date(zip_smoke_df$date)
80-
81-
# Subset the data for the given date
82-
zip_smoke_subset <- zip_smoke_df %>% filter(date == "2006-01-01")
83-
84-
# Join the data with the shapefile based on the zipcode
85-
zip_sf <- left_join(zip_sf, zip_smoke_subset, by = c("ZIP" = "zip"))
86-
87-
# Drop rows with NULL values in smoke column
88-
zip_sf <- zip_sf %>% drop_na(smoke)
89-
90-
91-
# Create a map of smoke in every zipcode with thinner line width
92-
ggplot() +
93-
geom_sf(data = zip_sf, aes(fill = smoke), lwd = 0.1) +
94-
scale_fill_gradient(low = "yellow", high = "red") +
95-
theme_void()
63+
#save(zip_smoke_df_y, file = "../data/output/smoke/daily_zip_.RData")
64+
write_csv(
65+
zip_smoke_df_y,
66+
paste0("../data/output/",
67+
"daily_zip_", as.character(y_), ".csv")
68+
)
9669

70+
}

code/05_pobox_zip_association.R

Lines changed: 0 additions & 65 deletions
This file was deleted.

code/06_pobox_smoke_association.R

Lines changed: 0 additions & 46 deletions
This file was deleted.

code/07_annual_average.R

Lines changed: 0 additions & 30 deletions
This file was deleted.

0 commit comments

Comments
 (0)