Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion conf/datapaths/datapaths_cannon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ name: null
dirs:
input:
lego: /n/dominici_lab/lab/lego
output: /n/dominici_lab/lab/lego_loader_x/output
covars: /n/dominici_lab/lab/lego_loader_x/output
health: /n/dominici_lab/lab/lego_loader_x/synthetic_health
54 changes: 54 additions & 0 deletions conf/synthetic/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Settings for synthetic health data generation
# (var_group `health`, vg_name `synthetic_health`).

# NOTE(review): presumably the year to process; it lies within the
# min_year..max_year bounds set below — confirm against the consumer.
year: 2010
# NOTE(review): the list holds 30/90/180 only; "including daily" presumably
# means the daily series is kept in addition to these horizons — confirm.
horizons: [30, 90, 180] # Horizons in days (including daily)

# conf-level settings: variable group, variable, resolutions, I/O directories
var_group: health
vg_name: synthetic_health

var: diabetes

spatial_res: zcta
temporal_res: daily

input_dir: data/input/
output_dir: data/health/

# var_group-level settings: year bounds, minimum resolutions, lego dataset
# name and location
min_year: 2000
max_year: 2015
min_spatial_res: zcta
min_temporal_res: daily
lego_nm: synthetic_sparse_counts
lego_dir: lego/synthetic/medpar_outcomes/ccw/zcta_daily

# Debug options
debug_days: 3 # Set to null or remove for full-year processing

# Synthetic data parameters
# NOTE(review): the keys that follow presumably nest under `synthetic:`;
# confirm the indentation in the checked-in file.
synthetic:
# Input paths under data/input/lego/: ZCTA unique IDs, ZCTA shapefiles,
# and ZCTA population (parquet)
zcta_unique_path: data/input/lego/geoboundaries/us_geoboundaries__census/us_uniqueid__census/zcta_yearly
zcta_shapefile_path: data/input/lego/geoboundaries/us_geoboundaries__census/us_shapefile__census/zcta_yearly
population_path: data/input/lego/social/demographics__census/raw/core/zcta__dec__population.parquet

# Poisson distribution parameters for synthetic data generation
poisson_params:
base_rate: 2.5 # Base rate for Poisson distribution
seasonal_amplitude: 0.3 # Seasonal variation amplitude
spatial_variance: 0.5 # Spatial variance across ZCTAs
latitude_effect: 0.2 # Effect of latitude on incidence
longitude_effect: 0.1 # Effect of longitude on incidence
population_effect: 0.0001 # Population scaling factor (per capita effect)
random_seed: 42 # For reproducibility

# Geographic constraints
mainland_only: true # Filter for continental US only

# Date range for synthetic data
# NOTE(review): same values as min_year/max_year above; confirm whether the
# two pairs must be kept in sync.
date_range:
start_year: 2000
end_year: 2015

# Hydra run directory: logs/synthetic/<date>/<time>, filled in by Hydra's
# `now` resolver using the strftime patterns below.
hydra:
run:
dir: logs/synthetic/${now:%Y-%m-%d}/${now:%H-%M-%S}
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
numpy
torch
pandas==2.2.2
pyarrow==11.0.0
pandas>2.2.2
pyarrow
duckdb==0.9.2
hydra-core==1.3.2
snakemake==8.16
tqdm
ipykernel
geopandas
Loading