diff --git a/Dockerfile b/Dockerfile
index c871f80..d69b3fd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,8 +13,8 @@ RUN mamba env update -n base -f requirements.yaml
 #&& mamba clean -a
 
 # Create paths to data placeholders
-RUN python utils/create_dir_paths.py datapaths.input.satellite_pm25.annual=null datapaths.input.satellite_pm25.monthly=null
+RUN python src/create_datapaths.py datapaths.dirs.input.raw.yearly=null datapaths.dirs.input.raw.monthly=null
 
-# snakemake --configfile conf/config.yaml --cores 4 -C temporal_freq=annual
+# snakemake --configfile conf/config.yaml --cores 4 -C temporal_freq=yearly
 ENTRYPOINT ["snakemake", "--configfile", "conf/config.yaml"]
-CMD ["--cores", "4", "-C", "polygon_name=county", "temporal_freq=annual"]
+CMD ["--cores", "4", "-C", "polygon_name=county", "temporal_freq=yearly"]
diff --git a/README.md b/README.md
index cb3a85f..83af885 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# pm25_washu_raster2polygon
+# pm25_randall_raster2polygon
 
 Code to produce spatial aggregations of pm25 estimates as generated by the [Atmospheric Composition Analysis Group](https://sites.wustl.edu/acag/datasets/surface-pm2-5/). The spatial aggregation are performed for satellite pm25 from grid/raster (NetCDF) to polygons (shp).
 
@@ -10,7 +10,7 @@ The [Atmospheric Composition Analysis Group](https://sites.wustl.edu/acag/datase
 
 The version [V5.GL.04](https://sites.wustl.edu/acag/datasets/surface-pm2-5/#V5.GL.04) consists of mean PM2.5 (ug/m3) available at:
 
-*  Temporal frequency: Annual and monthly  
+*  Temporal frequency: Yearly and monthly  
 *  Grid resolutions: (0.1° × 0.1°) and (0.01° × 0.01°)  
 *  Geographic regions: North America, Europe, Asia, and Global
 
@@ -47,7 +47,7 @@ The configuration structure withing the `/conf` folder allow you to modify the i
 * aggregate pm25: `src/aggregate_pm25.py`
 
 The key parameters are:
-* `temporal_freq` which determines whether the original annual or monthly pm25 files will be aggregated. The options are: `annual` and `monthly`.
+* `temporal_freq` which determines whether the original yearly or monthly pm25 files will be aggregated. The options are: `yearly` and `monthly`.
 * `polygon_name` which determines into which polygons the pm25 grid will the aggregated. The options are: `zcta` and `county`.
 
 ---
@@ -98,7 +98,7 @@ python src/aggregate_pm25.py
 or run the pipeline:
 
 ```bash
-snakemake --cores 4 -C polygon_name=county temporal_freq=annual 
+snakemake --cores 4 -C polygon_name=county temporal_freq=yearly 
 ```
 
 Modify `cores`, `polygon_name` and `temporal_freq` as you find convenient.
@@ -115,7 +115,7 @@ mkdir <path>/satellite_pm25_raster2polygon
 
 ```bash
 docker pull nsaph/satellite_pm25_raster2polygon
-docker run -v <path>:/app/data/input/satellite_pm25/annual <path>/satellite_pm25_raster2polygon/:/app/data/output/satellite_pm25_raster2polygon nsaph/satellite_pm25_raster2polygon
+docker run -v <path>:/app/data/input/satellite_pm25/yearly -v <path>/satellite_pm25_raster2polygon/:/app/data/output/satellite_pm25_raster2polygon nsaph/satellite_pm25_raster2polygon
 ```  
 
 If you are interested in storing the input raw and intermediate data run
diff --git a/Snakefile b/Snakefile
index df4aa44..b6e63a1 100644
--- a/Snakefile
+++ b/Snakefile
@@ -17,21 +17,21 @@ temporal_freq = config['temporal_freq']
 polygon_name = config['polygon_name']
 
 with initialize(version_base=None, config_path="conf"):
-    hydra_cfg = compose(config_name="config", overrides=[f"temporal_freq={temporal_freq}", f"polygon_name={polygon_name}"])
+    cfg = compose(config_name="config", overrides=[f"temporal_freq={temporal_freq}", f"polygon_name={polygon_name}"])
 
-satellite_pm25_cfg = hydra_cfg.satellite_pm25
-shapefiles_cfg = hydra_cfg.shapefiles
+satellite_pm25_cfg = cfg.satellite_pm25
+shapefiles_cfg = cfg.shapefiles
 
 shapefile_years_list = list(shapefiles_cfg[polygon_name].keys())
 
 months_list = "01" if temporal_freq == 'yearly' else [str(i).zfill(2) for i in range(1, 12 + 1)]
-years_list = list(range(1998, 2022 + 1))
+years_list = list(range(1998, 2023 + 1))
 
 # == Define rules ==
 rule all:
     input:
         expand(
-            f"data/output/pm25__washu/{polygon_name}_{temporal_freq}/pm25__washu__{polygon_name}_{temporal_freq}__" +  
+            f"{cfg.datapaths.base_path}/output/{polygon_name}_{temporal_freq}/pm25__randall__{polygon_name}_{temporal_freq}__" +  
                 ("{year}.parquet" if temporal_freq == 'yearly' else "{year}_{month}.parquet"), 
             year=years_list,
             month=months_list
@@ -40,14 +40,14 @@ rule all:
 # remove and use symlink to the us census geoboundaries 
 rule download_shapefiles:
     output:
-        f"data/input/shapefiles/shapefile_{polygon_name}_" + "{shapefile_year}/shapefile.shp" 
+        f"{cfg.datapaths.base_path}/input/shapefiles/shapefile_{polygon_name}_" + "{shapefile_year}/shapefile.shp" 
     shell:
         f"python src/download_shapefile.py polygon_name={polygon_name} " + "shapefile_year={wildcards.shapefile_year}"
 
 rule download_satellite_pm25:
     output:
         expand(
-            f"data/input/pm25__washu__raw/{temporal_freq}/{satellite_pm25_cfg[temporal_freq]['file_prefix']}." + 
+            f"{cfg.datapaths.base_path}/input/raw/{temporal_freq}/{satellite_pm25_cfg[temporal_freq]['file_prefix']}." + 
             ("{year}01-{year}12.nc" if temporal_freq == 'yearly' else "{year}{month}-{year}{month}.nc"), 
             year=years_list,
             month=months_list)
@@ -58,20 +58,20 @@ rule download_satellite_pm25:
 
 def get_shapefile_input(wildcards):
     shapefile_year = available_shapefile_year(int(wildcards.year), shapefile_years_list)
-    return f"data/input/shapefiles/shapefile_{polygon_name}_{shapefile_year}/shapefile.shp"
+    return f"{cfg.datapaths.base_path}/input/shapefiles/shapefile_{polygon_name}_{shapefile_year}/shapefile.shp"
 
 rule aggregate_pm25:
     input:
         get_shapefile_input,
         expand(
-            f"data/input/pm25__washu__raw/{temporal_freq}/{satellite_pm25_cfg[temporal_freq]['file_prefix']}." + 
+            f"{cfg.datapaths.base_path}/input/raw/{temporal_freq}/{satellite_pm25_cfg[temporal_freq]['file_prefix']}." + 
             ("{{year}}01-{{year}}12.nc" if temporal_freq == 'yearly' else "{{year}}{month}-{{year}}{month}.nc"), 
             month=months_list
         )
 
     output:
         expand(
-            f"data/output/pm25__washu/{polygon_name}_{temporal_freq}/pm25__washu__{polygon_name}_{temporal_freq}__" + 
+            f"{cfg.datapaths.base_path}/output/{polygon_name}_{temporal_freq}/pm25__randall__{polygon_name}_{temporal_freq}__" + 
             ("{{year}}.parquet" if temporal_freq == 'yearly' else "{{year}}_{month}.parquet"), 
             month=months_list  # we only want to expand months_list and keep year as wildcard
         )
diff --git a/conf/config.yaml b/conf/config.yaml
index c3c4483..3c502cd 100644
--- a/conf/config.yaml
+++ b/conf/config.yaml
@@ -1,15 +1,15 @@
 defaults:
   - _self_
-  - datapaths: cannon_datapaths
+  - datapaths: cannon_v5gl
   - shapefiles: shapefiles
-  - satellite_pm25: us_pm25
+  - satellite_pm25: V5GL0502.HybridPM25c_0p05.NorthAmerica
 
 # == aggregation args
 temporal_freq: yearly # yearly, monthly to be matched with cfg.satellite_pm25
 year: 2020
 
 # == shapefile download args
-polygon_name: zcta # zcta, county to be matched with cfg.shapefiles
+polygon_name: county # zcta, county to be matched with cfg.shapefiles
 shapefile_year: 2020 #to be matched with cfg.shapefiles
 
 show_progress: false
diff --git a/conf/datapaths/cannon_datapaths.yaml b/conf/datapaths/cannon_datapaths.yaml
deleted file mode 100644
index c41a3fe..0000000
--- a/conf/datapaths/cannon_datapaths.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-# if files are stored within the local copy of the repository, then use null:
-input:
-  pm25__washu__raw: 
-    yearly: /n/netscratch/dominici_lab/Lab/pm25__washu__raw/yearly/ #/n/dominici_lab/lab/lego/environmnetal/pm25__washu/raw/annual
-    monthly: /n/netscratch/dominici_lab/Lab/pm25__washu__raw/monthly/ #/n/dominici_lab/lab/lego/environmnetal/pm25__washu/raw/monthly
-  shapefiles: null
-
-output:
-  pm25__washu: 
-    zcta_yearly: /n/dominici_lab/lab/lego/environmental/pm25__washu/zcta_yearly
-    zcta_monthly: /n/dominici_lab/lab/lego/environmental/pm25__washu/zcta_monthly
-    county_yearly: /n/dominici_lab/lab/lego/environmental/pm25__washu/county_yearly
-    county_monthly: /n/dominici_lab/lab/lego/environmental/pm25__washu/county_monthly
diff --git a/conf/datapaths/cannon_v5gl.yaml b/conf/datapaths/cannon_v5gl.yaml
new file mode 100644
index 0000000..45845f8
--- /dev/null
+++ b/conf/datapaths/cannon_v5gl.yaml
@@ -0,0 +1,14 @@
+base_path: data/V5GL
+
+dirs:
+  input:
+    raw: 
+      yearly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V5GL/raw/yearly #/n/netscratch/dominici_lab/Lab/pm25__randall__raw/yearly 
+      monthly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V5GL/raw/monthly #/n/netscratch/dominici_lab/Lab/pm25__randall__raw/monthly
+    shapefiles: null
+
+  output:
+    zcta_yearly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V5GL/zcta_yearly
+    zcta_monthly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V5GL/zcta_monthly
+    county_yearly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V5GL/county_yearly
+    county_monthly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V5GL/county_monthly
diff --git a/conf/datapaths/cannon_v6gl.yaml b/conf/datapaths/cannon_v6gl.yaml
new file mode 100644
index 0000000..50fcbf7
--- /dev/null
+++ b/conf/datapaths/cannon_v6gl.yaml
@@ -0,0 +1,14 @@
+base_path: data/V6GL
+
+dirs:
+  input:
+    raw: 
+      yearly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V6GL/raw/yearly #/n/netscratch/dominici_lab/Lab/pm25__randall__raw/yearly 
+      monthly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V6GL/raw/monthly #/n/netscratch/dominici_lab/Lab/pm25__randall__raw/monthly
+    shapefiles: null
+
+  output:
+    zcta_yearly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V6GL/zcta_yearly
+    zcta_monthly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V6GL/zcta_monthly
+    county_yearly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V6GL/county_yearly
+    county_monthly: /n/dominici_lab/lab/lego/environmental/pm25__randall/V6GL/county_monthly
diff --git a/conf/datapaths/datapaths.yaml b/conf/datapaths/datapaths.yaml
index e9d1be7..3250093 100644
--- a/conf/datapaths/datapaths.yaml
+++ b/conf/datapaths/datapaths.yaml
@@ -1,12 +1,13 @@
-# if files are stored within the local copy of the repository, then use null:
-input:
-  pm25__washu__raw: 
-    yearly: null
-    monthly: null
-  shapefiles: null
+base_path: data/V6GL
 
-output:
-  pm25__washu: 
+dirs:
+  input:
+    raw: 
+      yearly: null
+      monthly: null
+    shapefiles: null
+
+  output:
     zcta_yearly: null
     zcta_monthly: null
     county_yearly: null
diff --git a/conf/satellite_pm25/us_pm25.yaml b/conf/satellite_pm25/V5GL04.HybridPM25c_0p10.NorthAmerica.yaml
similarity index 100%
rename from conf/satellite_pm25/us_pm25.yaml
rename to conf/satellite_pm25/V5GL04.HybridPM25c_0p10.NorthAmerica.yaml
diff --git a/conf/satellite_pm25/V5GL0502.HybridPM25c_0p05.NorthAmerica.yaml b/conf/satellite_pm25/V5GL0502.HybridPM25c_0p05.NorthAmerica.yaml
new file mode 100644
index 0000000..94ab872
--- /dev/null
+++ b/conf/satellite_pm25/V5GL0502.HybridPM25c_0p05.NorthAmerica.yaml
@@ -0,0 +1,19 @@
+yearly:
+  url: https://wustl.app.box.com/v/ACAG-V5GL0502-GWRPM25c0p05/folder/293383209520
+
+  zipname: Annual
+
+  file_prefix: "V5GL0502.HybridPM25c_0p05.NorthAmerica"
+  #file name convention is V5GL0502.HybridPM25c_0p05.NorthAmerica.yyyymm-yyyymm.nc
+
+monthly:
+  url: https://wustl.app.box.com/v/ACAG-V5GL0502-GWRPM25c0p05/folder/293385030318
+
+  zipname: Monthly
+
+  file_prefix: "V5GL0502.HybridPM25c_0p05.NorthAmerica"
+  #file name convention is V5GL0502.HybridPM25c_0p05.NorthAmerica.yyyymm-yyyymm.nc
+
+layer: "GWRPM25" #geographic weighted regression PM2.5
+latitude_layer: "lat"
+longitude_layer: "lon"
diff --git a/conf/satellite_pm25/V6GL02.04.CNNPM25.0p10.NA.yaml b/conf/satellite_pm25/V6GL02.04.CNNPM25.0p10.NA.yaml
new file mode 100644
index 0000000..97cb511
--- /dev/null
+++ b/conf/satellite_pm25/V6GL02.04.CNNPM25.0p10.NA.yaml
@@ -0,0 +1,18 @@
+yearly:
+  url: https://wustl.app.box.com/s/s7eiaxytjr9w1z7glat45cesitcemprv/folder/327763225614
+
+  zipname: Annual
+
+  file_prefix: "V6GL02.04.CNNPM25.0p10.NA"
+  #file name convention is V6GL02.04.CNNPM25.0p10.NA.yyyymm-yyyymm.nc
+
+monthly: 
+  url: https://wustl.app.box.com/s/s7eiaxytjr9w1z7glat45cesitcemprv/folder/327764742544
+  zipname: Monthly
+
+  file_prefix: "V6GL02.04.CNNPM25.0p10.NA"
+  #file name convention is V6GL02.04.CNNPM25.0p10.NA.yyyymm-yyyymm.nc
+
+layer: "GWRPM25" #geographic weighted regression PM2.5
+latitude_layer: "lat"
+longitude_layer: "lon"
diff --git a/requirements.yaml b/environment.yaml
similarity index 78%
rename from requirements.yaml
rename to environment.yaml
index 6f0ee4d..596e80a 100644
--- a/requirements.yaml
+++ b/environment.yaml
@@ -1,4 +1,4 @@
-name: satellite_pm25_raster2polygon
+name: pm25_randall
 channels:
   - conda-forge
   - defaults
@@ -19,6 +19,3 @@ dependencies:
     - selenium==4.29.0
     - chromedriver-binary==135.0.7030.0.0
     - tqdm==4.67.1
-    - torch==2.6.0
-    - torchaudio==2.6.0
-    - torchvision==0.21.0
diff --git a/fasrc_jobs/county_monthly.sbatch b/fasrc_jobs/county_monthly.sbatch
deleted file mode 100644
index 2f47486..0000000
--- a/fasrc_jobs/county_monthly.sbatch
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-#
-#SBATCH -p serial_requeue # partition (queue)
-#SBATCH -c 16 # number of cores 
-#SBATCH --mem 96GB # memory 
-#SBATCH -t 0-02:00 # time (D-HH:MM)
-
-singularity exec $HOME/singularity_images/satellite_pm25_raster2polygon_latest.sif snakemake --cores 16 -C polygon_name=county temporal_freq=monthly
diff --git a/fasrc_jobs/zcta_monthly.sbatch b/fasrc_jobs/zcta_monthly.sbatch
deleted file mode 100644
index 38c04a4..0000000
--- a/fasrc_jobs/zcta_monthly.sbatch
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-#
-#SBATCH -p shared # partition (queue)
-#SBATCH -c 32 # number of cores 
-#SBATCH --mem 96GB # memory 
-#SBATCH -t 0-01:00 # time (D-HH:MM)
-
-singularity exec $HOME/singularity_images/satellite_pm25_raster2polygon_latest.sif snakemake --cores 16 -C polygon_name=zcta temporal_freq=monthly
diff --git a/fasrc_jobs/README.md b/jobs/README.md
similarity index 100%
rename from fasrc_jobs/README.md
rename to jobs/README.md
diff --git a/jobs/county_monthly.sbatch b/jobs/county_monthly.sbatch
new file mode 100644
index 0000000..4e28318
--- /dev/null
+++ b/jobs/county_monthly.sbatch
@@ -0,0 +1,10 @@
+#!/bin/bash
+#
+#SBATCH -p serial_requeue # partition (queue)
+#SBATCH -c 16 # number of cores 
+#SBATCH --mem 96GB # memory 
+#SBATCH -t 0-02:00 # time (D-HH:MM)
+
+#singularity exec $HOME/singularity_images/satellite_pm25_raster2polygon_latest.sif snakemake --cores 16 -C polygon_name=county temporal_freq=monthly
+
+snakemake --cores 16 -C polygon_name=county temporal_freq=monthly
diff --git a/jobs/v5gl.sbatch b/jobs/v5gl.sbatch
new file mode 100644
index 0000000..8bb89cc
--- /dev/null
+++ b/jobs/v5gl.sbatch
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+#SBATCH -p serial_requeue # partition (queue)
+#SBATCH -c 48 # number of cores 
+#SBATCH --mem 184GB # memory 
+#SBATCH -t 0-12:00 # time (D-HH:MM)
+
+#singularity exec $HOME/singularity_images/satellite_pm25_raster2polygon_latest.sif snakemake --cores 16 -C polygon_name=county temporal_freq=monthly
+
+snakemake --cores 24 -C polygon_name=county temporal_freq=yearly
+snakemake --cores 24 -C polygon_name=county temporal_freq=monthly
+snakemake --cores 24 -C polygon_name=zcta temporal_freq=yearly
+snakemake --cores 24 -C polygon_name=zcta temporal_freq=monthly
diff --git a/jobs/zcta_monthly.sbatch b/jobs/zcta_monthly.sbatch
new file mode 100644
index 0000000..2f1d953
--- /dev/null
+++ b/jobs/zcta_monthly.sbatch
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+#SBATCH -p shared # partition (queue)
+#SBATCH -c 32 # number of cores 
+#SBATCH --mem 96GB # memory 
+#SBATCH -t 0-01:00 # time (D-HH:MM)
+
+#singularity exec $HOME/singularity_images/satellite_pm25_raster2polygon_latest.sif snakemake --cores 16 -C polygon_name=zcta temporal_freq=monthly
+
+snakemake --cores 32 -C polygon_name=zcta temporal_freq=monthly
+
diff --git a/jobs/zcta_yearly.sbatch b/jobs/zcta_yearly.sbatch
new file mode 100644
index 0000000..9d94155
--- /dev/null
+++ b/jobs/zcta_yearly.sbatch
@@ -0,0 +1,10 @@
+#!/bin/bash
+#
+#SBATCH -p shared # partition (queue)
+#SBATCH -c 32 # number of cores 
+#SBATCH --mem 96GB # memory 
+#SBATCH -t 0-01:00 # time (D-HH:MM)
+
+#singularity exec $HOME/singularity_images/satellite_pm25_raster2polygon_latest.sif snakemake --cores 16 -C polygon_name=zcta temporal_freq=monthly
+
+snakemake --cores 32 -C polygon_name=zcta temporal_freq=yearly
diff --git a/notes/eda_input.ipynb b/notes/eda_input.ipynb
index 9bb877c..55a1ae2 100644
--- a/notes/eda_input.ipynb
+++ b/notes/eda_input.ipynb
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -38,7 +38,7 @@
    ],
    "source": [
     "# Open the netCDF file\n",
-    "file_path = \"../data/input/satellite_pm25/annual/V5GL04.HybridPM25c_0p10.NorthAmerica.202201-202212.nc\"\n",
+    "file_path = f\"../{cfg.datapaths.base_path}/input/satellite_pm25/yearly/V5GL04.HybridPM25c_0p10.NorthAmerica.202201-202212.nc\"\n",
     "dataset = netCDF4.Dataset(file_path)\n",
     "\n",
     "# Print the global attributes\n",
@@ -183,7 +183,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -210,7 +210,7 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "# Open the netCDF file\n",
-    "file_path = \"data/V5GL04.HybridPM25.NorthAmerica.202201-202212.nc\"\n",
+    "file_path = f\"{cfg.datapaths.base_path}/V5GL04.HybridPM25.NorthAmerica.202201-202212.nc\"\n",
     "dataset = netCDF4.Dataset(file_path)\n",
     "\n",
     "# Get the latitude and longitude variables\n",
@@ -260,7 +260,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -280,12 +280,12 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "# Read the CSV file\n",
-    "pm25_data = pd.read_csv('data/county_pm25.csv')\n",
+    "pm25_data = pd.read_csv(f'{cfg.datapaths.base_path}/county_pm25.csv')\n",
     "# Convert GEOID to string using trailing zeros\n",
     "pm25_data['GEOID'] = pm25_data['GEOID'].astype(str).str.zfill(5)\n",
     "\n",
     "# Read the shapefile\n",
-    "shapefile = gpd.read_file('data/shapefile_cb_county_2015/shapefile.shp')\n",
+    "shapefile = gpd.read_file(f\"{cfg.datapaths.base_path}/shapefile_cb_county_2015/shapefile.shp\")\n",
     "\n",
     "# Merge the data\n",
     "merged_data = shapefile.merge(pm25_data, on='GEOID', how='left')\n",
diff --git a/notes/eda_output.ipynb b/notes/eda_output.ipynb
index 9ab63ca..dcc5f7e 100644
--- a/notes/eda_output.ipynb
+++ b/notes/eda_output.ipynb
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -47,7 +47,7 @@
    ],
    "source": [
     "# Open the netCDF file\n",
-    "file_path = \"data/V5GL04.HybridPM25.NorthAmerica.202201-202212.nc\"\n",
+    "file_path = f\"{cfg.datapaths.base_path}/input/satellite_pm25/yearly/V5GL04.HybridPM25c_0p10.NorthAmerica.202201-202212.nc\"\n",
     "dataset = netCDF4.Dataset(file_path)\n",
     "\n",
     "# Print the global attributes\n",
@@ -192,7 +192,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -219,7 +219,7 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "# Open the netCDF file\n",
-    "file_path = \"data/V5GL04.HybridPM25.NorthAmerica.202201-202212.nc\"\n",
+    "file_path = f\"{cfg.datapaths.base_path}/input/satellite_pm25/yearly/V5GL04.HybridPM25c_0p10.NorthAmerica.202201-202212.nc\"\n",
     "dataset = netCDF4.Dataset(file_path)\n",
     "\n",
     "# Get the latitude and longitude variables\n",
@@ -269,7 +269,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -279,7 +279,7 @@
     "import pyarrow.parquet as pq\n",
     "\n",
     "# Read parquet file with pm25 at county level for 2015\n",
-    "pm25_data = pq.read_table(\"data/output/satellite_pm25_raster2polygon/monthly/satellite_pm25_zcta_2015_01.parquet\").to_pandas()"
+    "pm25_data = pq.read_table(f\"{cfg.datapaths.base_path}/output/satellite_pm25_raster2polygon/monthly/satellite_pm25_zcta_2015_01.parquet\").to_pandas()"
    ]
   },
   {
@@ -368,7 +368,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -388,12 +388,12 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "# Read the CSV file\n",
-    "pm25_data = pd.read_csv('data/county_pm25.csv')\n",
+    "pm25_data = pd.read_csv(f'{cfg.datapaths.base_path}/county_pm25.csv')\n",
     "# Convert GEOID to string using trailing zeros\n",
     "pm25_data['GEOID'] = pm25_data['GEOID'].astype(str).str.zfill(5)\n",
     "\n",
     "# Read the shapefile\n",
-    "shapefile = gpd.read_file('data/shapefile_cb_county_2015/shapefile.shp')\n",
+    "shapefile = gpd.read_file(f\"{cfg.datapaths.base_path}/shapefile_cb_county_2015/shapefile.shp\")\n",
     "\n",
     "# Merge the data\n",
     "merged_data = shapefile.merge(pm25_data, on='GEOID', how='left')\n",
diff --git a/src/aggregate_pm25.py b/src/aggregate_pm25.py
index 21a7dd1..cb320b5 100644
--- a/src/aggregate_pm25.py
+++ b/src/aggregate_pm25.py
@@ -39,7 +39,7 @@ def main(cfg):
     #use previously available shapefile
     shapefile_year = available_shapefile_year(cfg.year, shapefile_years_list)
 
-    shape_path = f'data/input/shapefiles/shapefile_{cfg.polygon_name}_{shapefile_year}/shapefile.shp'
+    shape_path = f'{cfg.datapaths.base_path}/input/shapefiles/shapefile_{cfg.polygon_name}_{shapefile_year}/shapefile.shp'
     polygon = gpd.read_file(shape_path)
     polygon_ids = polygon[cfg.shapefiles[cfg.polygon_name][shapefile_year].idvar].values
 
@@ -62,7 +62,7 @@ def main(cfg):
     # load the first file to obtain the affine transform/boundaries
     LOGGER.info("Mapping polygons to raster cells.")
 
-    ds = xarray.open_dataset(f"data/input/pm25__washu__raw/{cfg.temporal_freq}/{filenames[0]}")
+    ds = xarray.open_dataset(f"{cfg.datapaths.base_path}/input/raw/{cfg.temporal_freq}/{filenames[0]}")
     layer = getattr(ds, cfg.satellite_pm25.layer)
 
     # obtain affine transform/boundaries
@@ -90,7 +90,7 @@ def main(cfg):
 
         if i > 0:
             # reload the file only if it is different from the first one
-            ds = xarray.open_dataset(f"data/input/pm25__washu__raw/{cfg.temporal_freq}/{filename}")
+            ds = xarray.open_dataset(f"{cfg.datapaths.base_path}/input/raw/{cfg.temporal_freq}/{filename}")
             layer = getattr(ds, cfg.satellite_pm25.layer)
 
         # === obtain stats quickly using precomputed mapping
@@ -111,15 +111,15 @@ def main(cfg):
         # == save output file
         if cfg.temporal_freq == "yearly":
             # ignore month since len(filenames) == 1
-            output_filename = f"pm25__washu__{cfg.polygon_name}_{cfg.temporal_freq}__{cfg.year}.parquet"
+            output_filename = f"pm25__randall__{cfg.polygon_name}_{cfg.temporal_freq}__{cfg.year}.parquet"
 
         elif cfg.temporal_freq == "monthly":
             # use month in filename since len(filenames) = 12
             month = f"{i + 1:02d}"
             df["month"] = month
-            output_filename = f"pm25__washu__{cfg.polygon_name}_{cfg.temporal_freq}__{cfg.year}_{month}.parquet"
+            output_filename = f"pm25__randall__{cfg.polygon_name}_{cfg.temporal_freq}__{cfg.year}_{month}.parquet"
 
-        output_path = f"data/output/pm25__washu/{cfg.polygon_name}_{cfg.temporal_freq}/{output_filename}"
+        output_path = f"{cfg.datapaths.base_path}/output/{cfg.polygon_name}_{cfg.temporal_freq}/{output_filename}"
         df.to_parquet(output_path)
 
         # plot aggregation map using geopandas
diff --git a/utils/create_dir_paths.py b/src/create_datapaths.py
similarity index 77%
rename from utils/create_dir_paths.py
rename to src/create_datapaths.py
index 696390d..a3d0d13 100644
--- a/utils/create_dir_paths.py
+++ b/src/create_datapaths.py
@@ -5,14 +5,31 @@
 
 LOGGER = logging.getLogger(__name__)
 
+def init_folder(folder_cfg=None):
+    folder_dict = folder_cfg.dirs
+    
+    # defines a base path for the data
+    datapath = folder_cfg.base_path
+    if datapath is None:
+        datapath = "data"
+    # check if datapath exists, if not create it 
+    if os.path.exists(datapath):
+        LOGGER.info(f"Base path {datapath} already exists")
+    else:
+        LOGGER.info(f"Creating base path {datapath}")
+        os.makedirs(datapath, exist_ok=True)
+
+    # create subfolders and symbolic links
+    create_subfolders_and_links(datapath=datapath, folder_dict=folder_dict)
 
 def create_subfolders_and_links(datapath="data", folder_dict=None):
     """
     Recursively create subfolders and symbolic links.
     """
     if not os.path.exists(datapath):
-        LOGGER.info(f"Error: {datapath} does not exists.")
+        LOGGER.info(f"Error: {datapath} does not exist.")
         return
+
     if isinstance(folder_dict, DictConfig):
         for path, subfolder_dict in folder_dict.items():
             sub_datapath = os.path.join(datapath, path)
@@ -50,7 +67,7 @@ def create_subfolders_and_links(datapath="data", folder_dict=None):
 @hydra.main(config_path="../conf", config_name="config", version_base=None)
 def main(cfg):
     """Create data subfolders and symbolic links as indicated in config file."""
-    create_subfolders_and_links(folder_dict=cfg.datapaths)
+    init_folder(folder_cfg=cfg.datapaths)
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/src/download_pm25.py b/src/download_pm25.py
index a063c41..2466aec 100644
--- a/src/download_pm25.py
+++ b/src/download_pm25.py
@@ -25,7 +25,7 @@ def main(cfg):
 
     # == setup chrome driver
     # Expand the tilde to the user's home directory
-    download_dir = f"data/input/pm25__washu__raw/"
+    download_dir = f"{cfg.datapaths.base_path}/input/raw/"
     download_dir = os.path.abspath(download_dir)
     download_zip = f"{download_dir}/{cfg.satellite_pm25[cfg.temporal_freq].zipname}.zip"
     src_dir = f"{download_dir}/{cfg.satellite_pm25[cfg.temporal_freq].zipname}"
diff --git a/src/download_shapefile.py b/src/download_shapefile.py
index 492c9ca..0ea0059 100644
--- a/src/download_shapefile.py
+++ b/src/download_shapefile.py
@@ -8,7 +8,7 @@
 def main(cfg):
     url = cfg.shapefiles[cfg.polygon_name][cfg.shapefile_year].url
 
-    tgt = f"data/input/shapefiles/shapefile_{cfg.polygon_name}_{cfg.shapefile_year}"
+    tgt = f"{cfg.datapaths.base_path}/input/shapefiles/shapefile_{cfg.polygon_name}_{cfg.shapefile_year}"
 
     tgtdir = os.path.dirname(tgt)
     tgtfile = os.path.basename(tgt)