Changes from all commits (20 commits)
52cef78
cleanup: no longer using travis ci
scrasmussen Apr 5, 2024
cd739e3
Adding restarted_from global attribute to indicate at what point a ru…
scrasmussen Apr 5, 2024
a4668cb
Small fixes and removing pandas dependency from files for generating …
scrasmussen Apr 5, 2024
213fd18
Adding makefile to test directory to make generating and running idea…
scrasmussen Apr 5, 2024
44303eb
Adding restarted_from global attribute to NetCDF files to help make s…
scrasmussen Apr 8, 2024
58639ed
Added a prep step to the aggregation. Checks the restarted_from globa…
scrasmussen Apr 8, 2024
da8d3e2
Add post target to makefile in ideal testing directory
scrasmussen Apr 8, 2024
8445831
Adding requirement files and README instructions for Python tools
scrasmussen Apr 8, 2024
ec8262d
Ignoring git status of generated and NetCDF files produced by ideal t…
scrasmussen Apr 8, 2024
74801b1
do aggregate prep during continous runs
scrasmussen Apr 8, 2024
7710b32
aggregate script now deals with case if no restarted_from attribute e…
scrasmussen Apr 8, 2024
8d2461c
Small Tiedtke spelling fix
scrasmussen Apr 9, 2024
2f67418
Calling xarray function instead of pandas to remove the pandas requir…
scrasmussen Apr 19, 2024
bb5851d
Removing Python matrix-nio package from requirements and adding matpl…
scrasmussen Apr 19, 2024
7521d70
Calling aggregate_prep earlier in continuous function. If outputted f…
scrasmussen Apr 19, 2024
7bbfc60
Outputted files have a new init function that calls the three functio…
scrasmussen Apr 19, 2024
68df783
Update to README on how to setup PYTHONPATH and what scripts use depr…
scrasmussen Apr 23, 2024
652b436
updating comments
scrasmussen Apr 29, 2024
c76f98d
Updating helper/README and Python tools dependecy files, removed Pandas
scrasmussen Apr 29, 2024
ff80fdf
switching prefix back to file_search string
scrasmussen Jun 4, 2024
4 changes: 4 additions & 0 deletions .gitignore
@@ -12,3 +12,7 @@ docs/doxygen_sqlite3.db
*~
*.pyc
*.ipynb

# ignore files generated during ideal testing
tests/*.nc
tests/icar_options.nml
25 changes: 0 additions & 25 deletions .travis.yml

This file was deleted.

4 changes: 2 additions & 2 deletions docs/settings_documentation.md
@@ -104,7 +104,7 @@ This optional namelist specifies various parameters that are used in the advecti
water=2, ! 1=use prescribed (w/lsm=1) 2=Simple sea surface fluxes
mp = 1, ! 1=Thompson 2=Simple (SB04) 3=Morrison (wishlist)
rad = 0, ! 1=use prescribed fluxes 2=Simple (empirical) 3=RRTMG (wishlist)
conv= 0, ! 1=Tiedke Scheme 2=Simple Scheme (wishlist) 3=Kain-Fritsch
conv= 0, ! 1=Tiedtke Scheme 2=Simple Scheme (wishlist) 3=Kain-Fritsch
adv = 1, ! 1=Upwind 2=MPDATA 3=Adams-Bashforth (wishlist)
wind= 1 ! 1=Linear Theory 2=INFORM style (wishlist) 3=Dynamical? (wishlist)
/
@@ -274,7 +274,7 @@ This optional namelist specifies various parameters that are used in the advecti
swdown_var = "SWDOWN", ! Shortwave down [W/m^2]
lwdown_var = "GLW", ! Longwave down [W/m^2]

! only required for some physics code (Noah LSM, water, Tiedke, KF(?))
! only required for some physics code (Noah LSM, water, Tiedtke, KF(?))
landvar = "LANDMASK", ! land-water mask (as in WRF) 1=land, 0 or 2=water

! NOTE, these variables should be in the high-resolution initial conditions netcdf file
30 changes: 30 additions & 0 deletions helpers/README.md
@@ -0,0 +1,30 @@
# Install Python Dependencies
The following instructions and dependency files work for the core ICAR scripts.
Tools in `make_domain.py` and in the ccsm, cesm, cmip, erai, and wrf directories also require the mygis package.
The Python script `ideal_linear.py` requires Nio, installed with `pip install nio`.

## Setup Environment
### Install With Conda
```bash
$ conda env create -f environment.yml --prefix /path/to/install/icar_env
$ conda activate icar_env

# set PYTHONPATH; this will be saved for future use by the environment
$ conda env config vars set PYTHONPATH=$(pwd)/lib:$PYTHONPATH

# reactivate the environment
$ conda activate icar_env
```
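
To confirm the variable was stored with the environment, list the variables conda sets on activation:
```bash
$ conda env config vars list
```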

### Install With Pip
```bash
$ pip install -r requirements.txt
```
Make sure the `lib` directory is on the `PYTHONPATH`; add the export below to `.bashrc` or another startup file for repeat use.
```bash
$ export PYTHONPATH=$(pwd)/lib:$PYTHONPATH
```
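
A quick sanity check that Python picks up the path:
```bash
$ python -c 'import sys; print([p for p in sys.path if p.endswith("/lib")])'
```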


## Deprecated Scripts
The [Nio package](https://www.pyngl.ucar.edu/Nio.shtml) used in `create_geo_testfiles.py` is not installed as a dependency.
123 changes: 118 additions & 5 deletions helpers/aggregate_parallel_files.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python
import os
import glob
import re
import time
# import multiprocessing as mp

@@ -9,11 +10,10 @@

import sys

# global variables
pool = None

# This should be an input, this is the search string that is assumed to match
# the output files to be aggregated.
file_search = "icar_out_{ens}_*"
no_restarted_from_s = 'No restarted_from Attribute'
date_search_s = '[1-9][0-9][0-9][0-9]-[0-2][0-9]-[0-3][0-9]_[0-9][0-9]-[0-9][0-9]-[0-9][0-9]'
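
# A minimal usage sketch (illustrative): the character-class pattern above
# works both as a glob pattern (to find aggregated files on disk) and as a
# regular expression (to pull the timestamp back out of a file name), e.g.
#   import fnmatch
#   name = 'icar_out_2000-01-01_00-00-00.nc'
#   fnmatch.fnmatch(name, 'icar_out_' + date_search_s + '*')  # -> True
#   re.findall(date_search_s, name)  # -> ['2000-01-01_00-00-00']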


def load_file(file_name):
@@ -29,6 +29,7 @@ def get_dims(dataset, section="d"):
results.append(int(dataset.attrs[axis + section + position]))
return results


def get_dim_offset(dims):
'''Return x_offset, y_offset
For the staggered dims, offset=1, otherwise offset=0'''
@@ -40,6 +41,7 @@ def get_dim_offset(dims):

return x_off, y_off


def set_up_dataset(d):
'''Create a dataset to cover the entire domain with the variables present in d

@@ -159,14 +161,117 @@ def agg_file(first_file, verbose=True):
print(outputfile)
data_set.to_netcdf(outputfile)

def main(file_search = "icar_out_{ens}_*"):

def find_aggregate_from_date(file_search):
'''
Finds the date of the last aggregated file based on a specific prefix and date format.

Returns:
str or False: The date of the last aggregated file if found, otherwise False.
'''
first_files = glob.glob(file_search.format(ens="000001"))
if not first_files:
print("Exiting: no output files matching", file_search.format(ens="000001"))
sys.exit()
first_files.sort()

# following 2000-01-01_00-00-00 format
find_agg_s = file_search.format(ens=date_search_s)
first_agg_files = glob.glob(find_agg_s)
# if there are no aggregated files
if not first_agg_files:
return False #first_files[0]

# glob returns files in arbitrary order; sort before taking the last one
first_agg_files.sort()
last_agg_file = first_agg_files[-1]
print("last_agg_file", last_agg_file)

last_agg_file_date = re.findall(date_search_s, last_agg_file)[0]
return last_agg_file_date


def get_restart_from_date(file_search, file_date):
'''
Gets the value of the attribute 'restarted_from' from an output file based on a given date.

Args:
file_search (str): The search pattern used to construct the filename.
file_date (str): The date used to construct the filename.

Returns:
str: The date from the 'restarted_from' attribute if the run was restarted, otherwise
'No restarted_from Attribute' or 'Not Restarted'
'''
out_filename = file_search.replace('*','').format(ens='000001_') + file_date + '.nc'

ds = xr.open_dataset(out_filename)
try:
restarted_from = ds.attrs['restarted_from']
except KeyError:
restarted_from = no_restarted_from_s

return restarted_from
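
# For reference, a hypothetical sketch of the write side (not part of this
# module): tagging an output file with the 'restarted_from' global attribute
# from Python would look roughly like
#   ds = xr.load_dataset(out_filename)  # hypothetical file name
#   ds.attrs['restarted_from'] = '2000-01-01_00-00-00'
#   ds.to_netcdf(out_filename)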


def aggregate_prep(file_search):
'''
Prepares aggregated files based on the current state of the output.

This function determines the latest aggregated file and checks whether the
current NetCDF output comes from a restarted run. If it does, aggregated
files from the restarted_from date onward are removed; otherwise the user is
told that the files are not from a restarted run or that they lack the
restarted_from attribute.

Returns:
None
'''
find_agg_s = file_search.format(ens=date_search_s)

# find the last aggregated file, if no aggregated files, no prep needed
agg_from_date = find_aggregate_from_date(file_search)
if agg_from_date is False:
print("No aggregated files")
return

# check the current output's restarted-from date and get list of aggregated files
restarted_from = get_restart_from_date(file_search, agg_from_date)
agg_files = glob.glob(find_agg_s)
agg_files.sort()

remove_from_file = None
# only remove aggregate files from "restarted_from" date onward, otherwise
# print warnings
if restarted_from == 'Not Restarted':
print("Note: output files not from a restart run")
# print("Outputted files not from restart run, removing all aggregated files")
# remove_from_file = agg_files[0]
elif restarted_from == no_restarted_from_s:
print("Note: output files do not have 'restarted_from' attribute")
# print("Output files do not have 'restarted_from' attribute, removing all aggregated files")
# remove_from_file = agg_files[0]
else:
# delete every aggregated file from restarted_from date on
print("Recreating aggregated files from", restarted_from, "onward")
# build the aggregated file name from file_search (this assumes
# aggregated files are named <prefix><restart date>.nc)
remove_from_file = file_search.replace('*', '').format(ens=restarted_from) + '.nc'

# remove files
start_deleting = False
for f in agg_files:
if f == remove_from_file:
start_deleting = True
if start_deleting:
os.remove(f)
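# e.g. with agg_files sorted as [f1, f2, f3] and remove_from_file == f2,
# f2 and f3 are deleted and f1 is kept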


def main(file_search = "icar_out_{ens}*"):
first_files = glob.glob(file_search.format(ens="000001"))
first_files.sort()

# For some reason running the parallelization this far out seems to have far worse performance...
# would map_async be faster for some reason? I assume map is still parallel.
# pool.map(agg_file, first_files)

aggregate_prep(file_search)

for f in first_files:
agg_file(f)

@@ -176,7 +281,15 @@ def main(file_search = "icar_out_{ens}_*"):


def continuous(file_search):
'''
Runs continuous aggregation of output files

Returns:
None
'''
print("Running continuous aggregation, Ctrl-C to stop")
aggregate_prep(file_search)

while True:
first_files = glob.glob(file_search.format(ens="000001"))
first_files.sort()
20 changes: 10 additions & 10 deletions helpers/ccsm/config.py
@@ -15,28 +15,28 @@ def set_bounds(info):
ccsm_file=atm_file.replace("_Y_","2006").replace("_M_","01").replace("_D_","01").replace("_VAR_","hus")
ccsm_file=glob.glob(ccsm_file)[0]
varlist=["lat","lon"]

lat=io.read_nc(ccsm_file,varlist[0]).data
lon=io.read_nc(ccsm_file,varlist[1]).data-360

info.xmin=np.where(lon>=info.lon[0])[0][0]
info.xmax=np.where(lon<=info.lon[1])[0][-1]+1
info.ymin=np.where(lat>=info.lat[0])[0][0]
info.ymax=np.where(lat<=info.lat[1])[0][-1]+1

lon,lat=np.meshgrid(lon[info.xmin:info.xmax],lat[info.ymin:info.ymax])
info.lat_data=lat
info.lon_data=lon

def make_timelist(info,hrs=6.0):
dt=datetime.timedelta(hrs/24)
info.ntimes=np.int(np.round((info.end_date-info.start_date).total_seconds()/60./60./hrs))
info.ntimes=np.int64(np.round((info.end_date-info.start_date).total_seconds()/60./60./hrs))
info.times=[info.start_date+dt*i for i in range(info.ntimes)]
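# e.g. with hrs=6.0, a start_date of 2006-01-01 and an end_date of 2006-01-02
# give ntimes = 24/6 = 4 and times at 00:00, 06:00, 12:00, 18:00 on 2006-01-01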

def update_info(info):
make_timelist(info)
set_bounds(info)


def parse():
parser= argparse.ArgumentParser(description='Convert CCSM files to ICAR input forcing files')
@@ -51,24 +51,24 @@ def parse():
parser.add_argument('sfcdir', nargs="?",action='store',help="CCSM surface data file location", default="ccsm_sfc/")
parser.add_argument('atmfile', nargs="?",action='store',help="CCSM atmospheric files", default="_VAR__6hrLev_CCSM4_rcp85_r6i1p1__Y__M__D_00-*.nc")
parser.add_argument('sfcfile', nargs="?",action='store',help="CCSM surface files", default="_VAR__3hr_CCSM4_rcp85_r6i1p1__Y__M__D_0000-*.nc")

parser.add_argument('-v', '--version',action='version',
version='CCSM2ICAR v'+version)
parser.add_argument ('--verbose', action='store_true',
default=False, help='verbose output', dest='verbose')
args = parser.parse_args()

date0=args.start_date.split("-")
start_date=datetime.datetime(int(date0[0]),int(date0[1]),int(date0[2]))

date0=args.end_date.split("-")
end_date=datetime.datetime(int(date0[0]),int(date0[1]),int(date0[2]))

info=Bunch(lat=[float(args.lat_s),float(args.lat_n)],
lon=[float(args.lon_w),float(args.lon_e)],
start_date=start_date, end_date=end_date,
atmdir=args.dir+args.atmdir, sfcdir=args.dir+args.sfcdir,
atmfile=args.atmfile, sfcfile=args.sfcfile,
version=version)

return info