Changes from all commits (20 commits)
52cef78
cleanup: no longer using travis ci
scrasmussen Apr 5, 2024
cd739e3
Adding restarted_from global attribute to indicate at what point a ru…
scrasmussen Apr 5, 2024
a4668cb
Small fixes and removing pandas dependency from files for generating …
scrasmussen Apr 5, 2024
213fd18
Adding makefile to test directory to make generating and running idea…
scrasmussen Apr 5, 2024
44303eb
Adding restarted_from global attribute to NetCDF files to help make s…
scrasmussen Apr 8, 2024
58639ed
Added a prep step to the aggregation. Checks the restarted_from globa…
scrasmussen Apr 8, 2024
da8d3e2
Add post target to makefile in ideal testing directory
scrasmussen Apr 8, 2024
8445831
Adding requirement files and README instructions for Python tools
scrasmussen Apr 8, 2024
ec8262d
Ignoring git status of generated and NetCDF files produced by ideal t…
scrasmussen Apr 8, 2024
74801b1
do aggregate prep during continous runs
scrasmussen Apr 8, 2024
7710b32
aggregate script now deals with case if no restarted_from attribute e…
scrasmussen Apr 8, 2024
8d2461c
Small Tiedtke spelling fix
scrasmussen Apr 9, 2024
2f67418
Calling xarray function instead of pandas to remove the pandas requir…
scrasmussen Apr 19, 2024
bb5851d
Removing Python matrix-nio package from requirements and adding matpl…
scrasmussen Apr 19, 2024
7521d70
Calling aggregate_prep earlier in continuous function. If outputted f…
scrasmussen Apr 19, 2024
7bbfc60
Outputted files have a new init function that calls the three functio…
scrasmussen Apr 19, 2024
68df783
Update to README on how to setup PYTHONPATH and what scripts use depr…
scrasmussen Apr 23, 2024
652b436
updating comments
scrasmussen Apr 29, 2024
c76f98d
Updating helper/README and Python tools dependecy files, removed Pandas
scrasmussen Apr 29, 2024
ff80fdf
switching prefix back to file_search string
scrasmussen Jun 4, 2024
4 changes: 4 additions & 0 deletions .gitignore
@@ -12,3 +12,7 @@ docs/doxygen_sqlite3.db
*~
*.pyc
*.ipynb

# ignore files generated during ideal testing
tests/*.nc
tests/icar_options.nml
25 changes: 0 additions & 25 deletions .travis.yml

This file was deleted.

4 changes: 2 additions & 2 deletions docs/settings_documentation.md
@@ -104,7 +104,7 @@ This optional namelist specifies various parameters that are used in the advecti
water=2, ! 1=use prescribed (w/lsm=1) 2=Simple sea surface fluxes
mp = 1, ! 1=Thompson 2=Simple (SB04) 3=Morrison (wishlist)
rad = 0, ! 1=use prescribed fluxes 2=Simple (empirical) 3=RRTMG (wishlist)
conv= 0, ! 1=Tiedke Scheme 2=Simple Scheme (wishlist) 3=Kain-Fritsch
conv= 0, ! 1=Tiedtke Scheme 2=Simple Scheme (wishlist) 3=Kain-Fritsch
adv = 1, ! 1=Upwind 2=MPDATA 3=Adams-Bashforth (wishlist)
wind= 1 ! 1=Linear Theory 2=INFORM style (wishlist) 3=Dynamical? (wishlist)
/
@@ -274,7 +274,7 @@ This optional namelist specifies various parameters that are used in the advecti
swdown_var = "SWDOWN", ! Shortwave down [W/m^2]
lwdown_var = "GLW", ! Longwave down [W/m^2]

! only required for some physics code (Noah LSM, water, Tiedke, KF(?))
! only required for some physics code (Noah LSM, water, Tiedtke, KF(?))
landvar = "LANDMASK", ! land-water mask (as in WRF) 1=land, 0 or 2=water

! NOTE, these variables should be in the high-resolution initial conditions netcdf file
30 changes: 30 additions & 0 deletions helpers/README.md
@@ -0,0 +1,30 @@
# Install Python Dependencies
The following instructions and dependency files work for the core ICAR scripts.
Tools in `make_domain.py` and in the ccsm, cesm, cmip, erai, and wrf directories also require the mygis package.
The Python script `ideal_linear.py` requires Nio, installed with `pip install nio`.

## Setup Environment
### Install With Conda
```bash
$ conda env create -f environment.yml --prefix /path/to/install/icar_env
$ conda activate icar_env

# set PYTHONPATH; this will be saved for future use by the environment
$ conda env config vars set PYTHONPATH=$(pwd)/lib:$PYTHONPATH

# reactivate the environment
$ conda activate icar_env
```
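
To confirm the variable was stored with the environment, list the variables conda sets on activation:
```bash
$ conda env config vars list
```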

### Install With Pip
```bash
$ pip install -r requirements.txt
```
Make sure the `lib` directory is on the `PYTHONPATH`; add the export below to `.bashrc` or another startup file for repeat use.
```bash
$ export PYTHONPATH=$(pwd)/lib:$PYTHONPATH
```
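
A quick sanity check that Python picks up the path:
```bash
$ python -c 'import sys; print([p for p in sys.path if p.endswith("/lib")])'
```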


## Deprecated Scripts
The [Nio package](https://www.pyngl.ucar.edu/Nio.shtml) used in `create_geo_testfiles.py` is not installed as a dependency.
123 changes: 118 additions & 5 deletions helpers/aggregate_parallel_files.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python
import os
import glob
import re
import time
# import multiprocessing as mp

@@ -9,11 +10,10 @@

import sys

# global variables
pool = None

# This should be an input, this is the search string that is assumed to match
# the output files to be aggregated.
file_search = "icar_out_{ens}_*"
no_restarted_from_s = 'No restarted_from Attribute'
date_search_s = '[1-9][0-9][0-9][0-9]-[0-2][0-9]-[0-3][0-9]_[0-9][0-9]-[0-9][0-9]-[0-9][0-9]'
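
# A minimal usage sketch (illustrative): the character-class pattern above
# works both as a glob pattern (to find aggregated files on disk) and as a
# regular expression (to pull the timestamp back out of a file name), e.g.
#   import fnmatch
#   name = 'icar_out_2000-01-01_00-00-00.nc'
#   fnmatch.fnmatch(name, 'icar_out_' + date_search_s + '*')  # -> True
#   re.findall(date_search_s, name)  # -> ['2000-01-01_00-00-00']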


def load_file(file_name):
@@ -29,6 +29,7 @@ def get_dims(dataset, section="d"):
results.append(int(dataset.attrs[axis + section + position]))
return results


def get_dim_offset(dims):
'''Return x_offset, y_offset
For the staggered dims, offset=1, otherwise offset=0'''
@@ -40,6 +41,7 @@ def get_dim_offset(dims):

return x_off, y_off


def set_up_dataset(d):
'''Create a dataset to cover the entire domain with the variables present in d

@@ -159,14 +161,117 @@ def agg_file(first_file, verbose=True):
print(outputfile)
data_set.to_netcdf(outputfile)

def main(file_search = "icar_out_{ens}_*"):

def find_aggregate_from_date(file_search):
'''
Finds the date of the last aggregated file based on a specific prefix and date format.

Returns:
str or False: The date of the last aggregated file if found, otherwise False.
'''
first_files = glob.glob(file_search.format(ens="000001"))
if not first_files:
print("Exiting: no output files matching", file_search.format(ens="000001"))
sys.exit()
first_files.sort()

# following 2000-01-01_00-00-00 format
find_agg_s = file_search.format(ens=date_search_s)
first_agg_files = glob.glob(find_agg_s)
# if there are no aggregated files
if not first_agg_files:
return False #first_files[0]

# glob returns files in arbitrary order; sort before taking the last one
first_agg_files.sort()
last_agg_file = first_agg_files[-1]
print("last_agg_file", last_agg_file)

last_agg_file_date = re.findall(date_search_s, last_agg_file)[0]
return last_agg_file_date


def get_restart_from_date(file_search, file_date):
'''
Gets the value of the attribute 'restarted_from' from an output file based on a given date.

Args:
file_search (str): The search pattern used to construct the filename.
file_date (str): The date used to construct the filename.

Returns:
str: The date from the 'restarted_from' attribute if the run was restarted, otherwise
'No restarted_from Attribute' or 'Not Restarted'
'''
out_filename = file_search.replace('*','').format(ens='000001_') + file_date + '.nc'

ds = xr.open_dataset(out_filename)
try:
restarted_from = ds.attrs['restarted_from']
except KeyError:
restarted_from = no_restarted_from_s

return restarted_from
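
# For reference, a hypothetical sketch of the write side (not part of this
# module): tagging an output file with the 'restarted_from' global attribute
# from Python would look roughly like
#   ds = xr.load_dataset(out_filename)  # hypothetical file name
#   ds.attrs['restarted_from'] = '2000-01-01_00-00-00'
#   ds.to_netcdf(out_filename)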


def aggregate_prep(file_search):
'''
Prepares aggregated files based on the current state of the output.

This function determines the latest aggregated file and checks whether the
current NetCDF output comes from a restarted run. If it does, aggregated
files from the restarted_from date onward are removed; otherwise the user is
told that the files are not from a restarted run or that they lack the
restarted_from attribute.

Returns:
None
'''
find_agg_s = file_search.format(ens=date_search_s)

# find the last aggregated file, if no aggregated files, no prep needed
agg_from_date = find_aggregate_from_date(file_search)
if agg_from_date is False:
print("No aggregated files")
return

# check the current output's restarted-from date and get list of aggregated files
restarted_from = get_restart_from_date(file_search, agg_from_date)
agg_files = glob.glob(find_agg_s)
agg_files.sort()

remove_from_file = None
# only remove aggregate files from "restarted_from" date onward, otherwise
# print warnings
if restarted_from == 'Not Restarted':
print("Note: output files not from a restart run")
# print("Outputted files not from restart run, removing all aggregated files")
# remove_from_file = agg_files[0]
elif restarted_from == no_restarted_from_s:
print("Note: output files do not have 'restarted_from' attribute")
# print("Output files do not have 'restarted_from' attribute, removing all aggregated files")
# remove_from_file = agg_files[0]
else:
# delete every aggregated file from restarted_from date on
print("Recreating aggregated files from", restarted_from, "onward")
# build the aggregated file name from file_search (this assumes
# aggregated files are named <prefix><restart date>.nc)
remove_from_file = file_search.replace('*', '').format(ens=restarted_from) + '.nc'

# remove files
start_deleting = False
for f in agg_files:
if f == remove_from_file:
start_deleting = True
if start_deleting:
os.remove(f)
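# e.g. with agg_files sorted as [f1, f2, f3] and remove_from_file == f2,
# f2 and f3 are deleted and f1 is kept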


def main(file_search = "icar_out_{ens}*"):
first_files = glob.glob(file_search.format(ens="000001"))
first_files.sort()

# For some reason running the parallelization this far out seems to have far worse performance...
# would map_async be faster for some reason? I assume map is still parallel.
# pool.map(agg_file, first_files)

aggregate_prep(file_search)

for f in first_files:
agg_file(f)

@@ -176,7 +281,15 @@ def main(file_search = "icar_out_{ens}_*"):


def continuous(file_search):
'''
Runs continuous aggregation of output files

Returns:
None
'''
print("Running continuous aggregation, Ctrl-C to stop")
aggregate_prep(file_search)

while True:
first_files = glob.glob(file_search.format(ens="000001"))
first_files.sort()
20 changes: 10 additions & 10 deletions helpers/ccsm/config.py
@@ -15,28 +15,28 @@ def set_bounds(info):
ccsm_file=atm_file.replace("_Y_","2006").replace("_M_","01").replace("_D_","01").replace("_VAR_","hus")
ccsm_file=glob.glob(ccsm_file)[0]
varlist=["lat","lon"]

lat=io.read_nc(ccsm_file,varlist[0]).data
lon=io.read_nc(ccsm_file,varlist[1]).data-360

info.xmin=np.where(lon>=info.lon[0])[0][0]
info.xmax=np.where(lon<=info.lon[1])[0][-1]+1
info.ymin=np.where(lat>=info.lat[0])[0][0]
info.ymax=np.where(lat<=info.lat[1])[0][-1]+1

lon,lat=np.meshgrid(lon[info.xmin:info.xmax],lat[info.ymin:info.ymax])
info.lat_data=lat
info.lon_data=lon

def make_timelist(info,hrs=6.0):
dt=datetime.timedelta(hrs/24)
info.ntimes=np.int(np.round((info.end_date-info.start_date).total_seconds()/60./60./hrs))
info.ntimes=np.int64(np.round((info.end_date-info.start_date).total_seconds()/60./60./hrs))
info.times=[info.start_date+dt*i for i in range(info.ntimes)]
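# e.g. with hrs=6.0, a start_date of 2006-01-01 and an end_date of 2006-01-02
# give ntimes = 24/6 = 4 and times at 00:00, 06:00, 12:00, 18:00 on 2006-01-01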

def update_info(info):
make_timelist(info)
set_bounds(info)


def parse():
parser= argparse.ArgumentParser(description='Convert CCSM files to ICAR input forcing files')
@@ -51,24 +51,24 @@ def parse():
parser.add_argument('sfcdir', nargs="?",action='store',help="CCSM surface data file location", default="ccsm_sfc/")
parser.add_argument('atmfile', nargs="?",action='store',help="CCSM atmospheric files", default="_VAR__6hrLev_CCSM4_rcp85_r6i1p1__Y__M__D_00-*.nc")
parser.add_argument('sfcfile', nargs="?",action='store',help="CCSM surface files", default="_VAR__3hr_CCSM4_rcp85_r6i1p1__Y__M__D_0000-*.nc")

parser.add_argument('-v', '--version',action='version',
version='CCSM2ICAR v'+version)
parser.add_argument ('--verbose', action='store_true',
default=False, help='verbose output', dest='verbose')
args = parser.parse_args()

date0=args.start_date.split("-")
start_date=datetime.datetime(int(date0[0]),int(date0[1]),int(date0[2]))

date0=args.end_date.split("-")
end_date=datetime.datetime(int(date0[0]),int(date0[1]),int(date0[2]))

info=Bunch(lat=[float(args.lat_s),float(args.lat_n)],
lon=[float(args.lon_w),float(args.lon_e)],
start_date=start_date, end_date=end_date,
atmdir=args.dir+args.atmdir, sfcdir=args.dir+args.sfcdir,
atmfile=args.atmfile, sfcfile=args.sfcfile,
version=version)

return info