diff --git a/CHANGELOG.md b/CHANGELOG.md index 3460b02a..f8201caa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added new constants for default lon/lat and stretched-grid settings in `gcpy/constants.py` - Added PyDoc headers in routines where they were missing - Added `examples/grids/display_gcclassic_grid_info.py` to display info about a GEOS-Chem Classic horizontal grid +- Added functions `get_molwt_from_metadata` and `read_species_metadata` to `gcpy/util.py` +- Added function `get_species_database_files` to `gcpy/benchmark/modules/benchmark_utils.py` +- Added constant `SPECIES_DATABASE` to `gcpy/benchmark/modules/benchmark_utils.py` ### Changed - Modified criteria for terminating read of log files in `benchmark_scrape_gcclassic_timers.py` to avoid being spoofed by output that is attached by Intel VTune @@ -39,16 +42,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Updated default GCPy Python environment to use Python 3.13 (instead of 3.12) - Benchmark routines now look for `species_database.yml` in the `Ref` and `Dev` run directories - Replaced `get_species_database_dir` with `get_species_database_files` in `gcpy/benchmark/modules/benchmark_funcs.py` -- Replaced `spcdb_dir` YAML tag with directory-specific `species_metadata` tags to specify paths to `species_database.yml` files +- Updated `gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py` to look for GCHP timers in `allPEs.log` if not found in the log file +- Updated routine `make_benchmark_aerosol_tables` to include all dust species in the aerosol burdens table ### Fixed - Fixed grid area calculation scripts of `grid_area` in `gcpy/gcpy/cstools.py` - Fixed various security issues in GitHub Actions workflows - Fixed colorbar bounds for case of comparing cubed-sphere grids - Fixed the restart regridding for stretched GCHP when target lat/lon is exactly 0.0 in `gcpy/regrid_restart_file.py` +- Fixed computation of the global AOD benchmark table caused by hardwired species names +- Fixed error in `create_benchmark_emissions_table` where all species were assumed to be in Ref and Dev even if they were not ### Removed - Removed `PdfMerger()` from `compare_single_level` and `compare_zonal_mean`, it has been removed in pypdf >= 5.0.0 +- Removed `paths:spcdb_dir` YAML tag in benchmark configuration files +- Removed `st_Ox` from `benchmark_categories.yml`; this species is no longer used in TransportTracers simulations ## [1.6.2] - 2025-06-12 ### Added diff --git a/gcpy/benchmark/cloud/template.1hr_benchmark.yml b/gcpy/benchmark/cloud/template.1hr_benchmark.yml index a04d74de..187f23fa 100644 --- a/gcpy/benchmark/cloud/template.1hr_benchmark.yml +++ b/gcpy/benchmark/cloud/template.1hr_benchmark.yml @@ -29,16 +29,17 @@ paths: main_dir: ${GEOSCHEM_BENCHMARK_WORKING_DIR} results_dir: BenchmarkResults weights_dir: ${GEOSCHEM_BENCHMARK_WORKING_DIR}/weights - spcdb_dir: default # # data: Contains configurations for ref and dev runs -# version: Version string (must not contain spaces) -# dir: Path to run directory -# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files -# restarts_subdir: Subdirectory w/ GEOS-Chem restarts -# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) -# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) -# resolution: GCHP resolution string +# version: Version string (must not contain spaces) +# dir: Path to run directory +# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files +# restarts_subdir: Subdirectory w/ GEOS-Chem restarts +# logs_subdir: Subdirectory w/ GEOS-Chem log files +# logs_template: Template for log file names (may include tokens) +# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) +# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) +# resolution: GCHP resolution string # data: ref: diff --git a/gcpy/benchmark/cloud/template.1mo_benchmark.yml b/gcpy/benchmark/cloud/template.1mo_benchmark.yml index 235730ce..358fa128 100644 --- a/gcpy/benchmark/cloud/template.1mo_benchmark.yml +++ b/gcpy/benchmark/cloud/template.1mo_benchmark.yml @@ -32,13 +32,15 @@ paths: spcdb_dir: default # # data: Contains configurations for ref and dev runs -# version: Version string (must not contain spaces) -# dir: Path to run directory -# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files -# restarts_subdir: Subdirectory w/ GEOS-Chem restarts -# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) -# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) -# resolution: GCHP resolution string +# version: Version string (must not contain spaces) +# dir: Path to run directory +# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files +# restarts_subdir: Subdirectory w/ GEOS-Chem restarts +# logs_subdir: Subdirectory w/ GEOS-Chem log files +# logs_template: Template for log file names (may include tokens) +# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) +# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) +# resolution: GCHP resolution string # data: ref: diff --git a/gcpy/benchmark/config/1mo_benchmark.yml b/gcpy/benchmark/config/1mo_benchmark.yml index 53c927e9..51262623 100644 --- a/gcpy/benchmark/config/1mo_benchmark.yml +++ b/gcpy/benchmark/config/1mo_benchmark.yml @@ -21,24 +21,22 @@ # main_dir: High-level directory containing ref & dev rundirs # results_dir: Directory where plots/tables will be created # weights_dir: Path to regridding weights -# spcdb_dir: Folder in which the species_database.yml file is -# located. If set to "default", then will look for -# species_database.yml in one of the Dev rundirs. # paths: main_dir: /path/to/benchmark/main/dir results_dir: /path/to/BenchmarkResults weights_dir: /n/holylfs06/LABS/jacob_lab/Shared/GEOS-CHEM/gcgrid/gcdata/ExtData/GCHP/RegriddingWeights - spcdb_dir: default # # data: Contains configurations for ref and dev runs -# version: Version string (must not contain spaces) -# dir: Path to run directory -# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files -# restarts_subdir: Subdirectory w/ GEOS-Chem restarts -# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) -# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) -# resolution: GCHP resolution string +# version: Version string (must not contain spaces) +# dir: Path to run directory +# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files +# restarts_subdir: Subdirectory w/ GEOS-Chem restarts +# logs_subdir: Subdirectory w/ GEOS-Chem log files +# logs_template: Template for log file names (may include tokens) +# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) +# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) +# resolution: GCHP resolution string # data: ref: diff --git a/gcpy/benchmark/config/1yr_ch4_benchmark.yml b/gcpy/benchmark/config/1yr_ch4_benchmark.yml index c4712e98..7711efd1 100644 --- a/gcpy/benchmark/config/1yr_ch4_benchmark.yml +++ b/gcpy/benchmark/config/1yr_ch4_benchmark.yml @@ -21,15 +21,11 @@ # main_dir: High-level directory containing ref & dev rundirs # results_dir: Directory where plots/tables will be created # weights_dir: Path to regridding weights -# spcdb_dir: Folder in which the species_database.yml file is -# located. If set to "default", then will look for -# species_database.yml in one of the Dev rundirs. # paths: main_dir: /path/to/benchmark/main/dir # EDIT AS NEEDED results_dir: /path/to/BenchmarkResults # EDIT AS NEEDED weights_dir: /n/holylfs06/LABS/jacob_lab/Shared/GEOS-CHEM/gcgrid/gcdata/ExtData/GCHP/RegriddingWeights - spcdb_dir: default # # Observational data dirs are on Harvard Cannon, edit if necessary # @@ -43,13 +39,15 @@ paths: site_file: allozonesondes_site_elev.csv # # data: Contains configurations for ref and dev runs -# version: Version string (must not contain spaces) -# dir: Path to run directory -# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files -# restarts_subdir: Subdirectory w/ GEOS-Chem restarts -# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) -# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) -# resolution: GCHP resolution string +# version: Version string (must not contain spaces) +# dir: Path to run directory +# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files +# restarts_subdir: Subdirectory w/ GEOS-Chem restarts +# logs_subdir: Subdirectory w/ GEOS-Chem log files +# logs_template: Template for log file names (may include tokens) +# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) +# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) +# resolution: GCHP resolution string # data: ref: diff --git a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml index da959248..1be8c668 100644 --- a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml +++ b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml @@ -21,16 +21,12 @@ # main_dir: High-level directory containing ref & dev rundirs # results_dir: Directory where plots/tables will be created # weights_dir: Path to regridding weights -# spcdb_dir: Folder in which the species_database.yml file is -# located. If set to "default", then will look for -# species_database.yml in one of the Dev rundirs. # obs_data: Paths to observations (for models vs. obs plots) # paths: main_dir: /path/to/benchmark/main/dir results_dir: /path/to/BenchmarkResults weights_dir: /n/holylfs06/LABS/jacob_lab/Shared/GEOS-CHEM/gcgrid/gcdata/ExtData/GCHP/RegriddingWeights - spcdb_dir: default # # Observational data dirs are on Harvard Cannon, edit if necessary # @@ -44,13 +40,16 @@ paths: site_file: allozonesondes_site_elev.csv # # data: Contains configurations for ref and dev runs -# version: Version string (must not contain spaces) -# dir: Path to run directory -# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files -# restarts_subdir: Subdirectory w/ GEOS-Chem restarts -# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) -# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) -# resolution: GCHP resolution string +# version: Version string (must not contain spaces) +# dir: Path to run directory +# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files +# restarts_subdir: Subdirectory w/ GEOS-Chem restarts +# logs_subdir: Subdirectory w/ GEOS-Chem log files +# logs_template: Template for log file names (may include tokens) +# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) +# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) +# resolution: GCHP resolution string +# # data: ref: @@ -134,15 +133,18 @@ options: plot_drydep: True plot_emis: True plot_jvalues: True + plot_models_vs_obs: True plot_options: by_spc_cat: True by_hco_cat: True # # Benchmark tables # + aer_budget_table: True emis_table: True mass_accum_table: False mass_table: True + Ox_budget_table: True OH_metrics: True ops_budget_table: False sanity_check_table: True diff --git a/gcpy/benchmark/config/1yr_tt_benchmark.yml b/gcpy/benchmark/config/1yr_tt_benchmark.yml index 9f338e62..fbeda520 100644 --- a/gcpy/benchmark/config/1yr_tt_benchmark.yml +++ b/gcpy/benchmark/config/1yr_tt_benchmark.yml @@ -21,24 +21,23 @@ # main_dir: High-level directory containing ref & dev rundirs # results_dir: Directory where plots/tables will be created # weights_dir: Path to regridding weights -# spcdb_dir: Folder in which the species_database.yml file is -# located. If set to "default", then will look for -# species_database.yml in one of the Dev rundirs. # paths: main_dir: /path/to/benchmark/main/dir results_dir: /path/to/BenchmarkResults weights_dir: /n/holylfs06/LABS/jacob_lab/Shared/GEOS-CHEM/gcgrid/gcdata/ExtData/GCHP/RegriddingWeights - spcdb_dir: default # # data: Contains configurations for ref and dev runs -# version: Version string (must not contain spaces) -# dir: Path to run directory -# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files -# restarts_subdir: Subdirectory w/ GEOS-Chem restarts -# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) -# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) -# resolution: GCHP resolution string +# version: Version string (must not contain spaces) +# dir: Path to run directory +# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files +# restarts_subdir: Subdirectory w/ GEOS-Chem restarts +# logs_subdir: Subdirectory w/ GEOS-Chem log files +# logs_template: Template for log file names (may include tokens) +# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) +# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) +# resolution: GCHP resolution string +# # data: ref: diff --git a/gcpy/benchmark/modules/benchmark_categories.yml b/gcpy/benchmark/modules/benchmark_categories.yml index 0d12b21b..6183d35a 100644 --- a/gcpy/benchmark/modules/benchmark_categories.yml +++ b/gcpy/benchmark/modules/benchmark_categories.yml @@ -256,7 +256,6 @@ TransportTracersBenchmark: - e90_n - e90_s - st80_25 - - stOX WetLossConv: WetLossConv: - Pb210 diff --git a/gcpy/benchmark/modules/benchmark_drydep.py b/gcpy/benchmark/modules/benchmark_drydep.py index 948067d8..ae23fc99 100644 --- a/gcpy/benchmark/modules/benchmark_drydep.py +++ b/gcpy/benchmark/modules/benchmark_drydep.py @@ -18,6 +18,7 @@ def make_benchmark_drydep_plots( refstr, dev, devstr, + spcdb_files, collection="DryDep", dst="./benchmark", subdst=None, @@ -30,7 +31,6 @@ def make_benchmark_drydep_plots( n_job=-1, time_mean=False, varlist=None, - spcdb_dir=None, ): """ Creates six-panel comparison plots (PDF format) from GEOS-Chem @@ -48,6 +48,8 @@ def make_benchmark_drydep_plots( data set. devstr: str A string to describe dev (e.g. version number) + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): collection : str @@ -79,9 +81,6 @@ def make_benchmark_drydep_plots( Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None time_mean : bool Determines if we should average the datasets over time Default value: False @@ -89,10 +88,6 @@ def make_benchmark_drydep_plots( List of variables to plot. If varlist is None, then all common variables in Ref & Dev will be plotted. """ - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The spcdb_dir argument has not been specified!" - raise ValueError(msg) # Replace whitespace in the ref and dev labels refstr = util.replace_whitespace(refstr) @@ -144,7 +139,7 @@ def make_benchmark_drydep_plots( sigdiff_list=sigdiff_list, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) util.add_bookmarks_to_pdf( pdfname, diff --git a/gcpy/benchmark/modules/benchmark_funcs.py b/gcpy/benchmark/modules/benchmark_funcs.py index 5573fa10..e7ab44b3 100644 --- a/gcpy/benchmark/modules/benchmark_funcs.py +++ b/gcpy/benchmark/modules/benchmark_funcs.py @@ -1,10 +1,10 @@ +#!/usr/bin/env python3 """ Specific utilities for creating plots from GEOS-Chem benchmark simulations. """ import os import warnings import itertools -from distutils.version import LooseVersion import gc import numpy as np import pandas as pd @@ -13,10 +13,16 @@ import sparselt.xr from joblib import Parallel, delayed from tabulate import tabulate -from gcpy import util from gcpy.regrid import create_regridders from gcpy.grid import get_troposphere_mask -from gcpy.util import replace_whitespace +from gcpy.util import \ + add_bookmarks_to_pdf, add_missing_variables, add_nested_bookmarks_to_pdf, \ + array_equals, compare_varnames, create_blank_dataarray, dataset_reader, \ + dataset_mean, divide_dataset_by_dataarray, get_area_from_dataset, \ + get_emissions_varnames, get_filepath, get_variables_from_dataset, \ + insert_text_into_file, make_directory, print_totals, read_config_file, \ + read_species_metadata, rename_and_flip_gchp_rst_vars, \ + replace_whitespace, unique_values, verify_variable_type, wrap_text from gcpy.units import convert_units from gcpy.constants import \ COL_WIDTH, ENCODING, MW_AIR_g, SKIP_THESE_VARS, TABLE_WIDTH @@ -37,13 +43,13 @@ def create_total_emissions_table( devdata, devstr, species, + spcdb_files, outfilename, ref_interval=[2678400.0], dev_interval=[2678400.0], template="Emis{}_", refmetdata=None, devmetdata=None, - spcdb_dir=None, ): """ Creates a table of emissions totals (by sector and by inventory) @@ -70,6 +76,8 @@ def create_total_emissions_table( { species_name: target_unit", etc. } where "species_name" and "target_unit" are strs. + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs outfilename: str Name of the text file which will contain the table of emissions totals. @@ -105,23 +113,25 @@ def create_total_emissions_table( devmetdata: xarray dataset Dataset containing dev meteorology and area Default value: None - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None Remarks: This method is mainly intended for model benchmarking purposes, rather than as a general-purpose tool. - Species properties (such as molecular weights) are read from a + Species metadata (such as molecular weights) are read from a YAML file called "species_database.yml". """ # ================================================================== # Initialization # ================================================================== - util.verify_variable_type(refdata, xr.Dataset) - util.verify_variable_type(devdata, xr.Dataset) + verify_variable_type(refdata, xr.Dataset) + verify_variable_type(refstr, str) + verify_variable_type(devdata, xr.Dataset) + verify_variable_type(devstr, str) + verify_variable_type(species, dict) + verify_variable_type(spcdb_files, list) + verify_variable_type(outfilename, str) # Get ref area [m2] if "AREA" in refdata.data_vars.keys(): @@ -143,17 +153,10 @@ def create_total_emissions_table( "dataset containing Met_AREAM2 is not passed!" raise ValueError(msg) - # Load a YAML file containing species properties (such as - # molecular weights), which we will need for unit conversions. - # This is located in the "data" subfolder of this folder where - # this benchmark.py file is found. - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") - properties = util.read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), + # Read the species database files in the Ref & Dev rundirs, + # and return a dict for each containing the species metadata. + ref_metadata, dev_metadata = read_species_metadata( + spcdb_files, quiet=True ) @@ -165,24 +168,24 @@ def create_total_emissions_table( # Get the list of emission variables for which we will print totals # ================================================================== - # Make sure that Ref and Dev datasets have the same variables. - # Variables that are in Ref but not in Dev will be added to Dev - # with all missing values (NaNs). And vice-versa. - [refdata, devdata] = util.add_missing_variables(refdata, devdata) - # Find all common variables between the two datasets # and get the lists of variables only in Ref and only in Dev, - vardict = util.compare_varnames(refdata, devdata, quiet=True) + vardict = compare_varnames(refdata, devdata, quiet=True) cvars = vardict["commonvars"] refonly = vardict["refonly"] devonly = vardict["devonly"] + # Make sure that Ref and Dev datasets have the same variables. + # Variables that are in Ref but not in Dev will be added to Dev + # with all missing values (NaNs). And vice-versa. + [refdata, devdata] = add_missing_variables(refdata, devdata) + # ================================================================= # Open the file for output # ================================================================= # Create file try: - f = open(outfilename, "w") + f = open(outfilename, "w", encoding=ENCODING) except (IOError, OSError, FileNotFoundError) as e: raise e(f"Could not open {outfilename} for writing!") from e @@ -201,7 +204,7 @@ def create_total_emissions_table( # Get a list of emission variable names for each species diagnostic_template = template.replace("{}", species_name) - varnames = util.get_emissions_varnames(cvars, diagnostic_template) + varnames = get_emissions_varnames(cvars, diagnostic_template) # Also add variables that might be in either Ref or Dev # but not the other. This will allow us to print totals @@ -262,12 +265,11 @@ def create_total_emissions_table( else: spc_name = species_name - # Get a list of properties for the given species - species_properties = properties.get(spc_name) - - # If no properties are found, then skip to next species - if species_properties is None: - print(f"No properties found for {spc_name} ... skippping") + # Get metadata for the given species + ref_species_metadata = ref_metadata.get(spc_name) + dev_species_metadata = dev_metadata.get(spc_name) + if ref_species_metadata is None and dev_species_metadata is None: + print(f"No metadata found for {spc_name} ... skipping") continue # Convert units of Ref and Dev and save to numpy ndarray objects @@ -278,14 +280,14 @@ def create_total_emissions_table( refarray = convert_units( refdata[v], spc_name, - species_properties, + ref_species_metadata, target_units, interval=ref_interval, area_m2=refarea, ) # Set Dev to NaN (missing values) everywhere - devarray = util.create_blank_dataarray( + devarray = create_blank_dataarray( name=refdata[v].name, sizes=devdata.sizes, coords=devdata.coords, @@ -298,14 +300,14 @@ def create_total_emissions_table( devarray = convert_units( devdata[v], spc_name, - species_properties, + dev_species_metadata, target_units, interval=dev_interval, area_m2=devarea, ) # Set Ref to NaN (missing values) everywhere - refarray = util.create_blank_dataarray( + refarray = create_blank_dataarray( name=devdata[v].name, sizes=refdata.sizes, coords=refdata.coords, @@ -318,7 +320,7 @@ def create_total_emissions_table( refarray = convert_units( refdata[v], spc_name, - species_properties, + ref_species_metadata, target_units, interval=ref_interval, area_m2=refarea, @@ -326,7 +328,7 @@ def create_total_emissions_table( devarray = convert_units( devdata[v], spc_name, - species_properties, + dev_species_metadata, target_units, interval=dev_interval, area_m2=devarea, @@ -335,7 +337,7 @@ def create_total_emissions_table( # ========================================================== # Print emission totals for Ref and Dev # ========================================================== - util.print_totals( + print_totals( refarray, devarray, f, @@ -354,7 +356,7 @@ def create_total_emissions_table( f.close() # Reopen file and replace placeholder with list of diffs - util.insert_text_into_file( + insert_text_into_file( filename=outfilename, search_text=placeholder, replace_text=diff_list_to_text( @@ -372,10 +374,10 @@ def create_global_mass_table( varlist, met_and_masks, label, + spcdb_files, trop_only=False, outfilename="GlobalMass_TropStrat.txt", verbose=False, - spcdb_dir=None, ): """ Creates a table of global masses for a list of species in contained in @@ -402,6 +404,8 @@ def create_global_mass_table( label: str Label to go in the header string. Can be used to pass the month & year. + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): trop_only: bool @@ -416,23 +420,20 @@ def create_global_mass_table( Set this switch to True if you wish to print out extra informational messages. Default value: False - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None Remarks: This method is mainly intended for model benchmarking purposes, rather than as a general-purpose tool. - Species properties (such as molecular weights) are read from a + Species metadata (such as molecular weights) are read from a YAML file called "species_database.yml". """ # ================================================================== # Initialization # ================================================================== - util.verify_variable_type(refdata, xr.Dataset) - util.verify_variable_type(devdata, xr.Dataset) + verify_variable_type(refdata, xr.Dataset) + verify_variable_type(devdata, xr.Dataset) # Make sure required arguments are passed if varlist is None: @@ -440,16 +441,10 @@ def create_global_mass_table( if met_and_masks is None: raise ValueError('The "met_and_masks" argument was not passed!') - # Load a YAML file containing species properties (such as - # molecular weights), which we will need for unit conversions. - # This is located in the "data" subfolder of this current directory.2 - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") - properties = util.read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), + # Read the species database files in the Ref & Dev rundirs, and + # return a dict containing metadata for each. + ref_metadata, dev_metadata = read_species_metadata( + spcdb_files, quiet=True ) @@ -463,7 +458,7 @@ def create_global_mass_table( # Create file try: - f = open(outfilename, "w") + f = open(outfilename, "w", encoding=ENCODING) except (IOError, OSError, FileNotFoundError) as e: raise e(f"Could not open {outfilename} for writing!") from e @@ -505,25 +500,17 @@ def create_global_mass_table( # Get the species name spc_name = v.split("_")[1] - # Get a list of properties for the given species - species_properties = properties.get(spc_name) - - # If no properties are found, then skip to next species - if species_properties is None: + # Get metadta for the given species + ref_species_metadata = ref_metadata.get(spc_name) + dev_species_metadata = dev_metadata.get(spc_name) + if ref_species_metadata is None and dev_species_metadata is None: if verbose: - msg = f"No properties found for {spc_name} ... skippping" + msg = f"No metadata found for {spc_name} ... skippping" print(msg) continue # Specify target units target_units = "Gg" - mol_wt_g = species_properties.get("MW_g") - if mol_wt_g is None: - if verbose: - msg = \ - f"No molecular weight found for {spc_name} ... skippping" - print(msg) - continue # ============================================================== # Convert units of Ref and save to a DataArray @@ -534,7 +521,7 @@ def create_global_mass_table( refarray = convert_units( refarray, spc_name, - species_properties, + ref_species_metadata, target_units, area_m2=met_and_masks["Ref_Area"], delta_p=met_and_masks["Ref_Delta_P"], @@ -550,7 +537,7 @@ def create_global_mass_table( devarray = convert_units( devarray, spc_name, - species_properties, + dev_species_metadata, target_units, area_m2=met_and_masks["Dev_Area"], delta_p=met_and_masks["Dev_Delta_P"], @@ -559,10 +546,10 @@ def create_global_mass_table( # ============================================================== # Print global masses for Ref and Dev - # (we will mask out tropospheric boxes in util.print_totals) + # (we will mask out tropospheric boxes in print_totals) # ============================================================== if trop_only: - util.print_totals( + print_totals( refarray, devarray, f, @@ -570,7 +557,7 @@ def create_global_mass_table( masks=met_and_masks ) else: - util.print_totals( + print_totals( refarray, devarray, f, @@ -585,7 +572,7 @@ def create_global_mass_table( f.close() # Reopen file and replace placeholder text by diff_text - util.insert_text_into_file( + insert_text_into_file( filename=outfilename, search_text=placeholder, replace_text=diff_list_to_text( @@ -610,10 +597,10 @@ def create_mass_accumulation_table( varlist, met_and_masks, label, + spcdb_files, trop_only=False, outfilename="GlobalMassAccum_TropStrat.txt", verbose=False, - spcdb_dir=None, ): """ Creates a table of global mass accumulation for a list of species in @@ -648,6 +635,8 @@ def create_mass_accumulation_table( label: str Label to go in the header string. Can be used to pass the month & year. + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): trop_only: bool @@ -662,25 +651,22 @@ def create_mass_accumulation_table( Set this switch to True if you wish to print out extra informational messages. Default value: False - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None Remarks: This method is mainly intended for model benchmarking purposes, rather than as a general-purpose tool. - Species properties (such as molecular weights) are read from a + Species metadata (such as molecular weights) are read from a YAML file called "species_database.yml". """ # ================================================================== # Initialization # ================================================================== - util.verify_variable_type(refdatastart, xr.Dataset) - util.verify_variable_type(refdataend, xr.Dataset) - util.verify_variable_type(devdatastart, xr.Dataset) - util.verify_variable_type(devdataend, xr.Dataset) + verify_variable_type(refdatastart, xr.Dataset) + verify_variable_type(refdataend, xr.Dataset) + verify_variable_type(devdatastart, xr.Dataset) + verify_variable_type(devdataend, xr.Dataset) # Make sure required arguments are passed if varlist is None: @@ -688,17 +674,10 @@ def create_mass_accumulation_table( if met_and_masks is None: raise ValueError('The "met_and_masks" argument was not passed!') - # Load a YAML file containing species properties (such as - # molecular weights), which we will need for unit conversions. - # This is located in the "data" subfolder of this current directory.2 - # Make sure the species database folder is passed - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") - properties = util.read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), + # Read the species database files in the Ref & Dev rundirs, and + # return a dict containing metadata for each. + ref_metadata, dev_metadata = read_species_metadata( + spcdb_files, quiet=True ) @@ -712,7 +691,7 @@ def create_mass_accumulation_table( # Create file try: - f = open(outfilename, "w") + f = open(outfilename, "w", encoding=ENCODING) except (IOError, OSError, FileNotFoundError) as e: raise e(f"Could not open {outfilename} for writing!") from e @@ -723,7 +702,7 @@ def create_mass_accumulation_table( title1 = f"### Global mass accumulation (Gg) {label} (Trop + Strat)" if trop_only: title1 = f"### Global mass accumulation (Gg) {label} (Trop only)" - title2 = f"### Computed as change in instantaneous mass across period" + title2 = "### Computed as change in instantaneous mass across period" title3 = f"### Ref = {refstr}" title4 = f"### Dev = {devstr}" title5 = f"### Ref period: {refperiodstr}" @@ -762,11 +741,10 @@ def create_mass_accumulation_table( # Get the species name spc_name = v.split("_")[1] - # Get a list of properties for the given species - species_properties = properties.get(spc_name) - - # If no properties are found, then skip to next species - if species_properties is None: + # Get a list of metadata for the given species + ref_species_metadata = ref_metadata.get(spc_name) + dev_species_metadata = dev_metadata.get(spc_name) + if ref_species_metadata is None and dev_species_metadata is None: if verbose: msg = f"No properties found for {spc_name} ... skippping" print(msg) @@ -774,13 +752,6 @@ def create_mass_accumulation_table( # Specify target units target_units = "Gg" - mol_wt_g = species_properties.get("MW_g") - if mol_wt_g is None: - if verbose: - msg = \ - f"No molecular weight found for {spc_name} ... skippping" - print(msg) - continue # ============================================================== # Convert units of Ref and save to a DataArray @@ -791,7 +762,7 @@ def create_mass_accumulation_table( refarrays = convert_units( refarrays, spc_name, - species_properties, + ref_species_metadata, target_units, area_m2=met_and_masks["Refs_Area"], delta_p=met_and_masks["Refs_Delta_P"], @@ -803,7 +774,7 @@ def create_mass_accumulation_table( refarraye = convert_units( refarraye, spc_name, - species_properties, + ref_species_metadata, target_units, area_m2=met_and_masks["Refe_Area"], delta_p=met_and_masks["Refe_Delta_P"], @@ -822,7 +793,7 @@ def create_mass_accumulation_table( devarrays = convert_units( devarrays, spc_name, - species_properties, + dev_species_metadata, target_units, area_m2=met_and_masks["Devs_Area"], delta_p=met_and_masks["Devs_Delta_P"], @@ -835,7 +806,7 @@ def create_mass_accumulation_table( devarraye = convert_units( devarraye, spc_name, - species_properties, + dev_species_metadata, target_units, area_m2=met_and_masks["Deve_Area"], delta_p=met_and_masks["Deve_Delta_P"], @@ -847,11 +818,11 @@ def create_mass_accumulation_table( # ============================================================== # Print global masses for Ref and Dev - # (we will mask out tropospheric boxes in util.print_totals) + # (we will mask out tropospheric boxes in print_totals) # ============================================================== # ewl: for now trop_only is always false for accumulation table if trop_only: - util.print_totals( + print_totals( refarray, devarray, f, @@ -859,7 +830,7 @@ def create_mass_accumulation_table( masks=met_and_masks ) else: - util.print_totals( + print_totals( refarray, devarray, f, @@ -874,7 +845,7 @@ def create_mass_accumulation_table( f.close() # Reopen file and replace placeholder text by diff_text - util.insert_text_into_file( + insert_text_into_file( filename=outfilename, search_text=placeholder, replace_text=diff_list_to_text( @@ -892,6 +863,7 @@ def make_benchmark_conc_plots( refstr, dev, devstr, + spcdb_files, dst="./benchmark", subdst=None, overwrite=False, @@ -915,7 +887,6 @@ def make_benchmark_conc_plots( second_ref=None, second_dev=None, time_mean=False, - spcdb_dir=None, ): """ Creates PDF files containing plots of species concentration @@ -932,6 +903,8 @@ def make_benchmark_conc_plots( data set. devstr: str A string to describe dev (e.g. version number) + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): dst: str @@ -1017,9 +990,6 @@ def make_benchmark_conc_plots( diff-of-diffs plotting. This dataset should have the same model type and grid as ref. Default value: None - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None time_mean : bool Determines if we should average the datasets over time Default value: False @@ -1031,13 +1001,14 @@ def make_benchmark_conc_plots( # ================================================================== # Initialization and data read # ================================================================== - - # Make sure the species database folder is passed - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") + verify_variable_type(ref, (str, list)) + verify_variable_type(refstr, str) + verify_variable_type(dev, (str, list)) + verify_variable_type(devstr, str) + verify_variable_type(spcdb_files, list) # Create the destination folder - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Define extra title text (usually a date string) # for the top-title of the plot @@ -1051,7 +1022,7 @@ def make_benchmark_conc_plots( devstr = replace_whitespace(devstr) # Pick the proper function to read the data - reader = util.dataset_reader(time_mean, verbose=verbose) + reader = dataset_reader(time_mean, verbose=verbose) # Open datasets refds = reader(ref, drop_variables=SKIP_THESE_VARS).load() @@ -1148,12 +1119,12 @@ def make_benchmark_conc_plots( # Compute the annual mean of datasets (if necessary) if time_mean: - refds = util.dataset_mean(refds) - devds = util.dataset_mean(devds) - refmetds = util.dataset_mean(refmetds) - devmetds = util.dataset_mean(devmetds) - second_ref = util.dataset_mean(second_refds) - second_dev = util.dataset_mean(second_devds) + refds = dataset_mean(refds) + devds = dataset_mean(devds) + refmetds = dataset_mean(refmetds) + devmetds = dataset_mean(devmetds) + second_ref = dataset_mean(second_refds) + second_dev = dataset_mean(second_devds) # Create regridding files if necessary while not in parallel loop [_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)] @@ -1183,62 +1154,91 @@ def make_benchmark_conc_plots( # Keep original units for plots. # ================================================================== if not plot_by_spc_cat: - [refds, devds] = util.add_missing_variables(refds, devds) + [refds, devds] = add_missing_variables(refds, devds) var_prefix = 'SpeciesConcVV_' varlist = [k for k in refds.data_vars.keys() if var_prefix in k] varlist.sort() # Surface pdfname = os.path.join(dst, 'SpeciesConc_Sfc.pdf') - compare_single_level(refds, refstr, devds, devstr, - varlist=varlist, - cmpres=cmpres, - pdfname=pdfname, - use_cmap_RdBu=use_cmap_RdBu, - log_color_scale=log_color_scale, - extra_title_txt=extra_title_txt, - normalize_by_area=normalize_by_area, - weightsdir=weightsdir, - second_ref=second_refds, - second_dev=second_devds, - spcdb_dir=spcdb_dir) - - util.add_bookmarks_to_pdf(pdfname, varlist, remove_prefix=var_prefix, - verbose=verbose) + compare_single_level( + refds, + refstr, + devds, + devstr, + varlist=varlist, + cmpres=cmpres, + pdfname=pdfname, + use_cmap_RdBu=use_cmap_RdBu, + log_color_scale=log_color_scale, + extra_title_txt=extra_title_txt, + normalize_by_area=normalize_by_area, + weightsdir=weightsdir, + second_ref=second_refds, + second_dev=second_devds, + spcdb_files=spcdb_files + ) + + add_bookmarks_to_pdf( + pdfname, + varlist, + remove_prefix=var_prefix, + verbose=verbose + ) + # 500 hPa pdfname = os.path.join(dst, 'SpeciesConc_500hPa.pdf') - compare_single_level(refds, refstr, devds, devstr, - ilev=22, - varlist=varlist, - cmpres=cmpres, - pdfname=pdfname, - use_cmap_RdBu=use_cmap_RdBu, - log_color_scale=log_color_scale, - normalize_by_area=normalize_by_area, - extra_title_txt=extra_title_txt, - weightsdir=weightsdir, - second_ref=second_refds, - second_dev=second_devds, - spcdb_dir=spcdb_dir) - - util.add_bookmarks_to_pdf(pdfname, varlist, remove_prefix=var_prefix, - verbose=verbose) + compare_single_level( + refds, + refstr, + devds, + devstr, + ilev=22, + varlist=varlist, + cmpres=cmpres, + pdfname=pdfname, + use_cmap_RdBu=use_cmap_RdBu, + log_color_scale=log_color_scale, + normalize_by_area=normalize_by_area, + extra_title_txt=extra_title_txt, + weightsdir=weightsdir, + second_ref=second_refds, + second_dev=second_devds, + spcdb_files=spcdb_files + ) + + add_bookmarks_to_pdf( + pdfname, + varlist, + remove_prefix=var_prefix, + verbose=verbose + ) + # Zonal mean pdfname = os.path.join(dst, 'SpeciesConc_ZnlMn.pdf') - compare_zonal_mean(refds, refstr, devds, devstr, - varlist=varlist, - pdfname=pdfname, - use_cmap_RdBu=use_cmap_RdBu, - log_color_scale=log_color_scale, - normalize_by_area=normalize_by_area, - extra_title_txt=extra_title_txt, - weightsdir=weightsdir, - second_ref=second_refds, - second_dev=second_devds, - spcdb_dir=spcdb_dir) - - util.add_bookmarks_to_pdf(pdfname, varlist, remove_prefix=var_prefix, - verbose=verbose) + compare_zonal_mean( + refds, + refstr, + devds, + devstr, + varlist=varlist, + pdfname=pdfname, + use_cmap_RdBu=use_cmap_RdBu, + log_color_scale=log_color_scale, + normalize_by_area=normalize_by_area, + extra_title_txt=extra_title_txt, + weightsdir=weightsdir, + second_ref=second_refds, + second_dev=second_devds, + spcdb_files=spcdb_files + ) + + add_bookmarks_to_pdf( + pdfname, + varlist, + remove_prefix=var_prefix, + verbose=verbose + ) return # ================================================================== @@ -1271,11 +1271,11 @@ def make_benchmark_conc_plots( # Make sure that Ref and Dev datasets have the same variables. # Variables that are in Ref but not in Dev will be added to Dev # with all missing values (NaNs). And vice-versa. - [refds, devds] = util.add_missing_variables(refds, devds) + [refds, devds] = add_missing_variables(refds, devds) if diff_of_diffs: - [refds, second_refds] = util.add_missing_variables(refds, second_refds) - [devds, second_devds] = util.add_missing_variables(devds, second_devds) + [refds, second_refds] = add_missing_variables(refds, second_refds) + [devds, second_devds] = add_missing_variables(devds, second_devds) # Collection prefix if "SpeciesConc" in collection: @@ -1330,7 +1330,7 @@ def createplots(filecat): warninglist.append(varname) continue varlist.append(varname) - if warninglist != []: + if warninglist: msg = f"\n\nWarning: variables in {filecat} category not in dataset: {warninglist}" print(msg) @@ -1373,13 +1373,16 @@ def createplots(filecat): second_ref=second_refds, second_dev=second_devds, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) diff_sfc[:] = [v.replace(coll_prefix, "") for v in diff_sfc] cat_diff_dict['sfc'] = diff_sfc - util.add_nested_bookmarks_to_pdf( - pdfname, filecat, catdict, - warninglist, remove_prefix=coll_prefix + add_nested_bookmarks_to_pdf( + pdfname, + filecat, + catdict, + warninglist, + remove_prefix=coll_prefix ) # ----------------------- @@ -1419,14 +1422,17 @@ def createplots(filecat): second_ref=second_refds, second_dev=second_devds, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files ) diff_500[:] = [v.replace(coll_prefix, "") for v in diff_500] #dict_500[filecat] = diff_500 cat_diff_dict['500'] = diff_500 - util.add_nested_bookmarks_to_pdf( - pdfname, filecat, catdict, - warninglist, remove_prefix=coll_prefix + add_nested_bookmarks_to_pdf( + pdfname, + filecat, + catdict, + warninglist, + remove_prefix=coll_prefix ) # ----------------------- @@ -1465,14 +1471,17 @@ def createplots(filecat): second_ref=second_refds, second_dev=second_devds, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files ) diff_zm[:] = [v.replace(coll_prefix, "") for v in diff_zm] #dict_zm = diff_zm cat_diff_dict['zm'] = diff_zm - util.add_nested_bookmarks_to_pdf( - pdfname, filecat, catdict, - warninglist, remove_prefix=coll_prefix + add_nested_bookmarks_to_pdf( + pdfname, + filecat, + catdict, + warninglist, + remove_prefix=coll_prefix ) # Strat_ZonalMean plots will use a log-pressure Y-axis, with @@ -1508,11 +1517,14 @@ def createplots(filecat): second_ref=second_refds, second_dev=second_devds, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files ) - util.add_nested_bookmarks_to_pdf( - pdfname, filecat, catdict, - warninglist, remove_prefix=coll_prefix + add_nested_bookmarks_to_pdf( + pdfname, + filecat, + catdict, + warninglist, + remove_prefix=coll_prefix ) return {filecat: cat_diff_dict} @@ -1545,7 +1557,7 @@ def createplots(filecat): for filename in sigdiff_files: if "sfc" in plots: if "sfc" in filename: - with open(filename, "a+") as f: + with open(filename, "a+", encoding=ENCODING) as f: for c, diff_list in dict_sfc.items(): print(f"* {c}: ", file=f, end="") for v in diff_list: @@ -1555,7 +1567,7 @@ def createplots(filecat): if "500hpa" in plots: if "500hpa" in filename: - with open(filename, "a+") as f: + with open(filename, "a+", encoding=ENCODING) as f: for c, diff_list in dict_500.items(): print(f"* {c}: ", file=f, end="") for v in diff_list: @@ -1565,7 +1577,7 @@ def createplots(filecat): if "zonalmean" in plots or "zm" in plots: if "zonalmean" in filename or "zm" in filename: - with open(filename, "a+") as f: + with open(filename, "a+", encoding=ENCODING) as f: for c, diff_list in dict_zm.items(): print(f"* {c}: ", file=f, end="") for v in diff_list: @@ -1590,6 +1602,7 @@ def make_benchmark_emis_plots( refstr, dev, devstr, + spcdb_files, dst="./benchmark", subdst=None, plot_by_spc_cat=False, @@ -1605,7 +1618,6 @@ def make_benchmark_emis_plots( weightsdir='.', n_job=-1, time_mean=False, - spcdb_dir=None, ): """ Creates PDF files containing plots of emissions for model @@ -1624,6 +1636,8 @@ def make_benchmark_emis_plots( data set. devstr: str A string to describe dev (e.g. version number) + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): dst: str @@ -1689,9 +1703,6 @@ def make_benchmark_emis_plots( Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None time_mean : bool Determines if we should average the datasets over time Default value: False @@ -1707,13 +1718,14 @@ def make_benchmark_emis_plots( # ================================================================= # Initialization and data read # ================================================================= - - # Make sure the species database folder is passed - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") + verify_variable_type(ref, (str,list)) + verify_variable_type(refstr, str) + verify_variable_type(dev, (str,list)) + verify_variable_type(devstr, str) + verify_variable_type(spcdb_files, list) # Create the destination folder - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Create the "Emissions" category folder. If subdst is passed, # then create a sub-folder (needed for the 1-year benchmarks). @@ -1733,7 +1745,7 @@ def make_benchmark_emis_plots( devstr = replace_whitespace(devstr) # Get the function that will read the dataset - reader = util.dataset_reader(time_mean, verbose=verbose) + reader = dataset_reader(time_mean, verbose=verbose) # Ref dataset try: @@ -1749,19 +1761,19 @@ def make_benchmark_emis_plots( # Compute mean of data over the time dimension (if time_mean=True) if time_mean: - refds = util.dataset_mean(refds) - devds = util.dataset_mean(devds) + refds = dataset_mean(refds) + devds = dataset_mean(devds) # Make sure that Ref and Dev datasets have the same variables. # Variables that are in Ref but not in Dev will be added to Dev # with all missing values (NaNs). And vice-versa. - [refds, devds] = util.add_missing_variables(refds, devds) + [refds, devds] = add_missing_variables(refds, devds) # Create regridding files if necessary while not in parallel loop [_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)] # Combine 2D and 3D variables into an overall list - vardict = util.compare_varnames(refds, devds, quiet=(not verbose)) + vardict = compare_varnames(refds, devds, quiet=not verbose) vars2D = vardict["commonvars2D"] vars3D = vardict["commonvars3D"] varlist = vars2D + vars3D @@ -1804,10 +1816,14 @@ def make_benchmark_emis_plots( extra_title_txt=extra_title_txt, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, + ) + add_bookmarks_to_pdf( + pdfname, + varlist, + remove_prefix="Emis", + verbose=verbose ) - util.add_bookmarks_to_pdf(pdfname, varlist, - remove_prefix="Emis", verbose=verbose) return # Get emissions variables (non-inventory), categories, and species @@ -1877,11 +1893,14 @@ def createfile_hco_cat(c): sigdiff_list=diff_emis, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files ) - util.add_bookmarks_to_pdf( - pdfname, varnames, remove_prefix="Emis", verbose=verbose + add_bookmarks_to_pdf( + pdfname, + varnames, + remove_prefix="Emis", + verbose=verbose ) # Save the list of quantities with significant differences for # this category into the diff_dict dictionary for use below @@ -1914,7 +1933,7 @@ def createfile_hco_cat(c): if sigdiff_files is not None: for filename in sigdiff_files: if "emis" in filename: - with open(filename, "w+") as f: + with open(filename, "w+", encoding=ENCODING) as f: for c, diff_list in dict_emis.items(): print(f"* {c}: ", file=f, end="") for v in diff_list: @@ -1930,10 +1949,10 @@ def createfile_hco_cat(c): catdict = get_species_categories(benchmark_type) # in case any emissions are skipped (for use in nested pdf bookmarks) - warninglist = ([]) + warninglist = [] # for checking if emissions species not defined in benchmark category # file - allcatspc = ([]) + allcatspc = [] emisdict = {} # used for nested pdf bookmarks # for i, filecat in enumerate(catdict): @@ -1998,10 +2017,14 @@ def createfile_bench_cat(filecat): extra_title_txt=extra_title_txt, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, + ) + add_nested_bookmarks_to_pdf( + pdfname, + filecat, + emisdict, + warninglist ) - util.add_nested_bookmarks_to_pdf( - pdfname, filecat, emisdict, warninglist) return catspc #------------------------------------------------ @@ -2041,6 +2064,7 @@ def make_benchmark_emis_tables( refstr, devlist, devstr, + spcdb_files, dst="./benchmark", benchmark_type="FullChemBenchmark", refmet=None, @@ -2048,7 +2072,6 @@ def make_benchmark_emis_tables( overwrite=False, ref_interval=[2678400.0], dev_interval=[2678400.0], - spcdb_dir=None, ): """ Creates a text file containing emission totals by species and @@ -2067,6 +2090,8 @@ def make_benchmark_emis_tables( (aka "Development") data set devstr: str A string to describe dev (e.g. version number) + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): dst: str @@ -2097,23 +2122,20 @@ def make_benchmark_emis_tables( is set to [2678400.0], which is the number of seconds in July (our 1-month benchmarking month). Default value: [2678400.0] - spcdb_dir: str - Directory of species_datbase.yml file - Default value: Directory of GCPy code repository """ # ================================================================== # Initialization # ================================================================== - - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The 'spcdb_dir' argument has not been specified!" - raise ValueError(msg) + verify_variable_type(reflist, list) + verify_variable_type(refstr, str) + verify_variable_type(devlist, list) + verify_variable_type(devstr, str) + verify_variable_type(spcdb_files, list) # Create the destination folder - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Create the "Tables" category folder if it does not exist emisdir = os.path.join(dst, "Tables") @@ -2139,35 +2161,31 @@ def make_benchmark_emis_tables( refmetds = None devmetds = None - if LooseVersion(xr.__version__) < LooseVersion("0.15.0"): - refds = xr.open_mfdataset(reflist, drop_variables=SKIP_THESE_VARS) - devds = xr.open_mfdataset(devlist, drop_variables=SKIP_THESE_VARS) - if refmet is not None: - refmetds = xr.open_mfdataset( - refmet, drop_variables=SKIP_THESE_VARS) - if devmet is not None: - devmetds = xr.open_mfdataset( - devmet, drop_variables=SKIP_THESE_VARS) - else: - # , combine="nested", concat_dim="time") - refds = xr.open_mfdataset(reflist, drop_variables=SKIP_THESE_VARS) - # , combine="nested", concat_dim="time") - devds = xr.open_mfdataset(devlist, drop_variables=SKIP_THESE_VARS) - if refmet is not None: - # , combine="nested", concat_dim="time") - refmetds = xr.open_mfdataset( - refmet, drop_variables=SKIP_THESE_VARS) - if devmet is not None: - # , combine="nested", concat_dim="time") - devmetds = xr.open_mfdataset( - devmet, drop_variables=SKIP_THESE_VARS) + refds = xr.open_mfdataset( + reflist, + drop_variables=SKIP_THESE_VARS + ) + devds = xr.open_mfdataset( + devlist, + drop_variables=SKIP_THESE_VARS + ) + if refmet is not None: + refmetds = xr.open_mfdataset( + refmet, + drop_variables=SKIP_THESE_VARS + ) + if devmet is not None: + devmetds = xr.open_mfdataset( + devmet, + drop_variables=SKIP_THESE_VARS + ) # ================================================================== # Create table of emissions # ================================================================== # Emissions species dictionary - spc_dict = util.read_config_file( + spc_dict = read_config_file( os.path.join( os.path.dirname(__file__), EMISSION_SPC @@ -2175,7 +2193,7 @@ def make_benchmark_emis_tables( quiet=True ) species=spc_dict[benchmark_type] - inv_dict = util.read_config_file( + inv_dict = read_config_file( os.path.join( os.path.dirname(__file__), EMISSION_INV @@ -2195,13 +2213,13 @@ def make_benchmark_emis_tables( devds, devstr, species, + spcdb_files, file_emis_totals, ref_interval, dev_interval, template="Emis{}_", refmetdata=refmetds, devmetdata=devmetds, - spcdb_dir=spcdb_dir ) # Create table of emissions by inventory @@ -2211,13 +2229,13 @@ def make_benchmark_emis_tables( devds, devstr, inventories, + spcdb_files, file_inv_totals, ref_interval, dev_interval, template="Inv{}_", refmetdata=refmetds, devmetdata=devmetds, - spcdb_dir=spcdb_dir ) # ------------------------------------------- @@ -2235,6 +2253,7 @@ def make_benchmark_jvalue_plots( refstr, dev, devstr, + spcdb_files, varlist=None, dst="./benchmark", subdst=None, @@ -2250,7 +2269,6 @@ def make_benchmark_jvalue_plots( weightsdir='.', n_job=-1, time_mean=False, - spcdb_dir=None, ): """ Creates PDF files containing plots of J-values for model @@ -2267,6 +2285,8 @@ def make_benchmark_jvalue_plots( data set. devstr: str A string to describe dev (e.g. version number) + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): varlist: list of str @@ -2334,9 +2354,6 @@ def make_benchmark_jvalue_plots( Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None time_mean : bool Determines if we should average the datasets over time Default value: False @@ -2361,20 +2378,15 @@ def make_benchmark_jvalue_plots( # Initialization # ================================================================== - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The 'spcdb_dir' argument has not been specified!" - raise ValueError(msg) - # Create the directory for output - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Replace whitespace in the ref and dev labels refstr = replace_whitespace(refstr) devstr = replace_whitespace(devstr) # Get the function that will read file(s) into a Dataset - reader = util.dataset_reader(time_mean, verbose=verbose) + reader = dataset_reader(time_mean, verbose=verbose) # Ref dataset try: @@ -2390,20 +2402,20 @@ def make_benchmark_jvalue_plots( # Compute mean of data over the time dimension (if time_mean=True) if time_mean: - refds = util.dataset_mean(refds) - devds = util.dataset_mean(devds) + refds = dataset_mean(refds) + devds = dataset_mean(devds) # Make sure that Ref and Dev datasets have the same variables. # Variables that are in Ref but not in Dev will be added to Dev # with all missing values (NaNs). And vice-versa. - [refds, devds] = util.add_missing_variables(refds, devds) + [refds, devds] = add_missing_variables(refds, devds) # Create regridding files if necessary [_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)] # Get a list of the 3D variables in both datasets if varlist is None: - vardict = util.compare_varnames(refds, devds, quiet=(not verbose)) + vardict = compare_varnames(refds, devds, quiet=not verbose) cmn = vardict["commonvars3D"] # ================================================================== @@ -2425,9 +2437,9 @@ def make_benchmark_jvalue_plots( # JNoon_* are cumulative sums of local noon J-values; we need # to divide these by JNoonFrac to get the average value - refds = util.divide_dataset_by_dataarray(refds, + refds = divide_dataset_by_dataarray(refds, refds["JNoonFrac"], varlist) - devds = util.divide_dataset_by_dataarray(devds, + devds = divide_dataset_by_dataarray(devds, devds["JNoonFrac"], varlist) # Subfolder of dst where PDF files will be printed @@ -2491,10 +2503,10 @@ def make_benchmark_jvalue_plots( sigdiff_list=diff_sfc, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) diff_sfc[:] = [v.replace(prefix, "") for v in diff_sfc] - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, remove_prefix=prefix, @@ -2529,11 +2541,16 @@ def make_benchmark_jvalue_plots( sigdiff_list=diff_500, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) diff_500[:] = [v.replace(prefix, "") for v in diff_500] - util.add_bookmarks_to_pdf(pdfname, varlist, - remove_prefix=prefix, verbose=verbose) + add_bookmarks_to_pdf( + pdfname, + varlist, + remove_prefix=prefix, + verbose=verbose + ) + # Full-column zonal mean plots if "zonalmean" in plots: if subdst is not None: @@ -2561,10 +2578,10 @@ def make_benchmark_jvalue_plots( sigdiff_list=diff_zm, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) diff_zm[:] = [v.replace(prefix, "") for v in diff_zm] - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, remove_prefix=prefix, @@ -2598,10 +2615,14 @@ def make_benchmark_jvalue_plots( log_color_scale=log_color_scale, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, + ) + add_bookmarks_to_pdf( + pdfname, + varlist, + remove_prefix=prefix, + verbose=verbose ) - util.add_bookmarks_to_pdf(pdfname, varlist, - remove_prefix=prefix, verbose=verbose) # ============================================================== # Write the lists of J-values that have significant differences, @@ -2611,7 +2632,7 @@ def make_benchmark_jvalue_plots( for filename in sigdiff_files: if "sfc" in plots: if "sfc" in filename: - with open(filename, "a+") as f: + with open(filename, "a+", encoding=ENCODING) as f: print("* J-Values: ", file=f, end="") for v in diff_sfc: print(f"{v} ", file=f, end="") @@ -2620,7 +2641,7 @@ def make_benchmark_jvalue_plots( if "500" in plots: if "500" in filename: - with open(filename, "a+") as f: + with open(filename, "a+", encoding=ENCODING) as f: print("* J-Values: ", file=f, end="") for v in diff_500: print(f"{v} ", file=f, end="") @@ -2629,7 +2650,7 @@ def make_benchmark_jvalue_plots( if "zonalmean" in plots or "zm" in plots: if "zonalmean" in filename or "zm" in filename: - with open(filename, "a+") as f: + with open(filename, "a+", encoding=ENCODING) as f: print("* J-Values: ", file=f, end="") for v in diff_zm: print(f"{v} ", file=f, end="") @@ -2649,6 +2670,7 @@ def make_benchmark_collection_2d_var_plots( dev, devstr, colname, + spcdb_files, var_prefix=None, varlist=None, dst="./benchmark", @@ -2661,7 +2683,6 @@ def make_benchmark_collection_2d_var_plots( weightsdir='.', n_job=-1, time_mean=False, - spcdb_dir=None, ): """ Creates PDF file containing plots comparing all 2D variables in @@ -2680,6 +2701,8 @@ def make_benchmark_collection_2d_var_plots( A string to describe dev (e.g. version number) colname: str Name of file collection, to be used in PDF name + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): var_prefix: str @@ -2729,9 +2752,6 @@ def make_benchmark_collection_2d_var_plots( Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None time_mean : bool Determines if we should average the datasets over time Default value: False @@ -2745,20 +2765,15 @@ def make_benchmark_collection_2d_var_plots( # Initialization # ================================================================== - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The 'spcdb_dir' argument has not been specified!" - raise ValueError(msg) - # Create the directory for output - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Replace whitespace in the ref and dev labels refstr = replace_whitespace(refstr) devstr = replace_whitespace(devstr) # Get the function that will read file(s) into a Dataset - reader = util.dataset_reader(time_mean, verbose=verbose) + reader = dataset_reader(time_mean, verbose=verbose) # Ref dataset try: @@ -2774,20 +2789,20 @@ def make_benchmark_collection_2d_var_plots( # Compute mean of data over the time dimension (if time_mean=True) if time_mean: - refds = util.dataset_mean(refds) - devds = util.dataset_mean(devds) + refds = dataset_mean(refds) + devds = dataset_mean(devds) # Make sure that Ref and Dev datasets have the same variables. # Variables that are in Ref but not in Dev will be added to Dev # with all missing values (NaNs). And vice-versa. - [refds, devds] = util.add_missing_variables(refds, devds) + [refds, devds] = add_missing_variables(refds, devds) # Create regridding files if necessary [_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)] # Get a list of the 2D variables in both datasets if varlist is None: - vardict = util.compare_varnames(refds, devds, quiet=(not verbose)) + vardict = compare_varnames(refds, devds, quiet=not verbose) cmn = vardict["commonvars2D"] # Get a list of variables @@ -2826,12 +2841,13 @@ def make_benchmark_collection_2d_var_plots( extra_title_txt=extra_title_txt, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, - verbose=verbose) + verbose=verbose + ) # ------------------------------------------- # Clean up @@ -2840,12 +2856,14 @@ def make_benchmark_collection_2d_var_plots( del devds gc.collect() + def make_benchmark_collection_3d_var_plots( ref, refstr, dev, devstr, colname, + spcdb_files, var_prefix=None, varlist=None, dst="./benchmark", @@ -2859,7 +2877,6 @@ def make_benchmark_collection_3d_var_plots( weightsdir='.', n_job=-1, time_mean=False, - spcdb_dir=None, ): """ Creates PDF files containing plots comparing all 3D variables in @@ -2878,6 +2895,8 @@ def make_benchmark_collection_3d_var_plots( A string to describe dev (e.g. version number) colname: str Name of file collection, to be used in PDF name + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): var_prefix: str @@ -2930,9 +2949,6 @@ def make_benchmark_collection_3d_var_plots( Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None time_mean : bool Determines if we should average the datasets over time Default value: False @@ -2957,20 +2973,15 @@ def make_benchmark_collection_3d_var_plots( # Initialization # ================================================================== - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The 'spcdb_dir' argument has not been specified!" - raise ValueError(msg) - # Create the directory for output - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Replace whitespace in the ref and dev labels refstr = replace_whitespace(refstr) devstr = replace_whitespace(devstr) # Get the function that will read file(s) into a Dataset - reader = util.dataset_reader(time_mean, verbose=verbose) + reader = dataset_reader(time_mean, verbose=verbose) # Ref dataset try: @@ -2986,20 +2997,20 @@ def make_benchmark_collection_3d_var_plots( # Compute mean of data over the time dimension (if time_mean=True) if time_mean: - refds = util.dataset_mean(refds) - devds = util.dataset_mean(devds) + refds = dataset_mean(refds) + devds = dataset_mean(devds) # Make sure that Ref and Dev datasets have the same variables. # Variables that are in Ref but not in Dev will be added to Dev # with all missing values (NaNs). And vice-versa. - [refds, devds] = util.add_missing_variables(refds, devds) + [refds, devds] = add_missing_variables(refds, devds) # Create regridding files if necessary [_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)] # Get a list of the 3D variables in both datasets if varlist is None: - vardict = util.compare_varnames(refds, devds, quiet=(not verbose)) + vardict = compare_varnames(refds, devds, quiet=not verbose) cmn = vardict["commonvars3D"] # Get a list of variables @@ -3045,12 +3056,13 @@ def make_benchmark_collection_3d_var_plots( extra_title_txt=extra_title_txt, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files ) - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, - verbose=verbose) + verbose=verbose + ) # 500hPa plots if "500hpa" in plots: @@ -3074,10 +3086,13 @@ def make_benchmark_collection_3d_var_plots( extra_title_txt=extra_title_txt, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, + ) + add_bookmarks_to_pdf( + pdfname, + varlist, + verbose=verbose ) - util.add_bookmarks_to_pdf(pdfname, varlist, - verbose=verbose) # Full-column zonal mean plots if "zonalmean" in plots: pdfname = os.path.join( @@ -3098,12 +3113,13 @@ def make_benchmark_collection_3d_var_plots( extra_title_txt=extra_title_txt, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files ) - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, - verbose=verbose) + verbose=verbose + ) # Strat_ZonalMean plots will use a log-pressure Y-axis, with # a range of 1..100 hPa, as per GCSC request. (bmy, 8/13/19) @@ -3127,10 +3143,13 @@ def make_benchmark_collection_3d_var_plots( log_color_scale=log_color_scale, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files + ) + add_bookmarks_to_pdf( + pdfname, + varlist, + verbose=verbose ) - util.add_bookmarks_to_pdf(pdfname, varlist, - verbose=verbose) # ------------------------------------------- # Clean up @@ -3139,11 +3158,13 @@ def make_benchmark_collection_3d_var_plots( del devds gc.collect() + def make_benchmark_aod_plots( ref, refstr, dev, devstr, + spcdb_files, varlist=None, dst="./benchmark", subdst=None, @@ -3155,7 +3176,6 @@ def make_benchmark_aod_plots( weightsdir='.', n_job=-1, time_mean=False, - spcdb_dir=None, ): """ Creates PDF files containing plots of column aerosol optical @@ -3172,6 +3192,8 @@ def make_benchmark_aod_plots( data set. devstr: str A string to describe dev (e.g. version number) + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): varlist: list of str @@ -3219,9 +3241,6 @@ def make_benchmark_aod_plots( Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None time_mean : bool Determines if we should average the datasets over time Default value: False @@ -3230,13 +3249,8 @@ def make_benchmark_aod_plots( # Initialization and also read data # ================================================================== - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The 'spcdb_dir' argument has not been specified!" - raise ValueError(msg) - # Create destination plots directory - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Create the "Aerosols" directory as a subfolder of dst. # If subdst is passed, then create a subdirectory of the "Aerosols" @@ -3257,7 +3271,7 @@ def make_benchmark_aod_plots( devstr = replace_whitespace(devstr) # Get the function that will read file(s) into a dataset - reader = util.dataset_reader(time_mean, verbose=verbose) + reader = dataset_reader(time_mean, verbose=verbose) # Read the Ref dataset try: @@ -3273,8 +3287,8 @@ def make_benchmark_aod_plots( # Compute mean of data over the time dimension (if time_mean=True) if time_mean: - refds = util.dataset_mean(refds) - devds = util.dataset_mean(devds) + refds = dataset_mean(refds) + devds = dataset_mean(devds) # Create regridding files if necessary [_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)] @@ -3305,17 +3319,17 @@ def make_benchmark_aod_plots( # Make sure that Ref and Dev datasets have the same variables. # Variables that are in Ref but not in Dev will be added to Dev # with all missing values (NaNs). And vice-versa. - [refds, devds] = util.add_missing_variables(refds, devds) + [refds, devds] = add_missing_variables(refds, devds) # Find common AOD variables in both datasets # (or use the varlist passed via keyword argument) if varlist is None: - vardict = util.compare_varnames(refds, devds, quiet=(not verbose)) + vardict = compare_varnames(refds, devds, quiet=not verbose) cmn3D = vardict["commonvars3D"] varlist = [v for v in cmn3D if "AOD" in v and "_bin" not in v] # Dictionary and list for new display names - newvars = util.read_config_file( + newvars = read_config_file( os.path.join( os.path.dirname(__file__), AOD_SPC @@ -3439,11 +3453,14 @@ def make_benchmark_aod_plots( sigdiff_list=diff_aod, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files ) diff_aod[:] = [v.replace("Column_AOD_", "") for v in diff_aod] - util.add_bookmarks_to_pdf( - pdfname, newvarlist, remove_prefix="Column_AOD_", verbose=verbose + add_bookmarks_to_pdf( + pdfname, + newvarlist, + remove_prefix="Column_AOD_", + verbose=verbose ) # ================================================================== @@ -3453,7 +3470,7 @@ def make_benchmark_aod_plots( if sigdiff_files is not None: for filename in sigdiff_files: if "sfc" in filename: - with open(filename, "a+") as f: + with open(filename, "a+", encoding=ENCODING) as f: print("* Column AOD: ", file=f, end="") for v in diff_aod: print(f"{v} ", file=f, end="") @@ -3473,13 +3490,13 @@ def make_benchmark_mass_tables( refstr, dev, devstr, + spcdb_files, varlist=None, dst="./benchmark", subdst=None, overwrite=False, verbose=False, label="at end of simulation", - spcdb_dir=None, ref_met_extra=None, dev_met_extra=None ): @@ -3499,6 +3516,8 @@ def make_benchmark_mass_tables( data set will be compared against the "Ref" data set. devstr: str A string to describe dev (e.g. version number) + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): varlist: list of str @@ -3526,9 +3545,6 @@ def make_benchmark_mass_tables( verbose: bool Set this flag to True to print extra informational output. Default value: False. - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None ref_met_extra: str Path to ref Met file containing area data for use with restart files which do not contain the Area variable. @@ -3542,11 +3558,6 @@ def make_benchmark_mass_tables( # Initialization # ================================================================== - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The 'spcdb_dir' argument has not been specified!" - raise ValueError(msg) - # Replace whitespace in the ref and dev labels refstr = replace_whitespace(refstr) devstr = replace_whitespace(devstr) @@ -3582,8 +3593,8 @@ def make_benchmark_mass_tables( # If the data is from a GCHP restart file, rename variables and # flip levels to match the GEOS-Chem Classic naming and level # conventions. Otherwise no changes will be made. - refds = util.rename_and_flip_gchp_rst_vars(refds) - devds = util.rename_and_flip_gchp_rst_vars(devds) + refds = rename_and_flip_gchp_rst_vars(refds) + devds = rename_and_flip_gchp_rst_vars(devds) # ================================================================== # Make sure that all necessary meteorological variables are found @@ -3593,9 +3604,9 @@ def make_benchmark_mass_tables( # Find the area variable in Ref if ref_met_extra is None: - ref_area = util.get_area_from_dataset(refds) + ref_area = get_area_from_dataset(refds) else: - ref_area = util.get_area_from_dataset( + ref_area = get_area_from_dataset( xr.open_dataset( ref_met_extra, drop_variables=SKIP_THESE_VARS @@ -3604,9 +3615,9 @@ def make_benchmark_mass_tables( # Find the area variable in Dev if dev_met_extra is None: - dev_area = util.get_area_from_dataset(devds) + dev_area = get_area_from_dataset(devds) else: - dev_area = util.get_area_from_dataset( + dev_area = get_area_from_dataset( xr.open_dataset( dev_met_extra, drop_variables=SKIP_THESE_VARS @@ -3616,15 +3627,15 @@ def make_benchmark_mass_tables( # Find required meteorological variables in Ref # (or exit with an error if we can't find them) metvar_list = ["Met_DELPDRY", "Met_BXHEIGHT", "Met_TropLev"] - refmet = util.get_variables_from_dataset(refds, metvar_list) - devmet = util.get_variables_from_dataset(devds, metvar_list) + refmet = get_variables_from_dataset(refds, metvar_list) + devmet = get_variables_from_dataset(devds, metvar_list) # ================================================================== # Make sure that all necessary species are found # ================================================================== # Get lists of variables names in datasets - vardict = util.compare_varnames(refds, devds, quiet=(not verbose)) + vardict = compare_varnames(refds, devds, quiet=not verbose) commonvars = vardict["commonvars3D"] refonly = vardict['refonly'] devonly = vardict['devonly'] @@ -3704,7 +3715,7 @@ def make_benchmark_mass_tables( label, outfilename=mass_file, verbose=verbose, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) # ================================================================== @@ -3726,7 +3737,7 @@ def make_benchmark_mass_tables( outfilename=mass_file, trop_only=True, verbose=verbose, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) # ------------------------------------------- @@ -3746,13 +3757,13 @@ def make_benchmark_mass_accumulation_tables( dev_end, devstr, devperiodstr, + spcdb_files, varlist=None, dst="./benchmark", subdst=None, overwrite=False, verbose=False, label="at end of simulation", - spcdb_dir=None, ): """ Creates a text file containing global mass totals by species and @@ -3781,6 +3792,8 @@ def make_benchmark_mass_accumulation_tables( A string to describe dev (e.g. version number) devperiodstr: str Dev simulation period start and end + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): varlist: list of str @@ -3808,19 +3821,11 @@ def make_benchmark_mass_accumulation_tables( verbose: bool Set this flag to True to print extra informational output. Default value: False. - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None """ # ================================================================== # Initialization # ================================================================== - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The 'spcdb_dir' argument has not been specified!" - raise ValueError(msg) - # Replace whitespace in the ref and dev labels refstr = replace_whitespace(refstr) devstr = replace_whitespace(devstr) @@ -3844,10 +3849,10 @@ def make_benchmark_mass_accumulation_tables( # ================================================================== print('Creating mass accumulation tables from four restart files:') - print(' Ref start: {}'.format(ref_start)) - print(' Ref end: {}'.format(ref_end)) - print(' Dev start: {}'.format(dev_start)) - print(' Dev end: {}'.format(dev_end)) + print(f' Ref start: {ref_start}') + print(f' Ref end: {ref_end}') + print(f' Dev start: {dev_start}') + print(f' Dev end: {dev_end}') # Read data with warnings.catch_warnings(): @@ -3864,10 +3869,10 @@ def make_benchmark_mass_accumulation_tables( # If the data is from a GCHP restart file, rename variables and # flip levels to match the GEOS-Chem Classic naming and level # conventions. Otherwise no changes will be made. - refSds = util.rename_and_flip_gchp_rst_vars(refSds) - refEds = util.rename_and_flip_gchp_rst_vars(refEds) - devSds = util.rename_and_flip_gchp_rst_vars(devSds) - devEds = util.rename_and_flip_gchp_rst_vars(devEds) + refSds = rename_and_flip_gchp_rst_vars(refSds) + refEds = rename_and_flip_gchp_rst_vars(refEds) + devSds = rename_and_flip_gchp_rst_vars(devSds) + devEds = rename_and_flip_gchp_rst_vars(devEds) # Add area to start restart dataset if area in end but not start # Need to consider area variable names used in both GC-Classic and GCHP @@ -3890,27 +3895,27 @@ def make_benchmark_mass_accumulation_tables( warnings.filterwarnings("ignore", category=xr.SerializationWarning) # Find the area variable in Ref - refs_area = util.get_area_from_dataset(refSds) - refe_area = util.get_area_from_dataset(refEds) + refs_area = get_area_from_dataset(refSds) + refe_area = get_area_from_dataset(refEds) # Find the area variable in Dev - devs_area = util.get_area_from_dataset(devSds) - deve_area = util.get_area_from_dataset(devEds) + devs_area = get_area_from_dataset(devSds) + deve_area = get_area_from_dataset(devEds) # Find required meteorological variables in Ref # (or exit with an error if we can't find them) metvar_list = ["Met_DELPDRY", "Met_BXHEIGHT", "Met_TropLev"] - refsmet = util.get_variables_from_dataset(refSds, metvar_list) - refemet = util.get_variables_from_dataset(refEds, metvar_list) - devsmet = util.get_variables_from_dataset(devSds, metvar_list) - devemet = util.get_variables_from_dataset(devEds, metvar_list) + refsmet = get_variables_from_dataset(refSds, metvar_list) + refemet = get_variables_from_dataset(refEds, metvar_list) + devsmet = get_variables_from_dataset(devSds, metvar_list) + devemet = get_variables_from_dataset(devEds, metvar_list) # ================================================================== # Make sure that all necessary species are found # ================================================================== # Get lists of variables names in datasets - vardict = util.compare_varnames(refSds, devSds, quiet=(not verbose)) + vardict = compare_varnames(refSds, devSds, quiet=not verbose) commonvars = vardict["commonvars3D"] refonly = vardict['refonly'] devonly = vardict['devonly'] @@ -4010,7 +4015,7 @@ def make_benchmark_mass_accumulation_tables( label, outfilename=mass_file, verbose=verbose, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) ## ================================================================== @@ -4034,7 +4039,7 @@ def make_benchmark_mass_accumulation_tables( # outfilename=mass_file, # trop_only=True, # verbose=verbose, - # spcdb_dir=spcdb_dir + # spcdb_files=spcdb_files, #) # ------------------------------------------- @@ -4092,7 +4097,7 @@ def make_benchmark_oh_metrics( # ================================================================== # Define destination directory - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Replace whitespace in the ref and dev labels refstr = replace_whitespace(refstr) @@ -4111,8 +4116,8 @@ def make_benchmark_oh_metrics( # Get tropopause mask # ================================================================== # Find the area variables in Ref and Dev - #ref_area = util.get_area_from_dataset(refds) - #dev_area = util.get_area_from_dataset(devds) + #ref_area = get_area_from_dataset(refds) + #dev_area = get_area_from_dataset(devds) # Find required meteorological variables in Ref # (or exit with an error if we can't find them) @@ -4124,8 +4129,8 @@ def make_benchmark_oh_metrics( "Met_TropLev", "FracOfTimeInTrop", ] - refmet = util.get_variables_from_dataset(refmetds, metvar_list) - devmet = util.get_variables_from_dataset(devmetds, metvar_list) + refmet = get_variables_from_dataset(refmetds, metvar_list) + devmet = get_variables_from_dataset(devmetds, metvar_list) # Create the mask arrays for the troposphere for Ref and Dev ref_tropmask = get_troposphere_mask(refmetds) @@ -4281,6 +4286,7 @@ def make_benchmark_wetdep_plots( dev, devstr, collection, + spcdb_files, dst="./benchmark", cmpres=None, datestr=None, @@ -4296,7 +4302,6 @@ def make_benchmark_wetdep_plots( weightsdir='.', n_job=-1, time_mean=False, - spcdb_dir=None, ): """ Creates PDF files containing plots of species concentration @@ -4315,6 +4320,8 @@ def make_benchmark_wetdep_plots( A string to describe dev (e.g. version number) collection: str String name of collection to plot comparisons for. + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): dst: str @@ -4358,20 +4365,13 @@ def make_benchmark_wetdep_plots( Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None time_mean : bool Determines if we should average the datasets over time Default value: False """ - # Make sure the species database folder is passed - if spcdb_dir is None: - msg = "The 'spcdb_dir' argument has not been specified!" - raise ValueError(msg) # Create destination plot directory - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Make a collection subdirectory targetdst = os.path.join(dst, collection) @@ -4389,7 +4389,7 @@ def make_benchmark_wetdep_plots( devstr = replace_whitespace(devstr) # Get the function that will read file(s) into a dataset - reader = util.dataset_reader(time_mean, verbose=verbose) + reader = dataset_reader(time_mean, verbose=verbose) # Open datasets refds = reader(ref, drop_variables=SKIP_THESE_VARS) @@ -4405,12 +4405,12 @@ def make_benchmark_wetdep_plots( # Compute mean of data over the time dimension (if time_mean=True) if time_mean: - refds = util.dataset_mean(refds) - devds = util.dataset_mean(devds) + refds = dataset_mean(refds) + devds = dataset_mean(devds) if refmet is not None: - refmetds = util.dataset_mean(refmetds) + refmetds = dataset_mean(refmetds) if devmet is not None: - devmetds = util.dataset_mean(devmetds) + devmetds = dataset_mean(devmetds) # Make sure that Ref and Dev datasets have the same variables. # Variables that are in Ref but not in Dev will be added to Dev @@ -4420,7 +4420,7 @@ def make_benchmark_wetdep_plots( #[refds, devds] = add_missing_variables(refds, devds) # Get list of variables in collection - vardict = util.compare_varnames(refds, devds, quiet=(not verbose)) + vardict = compare_varnames(refds, devds, quiet=not verbose) varlist = [v for v in vardict["commonvars3D"] if collection + "_" in v] varlist.sort() @@ -4446,9 +4446,9 @@ def make_benchmark_wetdep_plots( extra_title_txt=datestr, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, remove_prefix=collection + '_', @@ -4476,9 +4476,9 @@ def make_benchmark_wetdep_plots( extra_title_txt=datestr, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, remove_prefix=collection + '_', @@ -4508,9 +4508,9 @@ def make_benchmark_wetdep_plots( extra_title_txt=datestr, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files, ) - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, remove_prefix=collection + '_', @@ -4538,9 +4538,9 @@ def make_benchmark_wetdep_plots( normalize_by_area=normalize_by_area, weightsdir=weightsdir, n_job=n_job, - spcdb_dir=spcdb_dir + spcdb_files=spcdb_files ) - util.add_bookmarks_to_pdf( + add_bookmarks_to_pdf( pdfname, varlist, remove_prefix=collection + '_', @@ -4565,10 +4565,10 @@ def make_benchmark_aerosol_tables( devstr, year, days_per_mon, + spcdb_files, dst='./benchmark', overwrite=False, is_gchp=False, - spcdb_dir=None, ): """ Compute FullChemBenchmark aerosol budgets & burdens @@ -4588,6 +4588,8 @@ def make_benchmark_aerosol_tables( The year of the benchmark simulation (e.g. '2016'). days_per_month: list of int List of number of days per month for all months + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): dst: str @@ -4599,96 +4601,72 @@ def make_benchmark_aerosol_tables( is_gchp: bool Whether datasets are for GCHP Default value: False - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None """ # Create destination directory - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # List of species (and subsets for the trop & strat) - species_list = ["BCPI", "OCPI", "SO4", "DST1", "SALA", "SALC"] + species_list = [ + "BCPI", "OCPI", "SO4", "DST1", "DST2", "DST3", "DST4", + "DSTbin1", "DSTbin2", "DSTbin3", "DSTbin4", "DSTbin5", + "DSTbin6", "DSTbin7", "SALA", "SALC" + ] - # Read the species database - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") - spcdb = util.read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True - ) - - # Molecular weights [g mol-1], as taken from the species database - mw = {} - for v in species_list: - mw[v] = spcdb[v]["MW_g"] - mw["Air"] = MW_AIR_g + # Read the species database files in the Ref & Dev rundirs, and + # return a dict containing metadata for the union of species. + # We'll need properties such as mol. wt. for unit conversions, etc. + _, dev_metadata = read_species_metadata(spcdb_files, quiet=True) # Get the list of relevant AOD diagnostics from a YAML file ifile= AOD_SPC - aod = util.read_config_file( + aod = read_config_file( os.path.join( os.path.dirname(__file__), ifile, ), quiet=True ) - - aod_list = [v for v in aod.keys() if "Dust" in v or "Hyg" in v] - # different names for GCHP - if is_gchp: - aod_list = [v.replace('550nm', 'WL1') for v in aod_list] - # Descriptive names - spc2name = {"BCPI": "Black Carbon", - "DST1": "Dust", - "OCPI": "Organic Carbon", - "SO4": "Sulfate", - "SALA": "Sea Salt (accum)", - "SALC": "Sea Salt (coarse)" - } - - # Read data collections - if LooseVersion(xr.__version__) < LooseVersion("0.15.0"): - ds_aer = xr.open_mfdataset( - devlist_aero, - data_vars=aod_list, - compat='override', - coords='all') - ds_spc = xr.open_mfdataset( - devlist_spc, drop_variables=SKIP_THESE_VARS) - ds_met = xr.open_mfdataset( - devlist_met, drop_variables=SKIP_THESE_VARS) - else: - ds_aer = xr.open_mfdataset( - devlist_aero, - data_vars=aod_list, - compat='override', - coords='all') # , - # combine="nested", concat_dim="time") - ds_spc = xr.open_mfdataset(devlist_spc, - drop_variables=SKIP_THESE_VARS) # , - # combine="nested", concat_dim="time") - ds_met = xr.open_mfdataset(devlist_met, - drop_variables=SKIP_THESE_VARS) # , - # combine="nested", concat_dim="time") + ds_aer = xr.open_mfdataset( + devlist_aero, + drop_variables=SKIP_THESE_VARS + ) + ds_spc = xr.open_mfdataset( + devlist_spc, + drop_variables=SKIP_THESE_VARS + ) + ds_met = xr.open_mfdataset( + devlist_met, + drop_variables=SKIP_THESE_VARS + ) # Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new # naming introduced in GEOS-Chem 14.1.0 ds_spc = rename_speciesconc_to_speciesconcvv(ds_spc) + # Trim species_list to the variables that are only in the dataset + varlist = [ + var for var in species_list if f"SpeciesConcVV_{var}" in ds_spc.data_vars + ] + + # Molecular weights [g mol-1], as taken from the species database + mw = {} + full_names = {} + for var in varlist: + full_names[var] = dev_metadata[var]["FullName"].strip() + mw[var] = dev_metadata[var]["MW_g"] + mw["Air"] = MW_AIR_g + # Get troposphere mask tropmask = get_troposphere_mask(ds_met) # Get number of months n_mon = len(days_per_mon) - # -------------------------------- + # ------------------------------------------------------------------ # Surface area # (kludgey but it works - revisit this) - # -------------------------------- + # ------------------------------------------------------------------ # Get number of vertical levels N_LEVS = ds_spc.dims["lev"] @@ -4710,41 +4688,43 @@ def make_benchmark_aerosol_tables( area_m2[t, k, :, :] = area[t, :, :] total_area_m2 = np.sum(area_m2[0, 0, :, :]) - # ------------------------------ + # ------------------------------------------------------------------ # Conversion factors and time increments - # ------------------------------ + # ------------------------------------------------------------------ # v/v dry --> Tg vv_to_Tg = {} - for spc in species_list: - vv_to_Tg[spc] = ds_met["Met_AD"].values * (mw[spc] / mw["Air"]) * 1e-9 + for var in varlist: + if f"SpeciesConcVV_{var}" in ds_spc.data_vars: + vv_to_Tg[var] = \ + ds_met["Met_AD"].values * (mw[var] / mw["Air"]) * 1e-9 # Days in the benchmark duration days_per_yr = np.sum(days_per_mon) - # ------------------------------ + # ------------------------------------------------------------------ # Define function to print tables - # ------------------------------ - def print_aerosol_metrics(data, species_list, filename, title, label): + # ------------------------------------------------------------------ + def print_aerosol_metrics(data, varlist, namelist, filename, title, label): - with open(filename, "w+") as f: + with open(filename, "w+", encoding=ENCODING) as ofile: # Print top header - print("%" * 79, file=f) - print(f" {title} for {year} in {devstr}", file=f) - print(" (weighted by the number of days per month)", file=f) - print("%" * 79, file=f) - line = "\n" + " " * 40 + "Strat Trop Strat+Trop\n" - line += " " * 40 + "----------- ---------- ----------" - print(line, file=f) + print("%" * 79, file=ofile) + print(f" {title} for {year} in {devstr}", file=ofile) + print(" (weighted by the number of days per month)", file=ofile) + print("%" * 79, file=ofile) + line = "\n" + " " *67 + "Strat Trop Strat+Trop\n" + line += " " * 67 + "----------- ---------- ----------" + print(line, file=ofile) # Print data - for spc in species_list: - line = f"{spc2name[spc] : <17} ({spc : <4}) {label} : {data[spc + '_s']:11.9f} {data[spc + '_t']:10.8f} {data[spc + '_f']:10.8f}\n" - print(line, file=f) + for var in varlist: + line = f"{namelist[var] : <41} ({var : <7}) {label} : {data[var + '_s']:11.9f} {data[var + '_t']:10.8f} {data[var + '_f']:10.8f}\n" + print(line, file=ofile) - # -------------------------------------- + # ------------------------------------------------------------------ # Compute aerosol burdens [Tg] and print - # -------------------------------------- + # ------------------------------------------------------------------ # Table info filename = f"{dst}/Aerosol_Burdens.txt" @@ -4768,29 +4748,50 @@ def print_aerosol_metrics(data, species_list, filename, title, label): sum_axes = (1, 2, 3) # Loop over species - for spc in species_list: + for var in varlist: # Whole-atmosphere and trop-only quantities [g] # NOTE: DryDep is by nature trop-only - varname = "SpeciesConcVV_" + spc - q[spc + "_f"] = ds_spc[varname].values * vv_to_Tg[spc] - q[spc + "_t"] = np.ma.masked_array(q[spc + "_f"], tropmask) + varname = f"SpeciesConcVV_{var}" + q[var + "_f"] = ds_spc[varname].values * vv_to_Tg[var] + q[var + "_t"] = np.ma.masked_array(q[var + "_f"], tropmask) # Compute monthly sums, weighted by the number of days per month - q_sum_f = np.sum(q[spc + "_f"], axis=sum_axes) * days_per_mon - q_sum_t = np.sum(q[spc + "_t"], axis=sum_axes) * days_per_mon + q_sum_f = np.sum(q[var + "_f"], axis=sum_axes) * days_per_mon + q_sum_t = np.sum(q[var + "_t"], axis=sum_axes) * days_per_mon q_sum_s = q_sum_f - q_sum_t # Compute annual averages - burdens[spc + "_f"] = np.sum(q_sum_f) / days_per_yr - burdens[spc + "_t"] = np.sum(q_sum_t) / days_per_yr - burdens[spc + "_s"] = np.sum(q_sum_s) / days_per_yr + burdens[var + "_f"] = np.sum(q_sum_f) / days_per_yr + burdens[var + "_t"] = np.sum(q_sum_t) / days_per_yr + burdens[var + "_s"] = np.sum(q_sum_s) / days_per_yr - print_aerosol_metrics(burdens, species_list, filename, title, label) + print_aerosol_metrics(burdens, varlist, full_names, filename, title, label) - # ------------------------------------------- + # ------------------------------------------------------------------ + # Define function to print tables + # ------------------------------------------------------------------ + def print_aods(data, varlist, filename, title): + + with open(filename, "w+", encoding=ENCODING) as ofile: + + # Print top header + print("%" * 79, file=ofile) + print(f" {title} for {year} in {devstr}", file=ofile) + print(" (weighted by the number of days per month)", file=ofile) + print("%" * 79, file=ofile) + line = "\n" + " " *32 + "Strat Trop Strat+Trop\n" + line += " " * 32 + "----------- ---------- ----------" + print(line, file=ofile) + + # Print data + for var in varlist: + line = f"{var : <4} column optical depth [1]: {data[var + '_s']:11.9f} {data[var + '_t']:10.8f} {data[var + '_f']:10.8f}\n" + print(line, file=ofile) + + # ------------------------------------------------------------------ # Compute average AOD's [Tg] and print - # ------------------------------------------- + # ------------------------------------------------------------------ # Table info filename = f"{dst}/Global_Mean_AOD.txt" @@ -4813,36 +4814,45 @@ def print_aerosol_metrics(data, species_list, filename, title, label): else: sum_axes = (1, 2, 3) + # List of AOD species to plot + varlist = [var for var in aod.keys() if "Dust" in var or "Hyg" in var] + if is_gchp: + varlist = [var.replace('550nm', 'WL1') for var in varlist] + # Loop over AOD variables - for varname in aod_list: + aod_list = [] + for varname in varlist: - # Get the corresponding species name + # Extract the s if "Dust" in varname: - spc = "DST1" + var = "Dust" + elif "Total" in varname: + continue else: - spc = varname.split("_")[1] + var = varname.split("_")[1] + aod_list.append(var) # Whole-atmosphere AOD [1] - q[spc + "_f"] = ds_aer[varname].values + q[var + "_f"] = ds_aer[varname].values # Tropospheric-only AOD [1] - q[spc + "_t"] = np.ma.masked_array(q[spc + "_f"], tropmask) + q[var + "_t"] = np.ma.masked_array(q[var + "_f"], tropmask) # Create monthly sums, weighted by the number of days per month - q_sum_f = np.sum(q[spc + "_f"] * area_m2, axis=sum_axes) * days_per_mon - q_sum_t = np.sum(q[spc + "_t"] * area_m2, axis=sum_axes) * days_per_mon + q_sum_f = np.sum(q[var + "_f"] * area_m2, axis=sum_axes) * days_per_mon + q_sum_t = np.sum(q[var + "_t"] * area_m2, axis=sum_axes) * days_per_mon q_sum_s = q_sum_f - q_sum_t # Take annual averages - aods[spc + "_f"] = np.sum(q_sum_f) / total_area_m2 / days_per_yr - aods[spc + "_t"] = np.sum(q_sum_t) / total_area_m2 / days_per_yr - aods[spc + "_s"] = np.sum(q_sum_s) / total_area_m2 / days_per_yr + aods[var + "_f"] = np.sum(q_sum_f) / total_area_m2 / days_per_yr + aods[var + "_t"] = np.sum(q_sum_t) / total_area_m2 / days_per_yr + aods[var + "_s"] = np.sum(q_sum_s) / total_area_m2 / days_per_yr - print_aerosol_metrics(aods, species_list, filename, title, label) + print_aods(aods, aod_list, filename, title) - # ------------------------------------------- + # ------------------------------------------------------------------ # Clean up - # ------------------------------------------- + # ------------------------------------------------------------------ del ds_aer del ds_spc del ds_met @@ -4856,6 +4866,7 @@ def make_benchmark_operations_budget( devfiles, ref_interval, dev_interval, + spcdb_files, benchmark_type=None, label=None, col_sections=["Full", "Trop", "PBL", "FixedLevs", "Strat"], @@ -4868,7 +4879,6 @@ def make_benchmark_operations_budget( species=None, overwrite=True, verbose=False, - spcdb_dir=None, ): """ Prints the "operations budget" (i.e. change in mass after @@ -4885,6 +4895,8 @@ def make_benchmark_operations_budget( Lists of files to read from "Dev" version. interval: float Number of seconds in the diagnostic interval. + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs Keyword Args (optional): benchmark_type: str @@ -4939,9 +4951,6 @@ def make_benchmark_operations_budget( Set this switch to True if you wish to print out extra informational messages. Default value: False - spcdb_dir : str - Directory containing the species_database.yml file. - Default value: None """ # Replace whitespace in the ref and dev labels refstr = replace_whitespace(refstr) @@ -5010,14 +5019,8 @@ def make_benchmark_operations_budget( print('Opening ref and dev data') skip_vars = SKIP_THESE_VARS if annual: - if LooseVersion(xr.__version__) < LooseVersion("0.15.0"): - ref_ds = xr.open_mfdataset(reffiles, drop_variables=skip_vars) - dev_ds = xr.open_mfdataset(devfiles, drop_variables=skip_vars) - else: - # , combine="nested", concat_dim="time") - ref_ds = xr.open_mfdataset(reffiles, drop_variables=skip_vars) - # , combine="nested", concat_dim="time") - dev_ds = xr.open_mfdataset(devfiles, drop_variables=skip_vars) + ref_ds = xr.open_mfdataset(reffiles, drop_variables=skip_vars) + dev_ds = xr.open_mfdataset(devfiles, drop_variables=skip_vars) else: ref_ds = xr.open_dataset(reffiles, drop_variables=skip_vars) dev_ds = xr.open_dataset(devfiles, drop_variables=skip_vars) @@ -5029,7 +5032,7 @@ def make_benchmark_operations_budget( # ------------------------------------------ # Get information about variables in data files - vardict = util.compare_varnames(ref_ds, dev_ds, quiet=True) + vardict = compare_varnames(ref_ds, dev_ds, quiet=True) refonly = vardict["refonly"] devonly = vardict["devonly"] cmnvars = vardict["commonvars2D"] @@ -5095,11 +5098,8 @@ def make_benchmark_operations_budget( # ------------------------------------------ # Load a YAML file containing species properties - spc_properties = util.read_config_file( - os.path.join( - os.path.dirname(__file__), - "species_database.yml" - ), + ref_metadata, dev_metadata = read_species_metadata( + spcdb_files, quiet=True ) @@ -5113,9 +5113,13 @@ def make_benchmark_operations_budget( # Identify wetdep species is_wetdep[spc] = None - properties = spc_properties.get(spc) - if properties is not None: - is_wetdep[spc] = properties.get("Is_WetDep") + ref_species_metadata = ref_metadata.get(spc) + dev_species_metadata = dev_metadata.get(spc) + if ref_species_metadata is not None and \ + dev_species_metadata is not None: + is_wetdep[spc] = \ + ref_species_metadata.get("Is_WetDep") or \ + dev_species_metadata.get("Is_WetDep") # Unit conversion factors and units ref_conv_fac[spc] = ref_interval * 1e-6 @@ -5348,15 +5352,10 @@ def make_benchmark_operations_budget( if compute_restart: print('Computing RESTART operation budgets...') - # Load a YAML file containing species properties (such as - # molecular weights), which we will need for unit conversions. - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") - properties = util.read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), + # Read the species database files in the Ref & Dev rundirs, + # and return a dict containing metadata for each. + ref_metadata, dev_metadata = read_species_metadata( + spcdb_files, quiet=True ) @@ -5380,14 +5379,22 @@ def make_benchmark_operations_budget( # Get ref and dev mass - # Get species properties for unit conversion. If none, skip. - species_properties = properties.get(spc) - if species_properties is None: + # Get species metadata for unit conversion. If none, skip. + ref_species_metadata = ref_metadata.get(spc) + dev_species_metadata = dev_metadata.get(spc) + if ref_species_metadata is None and \ + dev_species_metadata is None: continue - else: - mol_wt_g = species_properties.get("MW_g") - if mol_wt_g is None: - continue + + # Get molecular weights + #ref_mol_wt_g = get_molwt_from_metadata( + # ref_species_metadata, + # spc + #) + #dev_mol_wt_g = get_molwt_from_metadata( + # ref_species_metadata, + # spc + #) # Specify target units target_units = "Gg" @@ -5401,7 +5408,7 @@ def make_benchmark_operations_budget( refarray = convert_units( refarray, spc, - species_properties, + ref_species_metadata, target_units, area_m2=met_and_masks["Ref_Area"], delta_p=met_and_masks["Ref_Delta_P"], @@ -5417,7 +5424,7 @@ def make_benchmark_operations_budget( devarray = convert_units( devarray, spc_name, - species_properties, + dev_species_metadata, target_units, area_m2=met_and_masks["Dev_Area"], delta_p=met_and_masks["Dev_Delta_P"], @@ -5457,11 +5464,11 @@ def make_benchmark_operations_budget( # ------------------------------------------ # Create the target output directory hierarchy if it doesn't already exist - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Print budgets to file filename = f"{dst}/Budgets_After_Operations.txt" - with open(filename, "w+") as f: + with open(filename, "w+", encoding=ENCODING) as f: print("#" * 78, file=f) if label is not None and benchmark_type is not None: print(f"{benchmark_type} budgets for {label}", file=f) @@ -5521,49 +5528,6 @@ def make_benchmark_operations_budget( gc.collect() -def get_species_database_dir(config): - """ - Returns the directory in which the species_database.yml file is - located. If "paths:spcdb_dir" (as specified in the benchmark YAML - configuration file) is None, then it will look for species_database.yml - in a default location (i.e. in one of the Dev folders). - - Args: - ----- - config: dict - Dictionary containing configuration information as read in - from a benchmark configuration YAML file. - - Returns: - -------- - spcdb_dir: str - Path to the directory in which the species_database.yml file - is located. - """ - spcdb_dir = config["paths"]["spcdb_dir"] - if "DEFAULT" in spcdb_dir.upper(): - if config["options"]["comparisons"]["gchp_vs_gchp"]["run"]: - spcdb_dir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"] - ) - else: - spcdb_dir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"] - ) - spcdb_path = os.path.join( - spcdb_dir, - "species_database.yml" - ) - if os.path.exists(os.path.join(spcdb_path)): - msg = f"Using species database {spcdb_dir}/species_database.yml" - print(msg) - return spcdb_dir - msg = f"Could not find the {spcdb_dir}/species_database.yml file!" - raise FileNotFoundError(msg) - - def create_benchmark_summary_table( refpath, refstr, @@ -5625,9 +5589,6 @@ def create_benchmark_summary_table( Set this switch to True if you wish to print out extra informational messages. Default value: False - spcdb_dir: str - Directory of species_datbase.yml file - Default value: Directory of GCPy code repository Remarks: This method is mainly intended for model benchmarking purposes, @@ -5646,11 +5607,11 @@ def create_benchmark_summary_table( devstr = replace_whitespace(devstr) # Create the directory for output - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) # Create file try: - f = open(os.path.join(dst, outfilename), "w") + f = open(os.path.join(dst, outfilename), "w", encoding=ENCODING) except (IOError, OSError, FileNotFoundError) as e: msg = f"Could not open {outfilename} for writing!" raise e(msg) from e @@ -5678,7 +5639,7 @@ def create_benchmark_summary_table( skip_vars.append("AREA") # Pick the proper function to read the data - reader = util.dataset_reader( + reader = dataset_reader( multi_files=False, verbose=verbose ) @@ -5688,7 +5649,7 @@ def create_benchmark_summary_table( # Read Ref data refdata = reader( - util.get_filepath( + get_filepath( refpath, col, refdate, @@ -5699,7 +5660,7 @@ def create_benchmark_summary_table( # Get Dev data devdata = reader( - util.get_filepath( + get_filepath( devpath, col, devdate, @@ -5711,13 +5672,13 @@ def create_benchmark_summary_table( # Make sure that Ref and Dev datasets have the same variables. # Variables that are in Ref but not in Dev will be added to Dev # with all missing values (NaNs). And vice-versa. - [refdata, devdata] = util.add_missing_variables( + [refdata, devdata] = add_missing_variables( refdata, devdata ) # Find all common variables between the two datasets - vardict = util.compare_varnames( + vardict = compare_varnames( refdata, devdata, quiet=True @@ -5730,7 +5691,7 @@ def create_benchmark_summary_table( # NOTE: Use 32-point float for comparisons since this is # the precision used for History diagnostics. for v in vardict["commonvarsData"]: - if not util.array_equals( + if not array_equals( refdata[v], devdata[v], dtype=np.float32 @@ -5738,7 +5699,7 @@ def create_benchmark_summary_table( diff_list.append(v) # Drop duplicate values from diff_list - diff_list = util.unique_values(diff_list, drop=[None]) + diff_list = unique_values(diff_list, drop=[None]) if len(diff_list) == 0: print("-" * 79, file=f) @@ -5783,7 +5744,7 @@ def diff_list_to_text( diff_text : str String with concatenated list values. """ - util.verify_variable_type(diff_list, list) + verify_variable_type(diff_list, list) # Use "Dev" and "Ref" for inserting into a header if fancy_format: @@ -5792,7 +5753,7 @@ def diff_list_to_text( # Strip out duplicates from diff_list # Prepare a message about species differences (or alternate msg) - diff_list = util.unique_values(diff_list, drop=[None]) + diff_list = unique_values(diff_list, drop=[None]) # Print the text n_diff = len(diff_list) @@ -5806,7 +5767,7 @@ def diff_list_to_text( # of the string and the '###' at the end of the string, if fancy_format: diff_text = f"### {diff_text : <{TABLE_WIDTH-7}}{'###'}" - diff_text = util.wrap_text( + diff_text = wrap_text( diff_text, width=TABLE_WIDTH ) @@ -5834,8 +5795,8 @@ def diff_of_diffs_toprow_title(config, model): title: str The plot title string for the diff-of-diff """ - util.verify_variable_type(config, dict) - util.verify_variable_type(model, str) + verify_variable_type(config, dict) + verify_variable_type(model, str) if not "gcc" in model and not "gchp" in model: msg = "The 'model' argument must be either 'gcc' or 'gchp'!" raise ValueError(msg) @@ -5902,9 +5863,6 @@ def create_benchmark_sanity_check_table( Set this switch to True if you wish to print out extra informational messages. Default value: False - spcdb_dir: str - Directory of species_datbase.yml file - Default value: Directory of GCPy code repository Remarks: This method is mainly intended for model benchmarking purposes, @@ -5919,23 +5877,23 @@ def create_benchmark_sanity_check_table( devstr = replace_whitespace(devstr) # Create the directory for output (if necessary) - util.make_directory(dst, overwrite) + make_directory(dst, overwrite) outfilename = os.path.join(dst, outfilename) # Pick the proper function to read the data - reader = util.dataset_reader( + reader = dataset_reader( multi_files=False, verbose=verbose ) # Variables to skip - skip_vars = SKIP_THESE_VARS.append("AREA") - + skip_vars = [SKIP_THESE_VARS, "AREA"] + # ================================================================== # Open output file and write header # ================================================================== with open(outfilename, "w", encoding=ENCODING) as ofile: - + # Title strings title1 = "### Benchmark diagnostic sanity check table" title2 = f"### Dev = {devstr}" @@ -5953,7 +5911,7 @@ def create_benchmark_sanity_check_table( for col in collections: # Read data into an xr.DataSet object - file_name = util.get_filepath( + file_name = get_filepath( devpath, col, devdate, @@ -5979,7 +5937,7 @@ def create_benchmark_sanity_check_table( print(f"{os.path.basename(file_name)}", file=ofile) print("="*80, file=ofile) print("", file=ofile) - + if len(all_zeros_or_nans) == 0: print("No variables were all zero or all NaN", file=ofile) else: diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 2aa3261b..82441629 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -10,7 +10,7 @@ from gcpy.units import convert_units from gcpy.util import \ dataset_reader, get_area_from_dataset, make_directory, \ - read_config_file, replace_whitespace, verify_variable_type + read_species_metadata, replace_whitespace, verify_variable_type from gcpy.benchmark.modules.benchmark_utils import \ get_datetimes_from_filenames @@ -69,29 +69,24 @@ def get_delta_pressure( def get_passive_tracer_metadata( - spcdb_dir + spcdb_files ): """ Returns a dictionary with metadata for the passive tracer. Args - spcdb_dir : str : Directory containing species_database.yml + spcdb_files : list : Paths to Ref & Dev species_database.yml files Returns - properties : dict : Dictionary with species metadata + properties : dict : Dictionary with species metadata """ - verify_variable_type(spcdb_dir, str) + verify_variable_type(spcdb_files, list) - spc_name = SPC_NAME - properties = read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True - ) + # Read the species database files in the Ref & Dev rundirs, and + # return a dict containing metadata for the union of species. + _, properties = read_species_metadata(spcdb_files, quiet=True) - return properties.get(spc_name) + return properties.get(SPC_NAME) def get_passive_tracer_varname( @@ -279,11 +274,11 @@ def make_benchmark_mass_conservation_table( ref_label, dev_files, dev_label, + spcdb_files, dst="./benchmark", overwrite=False, ref_areapath=None, dev_areapath=None, - spcdb_dir=None ): """ Creates a text file containing global mass of passive species @@ -294,21 +289,21 @@ def make_benchmark_mass_conservation_table( ref_label : str : Ref version label dev_files : list|str : List of files from the Dev model dev_label : str : Dev version label + spcdb_files : list : Paths to Ref & Dev species_database.yml files dst : str : Destination folder for file output overwrite : bool : Overwrite pre-existing files? ref_areapath : list|str : Path to file w/ Ref area data (optional) dev_areapath : list|str : Path to file w/ Dev area data (optional) - spcdb_dir : str : Path to species database file """ verify_variable_type(ref_files, (list, str)) verify_variable_type(ref_label, str) verify_variable_type(dev_files, (list, str)) verify_variable_type(dev_label, str) + verify_variable_type(spcdb_files, list) verify_variable_type(dst, (str, type(None))) verify_variable_type(overwrite, bool) verify_variable_type(ref_areapath, (str, type(None))) verify_variable_type(ref_areapath, (str, type(None))) - verify_variable_type(spcdb_dir, str) # ================================================================== # Initialize @@ -322,7 +317,7 @@ def make_benchmark_mass_conservation_table( dev_label = replace_whitespace(dev_label) # Get a list of properties for the given species - metadata = get_passive_tracer_metadata(spcdb_dir) + metadata = get_passive_tracer_metadata(spcdb_files) # Replace whitespace with underscores in version labels ref_label = replace_whitespace(ref_label) diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py index c4a6495e..83f4c698 100644 --- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py +++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py @@ -4,7 +4,9 @@ more text files. """ import os +import subprocess import numpy as np +from gcpy.constants import ENCODING from gcpy.util import make_directory, replace_whitespace, verify_variable_type @@ -70,6 +72,38 @@ def count_characters(text, char_to_match="."): return count[char_to_match] +def check_file_for_timing_info(text_file): + """ + Checks if a given text file contains GCHP timers output. If not, + we will reset the file name to allPEs.log (in the same path). + + This update is necessary because MAPL v2.59 has switched the GCHP + timers printout from the GCHP log file to allPEs.log. This will + allow backwards compatibility with output from GCHP simulations + that use older MAPL versions. + + Args + text_file : str : Name of the text file to check + + Returns + text_file : str : Name of the text file containing GCHP timers + output (either the input file or "allPEs.log") + """ + result = subprocess.run( + ['grep', 'Times for component ', text_file], + capture_output=True, + text=True, + check=False, + ) + if len(result.stdout) == 0: + text_file = os.path.join( + os.path.dirname(text_file), + "allPEs.log" + ) + + return text_file + + def read_one_text_file(text_file): """ Parses the GCHP log file (plain text) with timing information @@ -96,18 +130,27 @@ def read_one_text_file(text_file): inclusive = 0 temp_timers = [] + # Check if the input file has GCHP timers, otherwise use "allPEs.log" + text_file = check_file_for_timing_info(text_file) + # Open the log file - with open(text_file, encoding="utf-8") as ifile: + with open(text_file, encoding=ENCODING) as ifile: # Read each line in the file for line in ifile: - # Strip newlines; skip empty lines + # Strip out lines that are only present in "allPEs.log" + line = line.replace("0000: GCHPctmEnv: INFO:", "") + line = line.replace("0000: MAPL.profiler: INFO:", "") + + # Strip newlines line = line.strip() + + # Skip empty lines if len(line) == 0: continue - # GCHP timers section (also skip header lines) + # GCHPchem timers section (also skip header lines) if 'Times for component ' in line: keep_line = True inclusive = 3 @@ -124,8 +167,13 @@ def read_one_text_file(text_file): keep_line = False continue + # Skip everything that follows GCHPchem until the + # summary timers section + if "Times for component " in line: + keep_line = False + # Summary section (also skip header lines) - if 'Report on process: 0' in line: + if 'Report on process:' in line: keep_line = True inclusive = 2 continue @@ -139,10 +187,10 @@ def read_one_text_file(text_file): in line: continue - # NOTE: This line only appears in cloud benchmarks, - # which signals the end of GCHP output and the start of - # job statistics. Exit when we encounter this. - if keep_line and "Command being timed:" in line: + # This line appears in GCHP log files using MAPL + # versions prior to 2.59. It indicates the end of the + # GCHP timers section. Exit when we encounter this. + if keep_line and "++" in line: break # Append timing info lines into a list of dicts @@ -273,7 +321,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file): dev_label : str : Version string for the "Dev" model table_file : str : File name for the timing table output """ - with open(table_file, "w", encoding="utf-8") as ofile: + with open(table_file, "w", encoding=ENCODING) as ofile: # Print header print("%"*79, file=ofile) diff --git a/gcpy/benchmark/modules/benchmark_species_changes.py b/gcpy/benchmark/modules/benchmark_species_changes.py index 969d5676..a10ddeda 100644 --- a/gcpy/benchmark/modules/benchmark_species_changes.py +++ b/gcpy/benchmark/modules/benchmark_species_changes.py @@ -5,19 +5,20 @@ Example: - python -m gcpy.benchmark.modules.benchmark_species_changes \ - --ref-label "14.4.0" \ - --ref-log "gcc_14.4.0/14.4.0.log \ - --dev-label "14.5.0" \ - --dev-log "gcc_14.5.0/14.5.0.log" \ - --spcdb-dir "gcc_14.5.0/14.5.0.log" \ + python -m gcpy.benchmark.modules.benchmark_species_changes \ + --ref-label "14.4.0" \ + --ref-log "gcc_14.4.0/14.4.0.log \ + --dev-label "14.5.0" \ + --dev-log "gcc_14.5.0/14.5.0.log" \ + --spcdb-files ["gcc_14.4.0/species_database.yml", \ + "gcc_14.5.0/species_database.yml"], \ --output-file "wiki_tables.txt" """ -from os.path import exists, join +from os.path import exists import argparse import pandas as pd -from gcpy.util import read_config_file, verify_variable_type +from gcpy.util import read_species_metadata, verify_variable_type def read_one_log_file(log_file): @@ -247,7 +248,7 @@ def make_benchmark_species_changes_wiki_tables( ref_log, dev_label, dev_log, - spcdb_dir, + spcdb_files, output_file, ): """ @@ -255,26 +256,23 @@ def make_benchmark_species_changes_wiki_tables( between Ref and Dev versions. Args - ref_label : str : Label for the Ref version - ref_log : str : Path to log file for the Ref version - dev_label : str : Label for the Dev version - dev_log : str : Path to log file for the Dev version - spcdb_dir : str : Directory containing species_database.yml - output_file : str : Path to file with generated wiki tables + ref_label : str : Label for the Ref version + ref_log : str : Path to log file for the Ref version + dev_label : str : Label for the Dev version + dev_log : str : Path to log file for the Dev version + spcdb_files : list : Paths to Ref & Dev species_database.yml files + output_file : str : Path to file with generated wiki tables """ verify_variable_type(ref_label, str) verify_variable_type(ref_log, str) verify_variable_type(dev_label, str) verify_variable_type(dev_log, str) - verify_variable_type(spcdb_dir, str) + verify_variable_type(spcdb_files, list) - # Read the species database - input_file = join(spcdb_dir, "species_database.yml") - try: - species_database = read_config_file(input_file) - except FileNotFoundError as exc: - msg = f"Could not find {input_file}!" - raise FileNotFoundError(msg) from exc + # Read the species database files in the Ref & Dev rundirs, and + # return a dict containing metadata for the union of species. + # We'll need properties such as mol. wt. for unit conversions, etc. + species_database = read_species_metadata(spcdb_files, quiet=True) # Get species metadata for the Ref and Dev versions ref = get_species_metadata(ref_log, species_database) @@ -349,12 +347,12 @@ def main(): help="Log file from the Dev version" ) parser.add_argument( - "--spcdb-dir", - metavar="SPCDB_DIR", - type=str, + "--spcdb-files", + metavar="SPCDB_FILES", + type=list, required=True, default="./", - help="Directory where the species_database.yml file resides" + help="Paths to the Ref & Dev species_database.yml files" ) parser.add_argument( "-o", "--output-file", @@ -372,7 +370,7 @@ def main(): args.ref_log, args.dev_label, args.dev_log, - args.spcdb_dir, + args.spcdb_files, args.output_file ) diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py index d0f3a9ad..864e11b3 100644 --- a/gcpy/benchmark/modules/benchmark_utils.py +++ b/gcpy/benchmark/modules/benchmark_utils.py @@ -1,6 +1,6 @@ +#!/usr/bin/env python3 """ Utility functions specific to the benchmark plotting/tabling scripts. -TODO: Migrate other benchmark-specific utilities from gcpy/benchmark.py to here. """ import os import numpy as np @@ -8,6 +8,7 @@ import pandas as pd from gcpy import util from gcpy.constants import SKIP_THESE_VARS +from gcpy.util import verify_variable_type # Suppress numpy divide by zero warnings to prevent output spam np.seterr(divide="ignore", invalid="ignore") @@ -18,7 +19,7 @@ EMISSION_SPC = "emission_species.yml" EMISSION_INV = "emission_inventories.yml" LUMPED_SPC = "lumped_species.yml" - +SPECIES_DATABASE = "species_database.yml" def make_output_dir( dst, @@ -688,3 +689,64 @@ def get_datetimes_from_filenames( datetimes[idx] = np.datetime64(dt_str) return datetimes + + +def get_species_database_files(config, ref_model, dev_model): + """ + Returns the paths to the species_database.yml files in the + Ref and Dev benchmark run directories. + + Args: + config : dict : Benchmark configuration information + ref_model : str : Either "gcc" or "gchp" + dev_model : str : Either "gcc" or "gchp" + + Returns: + spcdb_files : list : Paths to the species database files + corresponding to Ref & Dev simulations + """ + verify_variable_type(config, dict) + verify_variable_type(ref_model, str) + verify_variable_type(dev_model, str) + + # "gcc" and "gchp" are the only allowable values for ref_model + if ref_model not in ("gcc", "gchp"): + msg = "Argument 'ref_model' must be either 'gcc' or 'gchp'!" + raise ValueError(msg) + + # "gcc" and "gchp" are the only allowable values for dev_model + if dev_model not in ("gcc", "gchp"): + msg = "Argument 'dev_model' must be either 'gcc' or 'gchp'!" + raise ValueError(msg) + + # For GCHP vs GCC comparisons, "Ref" is actually the "Dev" of GCC + if ref_model == "gcc" and dev_model == "gchp": + ref_or_dev = "dev" + else: + ref_or_dev = "ref" + + # Species database in the "Ref" simulation folder + ref_spcdb_file = os.path.join( + config["paths"]["main_dir"], + config["data"][ref_or_dev][ref_model]["dir"], + SPECIES_DATABASE, + ) + if not os.path.exists(ref_spcdb_file): + msg = f"Could not find {ref_spcdb_file}!" + raise FileNotFoundError(msg) + msg = f"Ref species database: {ref_spcdb_file}" + print(msg) + + # Species database in the "Dev" simulation folder + dev_spcdb_file = os.path.join( + config["paths"]["main_dir"], + config["data"]["dev"][dev_model]["dir"], + SPECIES_DATABASE, + ) + if not os.path.exists(dev_spcdb_file): + msg = f"Could not find {dev_spcdb_file}!" + raise FileNotFoundError(msg) + msg = f"Dev species database: {dev_spcdb_file}" + print(msg) + + return [ref_spcdb_file, dev_spcdb_file] diff --git a/gcpy/benchmark/modules/budget_ox.py b/gcpy/benchmark/modules/budget_ox.py index e756eb46..25932b7d 100644 --- a/gcpy/benchmark/modules/budget_ox.py +++ b/gcpy/benchmark/modules/budget_ox.py @@ -13,11 +13,12 @@ import gc import numpy as np import xarray as xr -from gcpy import constants +from gcpy.constants import \ + AVOGADRO, ENCODING, G, MW_AIR_g, SKIP_THESE_VARS from gcpy.grid import get_troposphere_mask -from gcpy.util import get_filepath, read_config_file, \ - rename_and_flip_gchp_rst_vars, reshape_MAPL_CS, \ - replace_whitespace +from gcpy.util import \ + get_filepath, read_config_file, read_species_metadata, \ + rename_and_flip_gchp_rst_vars, reshape_MAPL_CS, replace_whitespace from gcpy.benchmark.modules.benchmark_utils import \ add_lumped_species_to_dataset, get_lumped_species_definitions @@ -42,7 +43,7 @@ def __init__( year, dst, overwrite, - spcdb_dir, + spcdb_files, is_gchp, gchp_res, gchp_is_pre_14_0 @@ -63,8 +64,8 @@ def __init__( Year of the benchmark simulation. overwrite: bool Denotes whether to ovewrite existing budget tables. - spcdb_dir: str - Directory where species_database.yml is stored. + spcdb_files : list + Paths to species_database.yml files in Ref & Dev rundirs is_gchp: bool Denotes if this is GCHP (True) or GCC (False) data. gchp_res: str @@ -81,9 +82,7 @@ def __init__( self.devrstdir = devrstdir self.dst = dst self.overwrite = overwrite - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") - self.spcdb_dir = spcdb_dir + self.spcdb_files = spcdb_files self.is_gchp = is_gchp self.gchp_res = gchp_res self.gchp_is_pre_14_0 = gchp_is_pre_14_0 @@ -93,8 +92,8 @@ def __init__( # --------------------------------------------------------------- self.y0 = int(year) self.y1 = self.y0 + 1 - self.y0_str = "{}".format(self.y0) - self.y1_str = "{}".format(self.y1) + self.y0_str = f"{self.y0}" + self.y1_str = f"{self.y1}" # -------------------------------------------------------------- # Read data into datasets @@ -128,7 +127,7 @@ def __init__( self.get_area_and_volume() self.tropmask = get_troposphere_mask(self.ds_met) self.get_time_info() - self.get_conv_factors(spcdb_dir) + self.get_conv_factors(spcdb_files) # ================================================================== @@ -210,19 +209,19 @@ def read_rst(self, ystr): """ ds = xr.open_dataset( self.rst_file_path(ystr), - drop_variables=constants.SKIP_THESE_VARS + drop_variables=SKIP_THESE_VARS ) if self.is_gchp: - RstPrefix="SPC_" + rst_prefix="SPC_" else: - RstPrefix="SpeciesRst_" + rst_prefix="SpeciesRst_" ds = add_lumped_species_to_dataset( ds, lspc_dict=self.lspc_dict, verbose=False, - prefix=RstPrefix + prefix=rst_prefix ) return ds @@ -241,7 +240,7 @@ def read_diag(self, collection, prefix=None): """ ds = xr.open_mfdataset( self.pathlist[collection], - drop_variables=constants.SKIP_THESE_VARS, + drop_variables=SKIP_THESE_VARS, combine="nested", concat_dim="time" ) @@ -285,13 +284,12 @@ def get_time_info(self): Returns time information for the budget computations. """ # Months - self.N_MONTHS = 12 - self.N_MONTHS_FLOAT = self.N_MONTHS * 1.0 + self.n_months = 12 # Days per month in the benchmark year - self.d_per_m = np.zeros(self.N_MONTHS) - self.s_per_m = np.zeros(self.N_MONTHS) - for t in range(self.N_MONTHS): + self.d_per_m = np.zeros(self.n_months) + self.s_per_m = np.zeros(self.n_months) + for t in range(self.n_months): self.d_per_m[t] = monthrange(self.y0, t + 1)[1] * 1.0 self.s_per_m[t] = self.d_per_m[t] * 86400.0 @@ -300,28 +298,27 @@ def get_time_info(self): self.s_per_a = np.sum(self.s_per_m) # Fraction of year occupied by each month - self.frac_of_a = np.zeros(self.N_MONTHS) - for t in range(self.N_MONTHS): + self.frac_of_a = np.zeros(self.n_months) + for t in range(self.n_months): self.frac_of_a[t] = self.d_per_m[t] / self.d_per_a - def get_conv_factors(self, spcdb_dir): + def get_conv_factors(self, spcdb_files): """ Gets conversion factors used in budget computations Arguments: - spcdb_dir : str - Path to the species_database.yml file + spcdb_files : str + Paths to the species_database.yml file in Ref & Dev """ - # Read the species database - path = os.path.join(spcdb_dir, "species_database.yml") - spcdb = read_config_file(path, quiet=True) + # Read the species database file from the Dev model + _, spcdb = read_species_metadata(spcdb_files, quiet=True) # Molecular weights [kg mol-1], as taken from the species database self.mw = {} self.mw["O3"] = spcdb["O3"]["MW_g"] * 1.0e-3 self.mw["Ox"] = self.mw["O3"] - self.mw["Air"] = constants.MW_AIR_g * 1.0e-3 + self.mw["Air"] = MW_AIR_g * 1.0e-3 # kg/s --> Tg/d self.kg_s_to_tg_a_value = 86400.0 * self.d_per_a * 1e-9 @@ -360,7 +357,7 @@ def init_and_final_mass( deltap_end = globvars.ds_end["Met_DELPDRY"].isel(time=0).values else: deltap_end = globvars.ds_end["Met_DELPDRY"].values - g100 = 100.0 / constants.G + g100 = 100.0 / G airmass_ini = (deltap_ini * globvars.area_m2.values) * g100 airmass_end = (deltap_end * globvars.area_m2.values) * g100 @@ -393,7 +390,7 @@ def init_and_final_mass( ) tropmass_end = np.ma.masked_array( mass_end, - globvars.tropmask[globvars.N_MONTHS-1, :, :, :] + globvars.tropmask[globvars.n_months-1, :, :, :] ) # Create a dict to return values @@ -419,7 +416,7 @@ def annual_average_prodloss( """ # Conversion factors - mw_avo = globvars.mw["Ox"] / constants.AVOGADRO + mw_avo = globvars.mw["Ox"] / AVOGADRO kg_to_tg = 1.0e-9 # Tropospheric P(Ox) and L(Ox) [molec/s] @@ -435,7 +432,7 @@ def annual_average_prodloss( # Compute monthly-weighted averages [Tg Ox] prod_tot = 0.0 loss_tot = 0.0 - for t in range(globvars.N_MONTHS): + for t in range(globvars.n_months): prod_tot += np.sum(prod_trop[t, :, :, :]) * globvars.frac_of_a[t] loss_tot += np.sum(loss_trop[t, :, :, :]) * globvars.frac_of_a[t] prod_tot *= globvars.s_per_a * kg_to_tg * mw_avo @@ -464,7 +461,7 @@ def annual_average_drydep( """ # Conversion factors and area - mw_avo = (globvars.mw["Ox"] / constants.AVOGADRO) + mw_avo = globvars.mw["Ox"] / AVOGADRO kg_to_tg = 1.0e-9 area_cm2 = globvars.area_cm2.values @@ -473,7 +470,7 @@ def annual_average_drydep( # Convert to Tg Ox dry_tot = 0.0 - for t in range(globvars.N_MONTHS): + for t in range(globvars.n_months): dry_tot += np.nansum(dry[t, :, :] * area_cm2) * globvars.frac_of_a[t] result = dry_tot * globvars.s_per_a * kg_to_tg * mw_avo @@ -508,7 +505,7 @@ def annual_average_wetdep(globvars): # Monthly-weighted conv & LS wet losses [kg HNO3/s] wetcv_tot = 0.0 wetls_tot = 0.0 - for t in range(globvars.N_MONTHS): + for t in range(globvars.n_months): wetcv_tot += np.nansum(wetcv_trop[t, :, :, :]) * globvars.frac_of_a[t] wetls_tot += np.nansum(wetls_trop[t, :, :, :]) * globvars.frac_of_a[t] @@ -590,19 +587,16 @@ def print_budget( # Create the plot directory hierarchy if it doesn't already exist if os.path.isdir(globvars.dst) and not globvars.overwrite: err_str = "Pass overwrite=True to overwrite files in that directory" - print("Directory {} exists. {}".format(globvars.dst, err_str)) + print(f"Directory {globvars} exists. {err_str}") return if not os.path.isdir(globvars.dst): os.makedirs(globvars.dst) # Filename - filename = "{}/Ox_budget_troposphere_{}.txt".format( - globvars.dst, - globvars.y0_str - ) + filename = f"{globvars.dst}/Ox_budget_troposphere_{globvars.y0_str}.txt" # Open file and print budgets - with open(filename, "w+") as f: + with open(filename, "w+", encoding=ENCODING) as f: print("="*50, file=f) print("Annual Average Global Ox Budget", file=f) print(f"for GEOS-Chem {globvars.devstr} 1-year benchmark\n", file=f) @@ -613,46 +607,46 @@ def print_budget( print(" MASS ACCUMULATION Tg Ox a-1 Tg O3 a-1", file=f) print(" ----------------- ---------- ----------\n", file=f) v = [mass["Ox_ini"], mass["O3_ini"]] - print(" Initial mass {:11.4f} {:11.4f}".format(*v), file=f) + print(f" Initial mass {v[0]:11.4f} {v[1]:11.4f}", file=f) v = [mass["Ox_end"], mass["O3_end"]] - print(" Final mass {:11.4f} {:11.4f}".format(*v), file=f) + print(f" Final mass {v[0]:11.4f} {v[1]:11.4f}", file=f) print(" ---------- ----------", file=f) v = [mass["Ox_acc"], mass["O3_acc"]] - print(" Difference {:11.7f} {:11.7f}".format(*v), file=f) + print(f" Difference {v[0]:11.7f} {v[1]:11.7f}", file=f) print("\n", file=f) print(" Ox SOURCES Tg Ox a-1", file=f) print(" ----------------- ----------\n", file=f) print(" * Chemistry", file=f) v = prodloss["POx"] - print(" Total POx {:11.4f}".format(v), file=f) + print(f" Total POx {v:11.4f}", file=f) v = prodloss["LOx"] - print(" Total LOx {:11.4f}".format(v), file=f) + print(f" Total LOx {v:11.4f}", file=f) print(" ----------", file=f) v = prodloss["POx-LOx"] - print(" Net POx - LOx {:11.6f}\n".format(v), file=f) + print(f" Net POx - LOx {v:11.6f}\n", file=f) v = metrics["dyn"] print(" * Dynamics", file=f) - print(" Strat Flux(Ox) {:11.4f}\n".format(v), file=f) + print(f" Strat Flux(Ox) {v:11.4f}\n", file=f) print("\n Ox SINKS Tg Ox a-1", file=f) print(" ----------------- ----------\n", file=f) v = drydep - print(" * Dry deposition {:11.4f}\n".format(v), file=f) + print(f" * Dry deposition {v:11.4f}\n", file=f) print(" * Wet deposition", file=f) v = wetdep["CV"] - print(" Convective {:11.4f}".format(v), file=f) + print(f" Convective {v:11.4f}", file=f) v = wetdep["LS"] - print(" Large-Scale {:11.4f}".format(v), file=f) + print(f" Large-Scale {v:11.4f}", file=f) print(" ----------", file=f) v = wetdep["Total"] - print(" Total Wetdep {:11.6f}\n".format(v), file=f) + print(f" Total Wetdep {v:11.6f}\n", file=f) print("\n Ox METRICS", file=f) print(" ----------------- ----------\n", file=f) print(" * Delta Ox", file=f) v = metrics["net"] - print(" Chem+Dyn-Dry-Wet {:11.6f} Tg Ox a-1".format(v), file=f) + print(f" Chem+Dyn-Dry-Wet {v:11.6f} Tg Ox a-1", file=f) print("\n * Ox Lifetime", file=f) v = metrics["life"] - print(" Mass/(LOx+Dyn+Wet) {:11.6f} days".format(v), file=f) + print(f" Mass/(LOx+Dyn+Wet) {v:11.6f} days", file=f) print(file=f) f.close() @@ -663,44 +657,30 @@ def global_ox_budget( devdir, devrstdir, year, + spcdb_file, dst='./1yr_benchmark', overwrite=True, - spcdb_dir=None, is_gchp=False, gchp_res="c24", gchp_is_pre_14_0=False ): """ - Main program to compute Ox budgets - - Arguments: - maindir: str - Top-level benchmark folder - devstr: str - Denotes the "Dev" benchmark version. - year: int - The year of the benchmark simulation (e.g. 2016). - - Keyword Args (optional): - dst: str - Directory where budget tables will be created. - Default value: './1yr_benchmark' - overwrite: bool - Denotes whether to ovewrite existing budget tables. - Default value: True - spcdb_dir: str - Directory where species_database.yml is stored. - Default value: GCPy directory - is_gchp: bool - Denotes if data is from GCHP (True) or GCC (false). - Default value: False - gchp_res: str - GCHP resolution string (e.g. "c24", "c48", etc.) - Default value: None - gchp_is_pre_14_0: bool - Denotes if the version is prior to GCHP 14.0.0 (True) - or not (False). - Default value: False + Main program to compute Ox budgets from GEOS-Chem Classic or + GCHP benchmark simulations. + + Args + devstr : str : Label for the Dev version + devdir : str : Path to the Dev data directory + devrstdir : str : Path to the Dev restart file directory + year : int : Year of the benchmark simulation + spcdb_file : str : Path to the Dev species_database.yml file + + Keyword Args + dst : str : Directory where tables will be written + overwrite : bool : Should existing tables should be overwritten? + is_gchp : bool : Is Dev from a GCHP benchmark simulation? + gchp_res : str : GCHP resolution string (e.g. "c24") + gchp_is_pre_14_0 : bool : Is Dev from a GCHP version prior to 14.0.0? """ # Store global variables in a private class @@ -711,7 +691,7 @@ def global_ox_budget( year, dst, overwrite, - spcdb_dir, + spcdb_file, is_gchp, gchp_res, gchp_is_pre_14_0, diff --git a/gcpy/benchmark/modules/budget_tt.py b/gcpy/benchmark/modules/budget_tt.py index 4ee7068d..d7cca621 100644 --- a/gcpy/benchmark/modules/budget_tt.py +++ b/gcpy/benchmark/modules/budget_tt.py @@ -15,9 +15,12 @@ import gc import numpy as np import xarray as xr -from gcpy import constants +from gcpy.constants import \ + AVOGADRO, ENCODING, G, MW_AIR_g, SKIP_THESE_VARS from gcpy.grid import get_troposphere_mask -from gcpy import util +from gcpy.util import \ + get_filepath, dict_diff, read_species_metadata, \ + rename_and_flip_gchp_rst_vars, replace_whitespace, reshape_MAPL_CS from gcpy.benchmark.modules.benchmark_utils import \ rename_speciesconc_to_speciesconcvv @@ -36,7 +39,7 @@ class _GlobVars: """ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, - gchp_res, gchp_is_pre_14_0, overwrite, spcdb_dir): + gchp_res, gchp_is_pre_14_0, overwrite, spcdb_file): """ Initializes the _GlobVars class. @@ -60,13 +63,13 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, to GCHP 14.0.0. Needed for restart files only. overwrite: bool Denotes whether to ovewrite existing budget tables. - spcdb_dir: str - Directory where species_database.yml is stored. + spcdb_file : str + Paths to species_database.yml files in the Dev rundir """ # ------------------------------ # Arguments from outside # ------------------------------ - self.devstr = util.replace_whitespace(devstr) + self.devstr = replace_whitespace(devstr) self.devdir = devdir self.devrstdir = devrstdir self.dst = dst @@ -80,15 +83,15 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, # ------------------------------ self.y0 = year self.y1 = self.y0 + 1 - self.y0_str = "{}".format(self.y0) - self.y1_str = "{}".format(self.y1) + self.y0_str = f"{self.y0}" + self.y1_str = f"{self.y1}" # ------------------------------ # Collection file lists # ------------------------------ # Initial restart file - RstInit = util.get_filepath( + rst_init = get_filepath( self.devrstdir, "Restart", np.datetime64(f"{self.y0_str}-01-01T00:00:00"), @@ -98,7 +101,7 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, ) # Final restart file - RstFinal = util.get_filepath( + rst_final = get_filepath( self.devrstdir, "Restart", np.datetime64(f"{self.y1_str}-01-01T00:00:00"), @@ -108,57 +111,58 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, ) # Diagnostics - HemcoDiag = os.path.join( + hemco_diag = os.path.join( self.devdir, f"HEMCO_diagnostics.{self.y0_str}*.nc" ) - DryDep = os.path.join( + dry_dep = os.path.join( self.devdir, f"*.DryDep.{self.y0_str}*.nc4" ) - RadioNucl = os.path.join( + radio_nucl = os.path.join( self.devdir, f"*.RadioNuclide.{self.y0_str}*.nc4" ) + gchp_use_statemet_avg = False if is_gchp: - StateMetAvg = os.path.join( + state_met_avg = os.path.join( self.devdir, f"*.StateMet_avg.{self.y0_str}*.nc4" ) - StateMet = os.path.join( + state_met = os.path.join( self.devdir, f"*.StateMet.{self.y0_str}*.nc4" ) # Set a logical if we need to read StateMet_avg or StateMet - gchp_use_statemet_avg = os.path.exists(StateMetAvg) + gchp_use_statemet_avg = os.path.exists(state_met_avg) else: - StateMet = os.path.join( + state_met = os.path.join( self.devdir, f"*.StateMet.{self.y0_str}*.nc4" ) - SpeciesConc = os.path.join( + species_conc = os.path.join( self.devdir, f"*.SpeciesConc.{self.y0_str}*.nc4" ) - WetLossConv = os.path.join( + wet_loss_conv = os.path.join( self.devdir, f"*.WetLossConv.{self.y0_str}*.nc4" ) - WetLossLS = os.path.join( + wet_loss_ls = os.path.join( self.devdir, f"*.WetLossLS.{self.y0_str}*.nc4" ) - GCHPEmiss = os.path.join( + gchp_emiss = os.path.join( self.devdir, f"*.Emissions.{self.y0_str}*.nc4" ) @@ -171,14 +175,14 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, extra_kwargs = {} self.ds_ini = xr.open_dataset( - RstInit, - drop_variables=constants.SKIP_THESE_VARS, + rst_init, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) self.ds_end = xr.open_dataset( - RstFinal, - drop_variables=constants.SKIP_THESE_VARS, + rst_final, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) @@ -186,25 +190,25 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, # vertical axis. Also test if the restart files have the BXHEIGHT # variable contained within them. if is_gchp: - self.ds_ini = util.rename_and_flip_gchp_rst_vars(self.ds_ini) - self.ds_end = util.rename_and_flip_gchp_rst_vars(self.ds_end) + self.ds_ini = rename_and_flip_gchp_rst_vars(self.ds_ini) + self.ds_end = rename_and_flip_gchp_rst_vars(self.ds_end) # Diagnostics self.ds_dcy = xr.open_mfdataset( - RadioNucl, - drop_variables=constants.SKIP_THESE_VARS, + radio_nucl, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) self.ds_dry = xr.open_mfdataset( - DryDep, - drop_variables=constants.SKIP_THESE_VARS, + dry_dep, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) self.ds_cnc = xr.open_mfdataset( - SpeciesConc, - drop_variables=constants.SKIP_THESE_VARS, + species_conc, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) self.ds_cnc = rename_speciesconc_to_speciesconcvv( @@ -212,14 +216,14 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, ) self.ds_wcv = xr.open_mfdataset( - WetLossConv, - drop_variables=constants.SKIP_THESE_VARS, + wet_loss_conv, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) self.ds_wls = xr.open_mfdataset( - WetLossLS, - drop_variables=constants.SKIP_THESE_VARS, + wet_loss_ls, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) @@ -227,28 +231,28 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, # For GCHP: Read from StateMet_avg if present (otherwise StateMet) if is_gchp and gchp_use_statemet_avg: self.ds_met = xr.open_mfdataset( - StateMetAvg, - drop_variables=constants.SKIP_THESE_VARS, + state_met_avg, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) else: self.ds_met = xr.open_mfdataset( - StateMet, - drop_variables=constants.SKIP_THESE_VARS, + state_met, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) # Emissions if self.is_gchp: self.ds_hco = xr.open_mfdataset( - GCHPEmiss, - drop_variables=constants.SKIP_THESE_VARS, + gchp_emiss, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) else: self.ds_hco = xr.open_mfdataset( - HemcoDiag, - drop_variables=constants.SKIP_THESE_VARS, + hemco_diag, + drop_variables=SKIP_THESE_VARS, **extra_kwargs ) @@ -261,7 +265,7 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, msg = 'Could not find Met_AREAM2 in StateMet_avg collection!' raise ValueError(msg) area_m2 = self.ds_met["Met_AREAM2"].isel(time=0) - area_m2 = util.reshape_MAPL_CS(area_m2) + area_m2 = reshape_MAPL_CS(area_m2) self.area_m2 = area_m2 self.area_cm2 = self.ds_met["Met_AREAM2"] * 1.0e4 else: @@ -272,20 +276,20 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, # ------------------------------ # Months and days # ------------------------------ - self.N_MONTHS = 12 - self.N_MONTHS_FLOAT = self.N_MONTHS * 1.0 + self.n_months = 12 + self.n_months_float = self.n_months * 1.0 # Days per month in the benchmark year - self.d_per_mon = np.zeros(self.N_MONTHS) - for t in range(self.N_MONTHS): + self.d_per_mon = np.zeros(self.n_months) + for t in range(self.n_months): self.d_per_mon[t] = monthrange(self.y0, t + 1)[1] * 1.0 # Days in the benchmark year self.d_per_yr = np.sum(self.d_per_mon) # Fraction of year occupied by each month - self.frac_of_yr = np.zeros(self.N_MONTHS) - for t in range(self.N_MONTHS): + self.frac_of_yr = np.zeros(self.n_months) + for t in range(self.n_months): self.frac_of_yr[t] = self.d_per_mon[t] / self.d_per_yr # ------------------------------ @@ -295,17 +299,14 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, # List of species (and subsets for the trop & strat) self.species_list = ["Pb210", "Be7", "Be10"] - # Read the species database - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") - path = os.path.join(spcdb_dir, "species_database.yml") - spcdb = util.read_config_file(path, quiet=True) + # Read the species database files in the Dev rundirs + _, spcdb = read_species_metadata(spcdb_file, quiet=True) # Molecular weights [g mol-1], as taken from the species database self.mw = {} for v in self.species_list: self.mw[v] = spcdb[v]["MW_g"] - self.mw["Air"] = constants.MW_AIR_g + self.mw["Air"] = MW_AIR_g # kg/s --> g/day self.kg_s_to_g_d_value = 86400.0 * 1000.0 @@ -323,7 +324,7 @@ def __init__(self, devstr, devdir, devrstdir, year, dst, is_gchp, self.kg_s_to_g_d[spc] = self.kg_s_to_g_d_value # kg/mole for each species - self.kg_per_mol[spc] = constants.AVOGADRO / (self.mw[spc] * 1e-3) + self.kg_per_mol[spc] = AVOGADRO / (self.mw[spc] * 1e-3) # v/v dry --> g self.vv_to_g[spc] = self.ds_met["Met_AD"].values \ @@ -393,7 +394,7 @@ def mass_from_rst(globvars, ds, tropmask): # Conversion factors based on restart-file met fields # NOTE: Convert DataArray to numpy ndarray so that the # broadcasting will be done properly!!! - g100 = 100.0 / constants.G + g100 = 100.0 / G if 'time' in ds["Met_DELPDRY"].dims: deltap = ds["Met_DELPDRY"].isel(time=0).values else: @@ -407,10 +408,7 @@ def mass_from_rst(globvars, ds, tropmask): # Determine if restart file variables have a time dimension # (if one does, they all do) varname = prefix + globvars.species_list[0] - if 'time' in ds[varname].dims: - has_time_dim = True - else: - has_time_dim = False + has_time_dim = 'time' in ds[varname].dims # Loop over species for spc in globvars.species_list: @@ -460,9 +458,9 @@ def annual_average(globvars, ds, collection, conv_factor): # Initialize q = {} - q_sum_f = np.zeros(globvars.N_MONTHS) - q_sum_t = np.zeros(globvars.N_MONTHS) - q_sum_s = np.zeros(globvars.N_MONTHS) + q_sum_f = np.zeros(globvars.n_months) + q_sum_t = np.zeros(globvars.n_months) + q_sum_s = np.zeros(globvars.n_months) result = {} for spc in globvars.species_list: @@ -534,9 +532,9 @@ def annual_average_sources(globvars): # Initialize q = {} - q_sum_f = np.zeros(globvars.N_MONTHS) - q_sum_t = np.zeros(globvars.N_MONTHS) - q_sum_s = np.zeros(globvars.N_MONTHS) + q_sum_f = np.zeros(globvars.n_months) + q_sum_t = np.zeros(globvars.n_months) + q_sum_s = np.zeros(globvars.n_months) result = {} # Pb210 source is from Rn222 decay, in the RadioNuclide collection @@ -560,16 +558,16 @@ def annual_average_sources(globvars): q["Be10_f"] = np.zeros(q_shape) # Error traps - if q_shape[0] != globvars.N_MONTHS: - msg = f"Expected {globvars.N_MONTHS} but only found {q_shape[0]}" - raise ValueException(msg) + if q_shape[0] != globvars.n_months: + msg = f"Expected {globvars.n_months} but only found {q_shape[0]}" + raise ValueError(msg) if q_shape[1] != n_levs: msg = f"Expected {n_levs} but only found {q_shape[1]}!" - raise ValueException(msg) + raise ValueError(msg) # Convert Be7 and Be10 sources from kg/m2/s to g/day # If GCHP data, must vertically flip the emissions diagnostic - for t in range(globvars.N_MONTHS): + for t in range(globvars.n_months): for k in range(n_levs): if globvars.is_gchp: kf = n_levs - k - 1 @@ -669,14 +667,14 @@ def trop_residence_time(globvars): q_wls_sum = np.sum(q_wls, axis=sum_axes) # Compute lifetime - for t in range(globvars.N_MONTHS): + for t in range(globvars.n_months): num = q_cnc_sum[t] denom = q_dry_sum[t] + q_wcv_sum[t] + q_wls_sum[t] result[spc + "_t"] += 1.0 / (num / denom) # Convert residence time [d] result[spc + "_t"] = 1.0 \ - / (result[spc + "_t"] / globvars.N_MONTHS_FLOAT) + / (result[spc + "_t"] / globvars.n_months_float) return result @@ -699,10 +697,11 @@ def print_budgets(globvars, data, key): err_str = "Pass overwrite=True to overwrite files in that directory" print(f"Directory {globvars.dst} exists. {err_str}") return - elif not os.path.isdir(globvars.dst): + if not os.path.isdir(globvars.dst): os.makedirs(globvars.dst) # Filename to print + filename = "" if "_f" in key: filename = \ f"{globvars.dst}/Pb-Be_budget_trop_strat.{globvars.devstr}.txt" @@ -717,7 +716,7 @@ def print_budgets(globvars, data, key): title = "Annual Average Global Budgets of 210Pb, 7Be, and 10Be\n " # Open file and print budgets - with open(filename, "w+") as f: + with open(filename, "w+", encoding=ENCODING) as f: if "_f" in key: print( f"Table 1. {title} in the Troposphere + Stratosphere in {globvars.devstr} for year {globvars.y0_str}\n", file=f) @@ -733,8 +732,7 @@ def print_budgets(globvars, data, key): file=f) vals = [v[1] for v in list(data["burden"].items()) if key in v[0]] - print(" Burden, g {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + print(f" Burden, g {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) print(file=f) if "_t" in key: @@ -742,56 +740,45 @@ def print_budgets(globvars, data, key): for v in list(data["res_time"].items()) if key in v[0]] print( - " Residence time, d {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + f" Residence time, d {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) print(file=f) vals = [v[1] for v in list(data["src_total"].items()) if key in v[0]] - print(" Sources, g d-1 {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + print(f" Sources, g d-1 {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) print(file=f) vals = [v[1] for v in list(data["snk_total"].items()) if key in v[0]] - print(" Sinks, g d-1 {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + print(f" Sinks, g d-1 {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) vals = [v[1] for v in list(data["snk_drydep"].items()) if key in v[0]] - print(" Dry deposition {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + print(f" Dry deposition {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) print(" Wet deposition", file=f) vals = [v[1] for v in list(data["snk_wetls"].items()) if key in v[0]] - print(" Stratiform {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + print(f" Stratiform {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) vals = [v[1] for v in list(data["snk_wetconv"].items()) if key in v[0]] - print(" Convective {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + print(f" Convective {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) vals = [v[1] for v in list(data["snk_decay"].items()) if key in v[0]] - print(" Radioactive decay {:11.7f} {:11.7f} {:11.8f}".format( - *vals), file=f) + print(f" Radioactive decay {vals[0]:11.7f} {vals[1]:11.7f} {vals[2]:11.8f}", file=f) print(file=f) vals = [v[1] for v in list(data["src_minus_snk"].items()) if key in v[0]] - print(" Sources - Sinks, g d-1 {:11.6f} {:11.6f} {:11.6f}".format( - *vals), file=f) + print(f" Sources - Sinks, g d-1 {vals[0]:11.6f} {vals[1]:11.6f} {vals[2]:11.6f}", file=f) print(file=f) print(" Accumulation Term", file=f) vals = [v[1] for v in list(data["accum_init"].items()) if key in v[0]] - print(" Initial mass, g {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + print(f" Initial mass, g {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) vals = [v[1] for v in list(data["accum_final"].items()) if key in v[0]] - print(" Final mass, g {:11.4f} {:11.4f} {:11.4f}".format( - *vals), file=f) + print(f" Final mass, g {vals[0]:11.4f} {vals[1]:11.4f} {vals[2]:11.4f}", file=f) vals = [v[1] for v in list(data["accum_diff"].items()) if key in v[0]] - print(" Difference, g {:11.7f} {:11.7f} {:11.7f}".format( - *vals), file=f) + print(f" Difference, g {vals[0]:11.7f} {vals[1]:11.7f} {vals[2]:11.7f}", file=f) f.close() @@ -800,43 +787,30 @@ def transport_tracers_budgets( devdir, devrstdir, year, + spcdb_file, dst='./1yr_benchmark', is_gchp=False, gchp_res="c00", gchp_is_pre_14_0=False, overwrite=True, - spcdb_dir=os.path.dirname(__file__)): +): """ - Main program to compute TransportTracersBenchmark budgets - - Args: - maindir: str - Top-level benchmark folder - devstr: str - Denotes the "Dev" benchmark version. - year: int - The year of the benchmark simulation (e.g. 2016). - - Keyword Args (optional): - dst: str - Directory where budget tables will be created. - Default value: './1yr_benchmark' - is_gchp: bool - Denotes if data is from GCHP (True) or GCC (false). - Default value: False - gchp_res: str - A string (e.g. "c24") denoting GCHP grid resolution. - Default value: "c00". - gchp_is_pre_14_0: bool - Logical to indicate whether or not the GCHP data is prior - to GCHP 14.0.0. Needed for restart files only. - Default value: False - overwrite: bool - Denotes whether to ovewrite existing budget tables. - Default value: True - spcdb_dir: str - Directory where species_database.yml is stored. - Default value: GCPy directory + Main program to compute TransportTracers budgets from + GEOS-Chem Classic or GCHP benchmark simulations. + + Args + devstr : str : Label for the Dev version + devdir : str : Path to the Dev data directory + devrstdir : str : Path to the Dev restart file directory + year : int : Year of the benchmark simulation + spcdb_file : str : Path to the Dev species_database.yml file + + Keyword Args + dst : str : Directory where tables will be written + overwrite : bool : Should existing tables should be overwritten? + is_gchp : bool : Is Dev from a GCHP benchmark simulation? + gchp_res : str : GCHP resolution string (e.g. "c24") + gchp_is_pre_14_0 : bool : Is Dev from a GCHP version prior to 14.0.0? """ # Store global variables in a private class @@ -850,7 +824,7 @@ def transport_tracers_budgets( gchp_res, gchp_is_pre_14_0, overwrite, - spcdb_dir + spcdb_file, ) # Data structure for budgets @@ -871,14 +845,14 @@ def transport_tracers_budgets( # Get initial mass from restart file ds = globvars.ds_end - tropmask = globvars.tropmask[globvars.N_MONTHS - 1, :, :, :] + tropmask = globvars.tropmask[globvars.n_months - 1, :, :, :] data["accum_final"] = mass_from_rst( globvars, ds, tropmask ) # Take the difference final - init - data["accum_diff"] = util.dict_diff( + data["accum_diff"] = dict_diff( data["accum_init"], data["accum_final"] ) @@ -940,7 +914,7 @@ def transport_tracers_budgets( ) # Sources - sinks - data["src_minus_snk"] = util.dict_diff( + data["src_minus_snk"] = dict_diff( data["snk_total"], data["src_total"] ) diff --git a/gcpy/benchmark/modules/oh_metrics.py b/gcpy/benchmark/modules/oh_metrics.py index 98217c9e..96a87053 100644 --- a/gcpy/benchmark/modules/oh_metrics.py +++ b/gcpy/benchmark/modules/oh_metrics.py @@ -2,10 +2,6 @@ """ Prints key metrics (e.g. global mean OH, MCF lifetime, and CH4 lifetimes) for a GEOS-Chem full-chemistry simulation or methane simulation. -Requires Python3. - -Calling sequence: -./oh_metrics.py """ # ===================================================================== # %%% IMPORTS ETC. %%% @@ -14,8 +10,10 @@ import warnings import numpy as np import xarray as xr -import gcpy.constants as const -from gcpy.util import make_directory, read_config_file, replace_whitespace +from gcpy.constants import \ + AVOGADRO, ENCODING, MW_AIR_kg, SKIP_THESE_VARS +from gcpy.util import \ + make_directory, read_species_metadata, replace_whitespace # ===================================================================== # %%% METHODS %%% @@ -24,42 +22,26 @@ def combine_dataset(file_list=None): """ - Wrapper for xarray.open_mfdataset, taking into account the - extra arguments needed in xarray 0.15 and later. + Wrapper for xarray.open_mfdataset Args: - file_list: list of str + file_list : list : List of files to read - Returns: - ds: xarray Dataset + REturns + data : xr.Dataset : Object w/ "Metrics" collection data """ # Return a single Dataset containing data from all MeanOH files. - # NOTE: Need to add combine="nested" and concat_dim="time" - # for xarray 0.15 and higher!!! - v = xr.__version__.split(".") - if int(v[0]) == 0 and int(v[1]) >= 15: - try: - ds = xr.open_mfdataset( - file_list, - drop_variables=const.SKIP_THESE_VARS, - combine="nested", - concat_dim="time" - ) - except FileNotFoundError as exc: - msg = f"Could not find one or more files in {file_list}" - raise FileNotFoundError(msg) from exc - else: - try: - ds = xr.open_mfdataset( - file_list, - drop_variables=const.SKIP_THESE_VARS - ) - except FileNotFoundError as exc: - msg = f"Could not find one or more files in {file_list}" - raise FileNotFoundError(msg) from exc + try: + data = xr.open_mfdataset( + file_list, + drop_variables=SKIP_THESE_VARS, + ) + except FileNotFoundError as exc: + msg = f"Could not find one or more files in {file_list}" + raise FileNotFoundError(msg) from exc - return ds + return data def validate_metrics_collection(ds): @@ -99,12 +81,10 @@ def read_metrics_collection(files): into a single xarray Dataset. Args: - data_dir: str - Directory containing data files. - Default: "./OutputDir". + files : list : List of "Metrics" collection netCDF files - Returns: - ds: xarray Dataset + Returns + data : xr.Dataset : Object containing "Metrics" collection data """ # If files a scalar, promote it to a list @@ -113,78 +93,76 @@ def read_metrics_collection(files): files = [files] # Combine data into a single dataset - ds = combine_dataset(files) + data = combine_dataset(files) # Exit if we do not have all necessary metrics variables - if not validate_metrics_collection(ds): + if not validate_metrics_collection(data): msg = "Dataset does not have enough variables for computing metrics!" raise ValueError(msg) - return ds + return data -def total_airmass(ds): +def total_airmass(data): """ Computes the total airmass (in both kg and molec). - Args: - ds: xarray Dataset + Args + data : xr.DataSet : Object w/ "Metrics" collection data - Returns: - airmass_kg, airmass_m: numpy float64 - Total atmospheric air mass in [kg] and [molec] + Returns + airmass_kg : np.float64 : Total atmospheric air mass [kg] + airmass_m : np.float64 : Total atmospheric air mass [molecules] """ - airmass_kg = np.nansum(ds["AirMassColumnFull"].values) - airmass_m = airmass_kg * (const.AVOGADRO / const.MW_AIR_kg) + airmass_kg = np.nansum(data["AirMassColumnFull"].values) + airmass_m = airmass_kg * (AVOGADRO / MW_AIR_kg) return airmass_kg, airmass_m -def global_mean_oh(ds, airmass_kg, mw_oh_kg): +def global_mean_oh(data, airmass_kg, mw_oh_kg): """ Computes the global mean OH concentration (1e5 molec cm-3) - Args: - sum_airmass_kg: numpy float64 - ds: xarray Dataset + Args + data : xr.DataSet : Object w/ "Metrics" collection data + airmass_kg : np.float64 : Total atmospheric air mass [kg] + mw_oh_kg : np.flaot64 : Mol. wt. of OH [kg] - Returns: - sum_mean_oh: numpy float64 + Returns + sum_mean_oh : np.float64 : Sum of Mean OH [1e5 molec/cm3] """ # Divide out total airmass to get total mean OH concentration [kg m-3] # Then convert mean OH from [kg m-3] to [1e5 molec cm-3] - mean_oh = np.nansum(ds["OHwgtByAirMassColumnFull"].values) - mean_oh = (mean_oh / airmass_kg) - mean_oh *= (const.AVOGADRO / (mw_oh_kg * 1.0e6)) * 1.0e-5 + mean_oh = np.nansum(data["OHwgtByAirMassColumnFull"].values) + mean_oh = mean_oh / airmass_kg + mean_oh *= (AVOGADRO / (mw_oh_kg * 1.0e6)) * 1.0e-5 return mean_oh -def lifetimes_wrt_oh(ds, airmass_m): +def lifetimes_wrt_oh(data, airmass_m): """ Computes the lifetimes (in years) of CH4 and CH3CCl3 (aka MCF) against tropospheric OH. - Args: - ds: xarray Dataset - - airmass_m: numpy float64 - Total airmass [molecules] - - s_per_yr: numpy float64 - Conversion factor: seconds to year. + Args + data : xr.DataSet : Object w/ "Metrics" collection data + airmass_m : np.float64 : Total airmass [molecules] + s_per_yr : np.float64 : Seconds in this year - Returns: - ch4_life_wrt_oh, mcf_life_wrt_oh: numpy float64 + Returns + ch4_life_wrt_oh : np.float64 : CH4 lifetime w/r/t OH [years] + mcf_life_wrt_oh : np.float64 : MCF lifetime w/r/t OH [years] """ # Seconds per year s_per_yr = np.float64(86400.0) * np.float64(365.25) # Loss of OH by CH4+OH and MCF+OH reactions [molec] - oh_loss_by_ch4 = np.nansum(ds["LossOHbyCH4columnTrop"].values) - oh_loss_by_mcf = np.nansum(ds["LossOHbyMCFcolumnTrop"].values) + oh_loss_by_ch4 = np.nansum(data["LossOHbyCH4columnTrop"].values) + oh_loss_by_mcf = np.nansum(data["LossOHbyMCFcolumnTrop"].values) # CH4 and MCF lifetimes against OH [years] ch4_life_wrt_oh = (airmass_m / oh_loss_by_ch4) / s_per_yr @@ -193,40 +171,25 @@ def lifetimes_wrt_oh(ds, airmass_m): return ch4_life_wrt_oh, mcf_life_wrt_oh -def init_common_vars(ref, refstr, dev, devstr, spcdb_dir): +def init_common_vars(ref, refstr, dev, devstr, spcdb_files): """ Returns a dictionary containing various quantities that need to be passed between methods. - Args: - ref: str - Path name of "Ref" (aka "Reference") data set file. - - refstr: str - A string to describe ref (e.g. version number) + Args + ref : str : Path name of "Ref" (aka "Reference") data file + refstr : str : Label to describe Ref + dev : str : Path name of "Dev" (aka "Development") data file + devstr : str : Label to describe Dev + spcdb_files : list : Paths to Ref & Dev species_database.yml files - dev: str - Path name of "Dev" (aka "Development") data set file. - The "Dev" data set will be compared against the "Ref" data set. - - devstr: str - A string to describe dev (e.g. version number) - - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None - - Returns: - common_vars: dict + Returns + common_vars : dict : OH Metrics data """ - # Get species database - spcdb = read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True - ) + # Read the species database files in the Ref & Dev rundirs, and + # return a dict containing metadata for the union of species. + # We'll need properties such as mol. wt. for unit conversions, etc. + _, spcdb = read_species_metadata(spcdb_files, quiet=True) # Define common_vars dictionary common_vars = { @@ -236,7 +199,7 @@ def init_common_vars(ref, refstr, dev, devstr, spcdb_dir): "mw_oh_kg": spcdb["OH"]["MW_g"] * 1.0e-3, # # Conversion factors - "kg_to_m_ch4": (spcdb["CH4"]["MW_g"] * 1.0e-3) / const.AVOGADRO, + "kg_to_m_ch4": (spcdb["CH4"]["MW_g"] * 1.0e-3) / AVOGADRO, # # Ref data "refdata": read_metrics_collection(ref), @@ -261,11 +224,11 @@ def compute_oh_metrics(common_vars): Computes the mass-weighted mean OH concentration, CH3CCl3 (aka MCF) lifetime w/r/t OH, and CH4 lifetime w/r/t OH. - Args: - common_vars: dict + Args + common_vars : dict : OH Metrics data - Returns: - common_vars: dict + Returns + common_vars : dict : Updated OH Metrics data """ # ================================================================== @@ -322,14 +285,16 @@ def write_to_file(f, title, ref, dev, absdiff, pctdiff, is_mean_oh=False): Internal routine used by print_metrics to write a specific quantity (mean OH, MCF lifetime, CH4 lifetime) to a file. - Args: - f: file - - title: str - - ref, dev, absdiff, pctdiff: numpy float64 + Args + f : file : File object + title : str : Title for the data + ref : np.float64 : Ref data value + dev : np.float64 : Dev data value + absdiff : np.float64 : Absolute difference + pctdiff : np.float64 : Percent difference - is_mean_oh: bool + Keyword Args + is_mean_oh : bool : Denotes if this data is Mean OH or not """ print(file=f) print("-" * 60, file=f) @@ -353,16 +318,16 @@ def print_metrics(common_vars, dst): Prints the mass-weighted mean OH (full atmospheric column) from a GEOS-Chem simulation. - Args: - ds: xarray Dataset - is_ch4_sim: bool + Args + common_vars : dict : Data containing OH Metrics data + dst : str : Folder where OH Metrics output will be written """ # Create file outfilename = os.path.join(dst, "OH_metrics.txt") # Open filename for output - with open(outfilename, "w") as f: + with open(outfilename, "w", encoding=ENCODING) as f: # ============================================================== # Write header @@ -428,47 +393,25 @@ def make_benchmark_oh_metrics( refstr, dev, devstr, + spcdb_files, dst="./benchmark", overwrite=True, - spcdb_dir=None, ): """ Creates a text file containing metrics of global mean OH, MCF lifetime, and CH4 lifetime for benchmarking purposes. - Args: - ref: str - Path name of "Ref" (aka "Reference") data set file. - - refstr: str - A string to describe ref (e.g. version number) + Args + ref : str : Path name of "Ref" (aka "Reference") data file + refstr : str : Label to describe Ref + dev : str : Path name of "Dev" (aka "Development") data file + devstr : str : Label to describe Dev + spcdb_files : list : Paths to Ref & Dev species_database.yml files - dev: str - Path name of "Dev" (aka "Development") data set file. - The "Dev" data set will be compared against the "Ref" data set. - - devstr: str - A string to describe dev (e.g. version number) - - Keyword Args (optional): - dst: str - A string denoting the destination folder where the file - containing emissions totals will be written. - Default value: ./benchmark - - overwrite: bool - Set this flag to True to overwrite files in the - destination folder (specified by the dst argument). - Default value: False - - spcdb_dir: str - Directory of species_datbase.yml file - Default value: None + Keyword Args + dst : str : Folder where OH Metrics output will be written + overwrite : bool : Overwrite previously-generated files? (T/F) """ - # Make sure the species database folder is passed - if spcdb_dir is None: - raise ValueError("The 'spcdb_dir' argument has not been specified!") - # Replace whitespace in the ref and dev labels refstr = replace_whitespace(refstr) devstr = replace_whitespace(devstr) @@ -490,7 +433,7 @@ def make_benchmark_oh_metrics( refstr, dev, devstr, - spcdb_dir + spcdb_files ) # Compute the OH metrics diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py index 12113ac2..ac250d43 100644 --- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ run_1yr_fullchem_benchmark.py: Driver script for creating benchmark plots and testing gcpy @@ -64,14 +64,14 @@ #TODO: Peel out routines from benchmark_funcs.py into smaller # routines in the gcpy/benchmark/modules folder, such as these: from gcpy.benchmark.modules.benchmark_funcs import \ - diff_of_diffs_toprow_title, get_species_database_dir, \ - make_benchmark_conc_plots, make_benchmark_emis_plots, \ - make_benchmark_emis_tables, make_benchmark_jvalue_plots, \ - make_benchmark_aod_plots, make_benchmark_mass_tables, \ - make_benchmark_operations_budget, make_benchmark_aerosol_tables + diff_of_diffs_toprow_title, make_benchmark_conc_plots, \ + make_benchmark_emis_plots, make_benchmark_emis_tables, \ + make_benchmark_jvalue_plots, make_benchmark_aod_plots, \ + make_benchmark_mass_tables, make_benchmark_operations_budget, \ + make_benchmark_aerosol_tables from gcpy.benchmark.modules.benchmark_utils import \ gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \ - get_log_filepaths, print_benchmark_info + get_log_filepaths, get_species_database_files, print_benchmark_info from gcpy.benchmark.modules.benchmark_models_vs_obs \ import make_benchmark_models_vs_obs_plots from gcpy.benchmark.modules.benchmark_models_vs_sondes \ @@ -106,9 +106,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): bmk_mon_inds = [0, 3, 6, 9] bmk_n_months = len(bmk_mon_strs) - # Folder in which the species_database.yml file is found - spcdb_dir = get_species_database_dir(config) - # ====================================================================== # Data directories # ====================================================================== @@ -128,7 +125,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # Log file directory paths s = "logs_subdir" gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) - gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) + #gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) gchp_vs_gchp_reflogdir, gchp_vs_gchp_devlogdir = gchp_vs_gchp_dirs(config, s) # Directories where plots & tables will be created @@ -289,6 +286,9 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if config["options"]["comparisons"]["gcc_vs_gcc"]["run"]: + # GCC vs GCC species database files + spcdb_files = get_species_database_files(config, "gcc", "gcc") + # ================================================================== # GCC vs GCC filepaths for StateMet collection data # ================================================================== @@ -335,6 +335,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, refmet=refmet, devmet=devmet, dst=gcc_vs_gcc_resultsdir, @@ -343,7 +344,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): weightsdir=config["paths"]["weights_dir"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -360,6 +360,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, + spcdb_files, refmet=refmet[mon_ind], devmet=devmet[mon_ind], dst=gcc_vs_gcc_resultsdir, @@ -369,7 +370,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -402,6 +402,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, subdst="AnnualMean", time_mean=True, @@ -412,7 +413,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): "plot_options"]["by_hco_cat"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -429,6 +429,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], @@ -438,7 +439,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): "plot_options"]["by_hco_cat"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -466,12 +466,12 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, benchmark_type=bmk_type, ref_interval=sec_per_month_ref, dev_interval=sec_per_month_dev, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -503,12 +503,12 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, subdst="AnnualMean", time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -525,11 +525,11 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -562,12 +562,12 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, subdst="AnnualMean", time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -584,11 +584,11 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -621,12 +621,12 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, subdst="AnnualMean", time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species() ) @@ -644,11 +644,11 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species() ) @@ -682,11 +682,11 @@ def gcc_vs_gcc_mass_table(mon): gcc_vs_gcc_refstr, devpath, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_tablesdir, subdst=bmk_mon_yr_strs_dev[mon], label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, - spcdb_dir=spcdb_dir, ) # Create tables in parallel @@ -730,6 +730,7 @@ def gcc_vs_gcc_ops_budg(mon): refpath, config["data"]["dev"]["gcc"]["version"], devpath, + spcdb_files, sec_per_month_ref[mon], sec_per_month_dev[mon], benchmark_type=bmk_type, @@ -776,9 +777,9 @@ def gcc_vs_gcc_ops_budg(mon): config["data"]["dev"]["gcc"]["version"], bmk_year_dev, days_per_month_dev, + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -791,9 +792,9 @@ def gcc_vs_gcc_ops_budg(mon): gcc_vs_gcc_devdir, gcc_vs_gcc_devrstdir, bmk_year_dev, + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -872,9 +873,9 @@ def gcc_vs_gcc_ops_budg(mon): config["data"]["ref"]["gcc"]["version"], dev, config["data"]["dev"]["gcc"]["version"], + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -928,6 +929,9 @@ def gcc_vs_gcc_ops_budg(mon): # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if config["options"]["comparisons"]["gchp_vs_gcc"]["run"]: + # GCHP vs GCC species database files + spcdb_files = get_species_database_files(config, "gcc", "gchp") + # ================================================================== # GCHP vs GCC filepaths for StateMet collection data # ================================================================== @@ -977,6 +981,7 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, refmet=refmet, devmet=devmet, dst=gchp_vs_gcc_resultsdir, @@ -985,7 +990,6 @@ def gcc_vs_gcc_ops_budg(mon): weightsdir=config["paths"]["weights_dir"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1002,6 +1006,7 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, + spcdb_files, refmet=refmet[mon_ind], devmet=devmet[mon_ind], dst=gchp_vs_gcc_resultsdir, @@ -1011,7 +1016,6 @@ def gcc_vs_gcc_ops_budg(mon): plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1045,6 +1049,7 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, subdst="AnnualMean", time_mean=True, @@ -1055,7 +1060,6 @@ def gcc_vs_gcc_ops_budg(mon): "plot_options"]["by_hco_cat"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1072,6 +1076,7 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], @@ -1081,7 +1086,6 @@ def gcc_vs_gcc_ops_budg(mon): "plot_options"]["by_hco_cat"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1110,13 +1114,13 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, devmet=devmet, dst=gchp_vs_gcc_resultsdir, ref_interval=sec_per_month_ref, dev_interval=sec_per_month_dev, benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1149,12 +1153,12 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, subdst="AnnualMean", time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1171,11 +1175,11 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1209,12 +1213,12 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, subdst="AnnualMean", time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1231,11 +1235,11 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1269,12 +1273,12 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, subdst="AnnualMean", time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species() ) @@ -1292,11 +1296,11 @@ def gcc_vs_gcc_ops_budg(mon): gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species() ) @@ -1346,11 +1350,11 @@ def gchp_vs_gcc_mass_table(mon): gchp_vs_gcc_refstr, devpath, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_tablesdir, subdst=bmk_mon_yr_strs_dev[mon], label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, - spcdb_dir=spcdb_dir, dev_met_extra=devareapath ) @@ -1398,6 +1402,7 @@ def gchp_vs_gcc_ops_budg(mon): devpath, bmk_sec_per_month_dev[mon], bmk_sec_per_month_dev[mon], + spcdb_files, benchmark_type=bmk_type, label=f"at 01{bmk_mon_yr_strs_dev[mon]}", operations=[ @@ -1452,9 +1457,9 @@ def gchp_vs_gcc_ops_budg(mon): config["data"]["dev"]["gchp"]["version"], bmk_year_dev, days_per_month_dev, + spcdb_files, dst=gchp_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, is_gchp=True, ) @@ -1470,9 +1475,9 @@ def gchp_vs_gcc_ops_budg(mon): gcc_vs_gcc_devdir, gcc_vs_gcc_devrstdir, bmk_year_dev, + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir ) # Compute Ox budget table for GCHP @@ -1481,9 +1486,8 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gcc_devdir, gchp_vs_gcc_devrstdir, bmk_year_dev, - dst=gchp_vs_gcc_tablesdir, + spcdb_files, overwrite=True, - spcdb_dir=spcdb_dir, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] @@ -1514,9 +1518,9 @@ def gchp_vs_gcc_ops_budg(mon): config["data"]["dev"]["gcc"]["version"], dev, config["data"]["dev"]["gchp"]["version"], + spcdb_files, dst=gchp_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1575,6 +1579,9 @@ def gchp_vs_gcc_ops_budg(mon): # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if config["options"]["comparisons"]["gchp_vs_gchp"]["run"]: + # GCHP vs GCHP species database files + spcdb_files = get_species_database_files(config, "gchp", "gchp") + # ================================================================== # GCHP vs GCHP filepaths for StateMet collection data # ================================================================== @@ -1632,6 +1639,7 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, refmet=refmet, devmet=devmet, cmpres=cmpres, @@ -1643,7 +1651,6 @@ def gchp_vs_gcc_ops_budg(mon): plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1660,6 +1667,7 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, + spcdb_files, cmpres=cmpres, refmet=refmet[mon_ind], devmet=devmet[mon_ind], @@ -1670,7 +1678,6 @@ def gchp_vs_gcc_ops_budg(mon): plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1705,6 +1712,7 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, subdst="AnnualMean", cmpres=cmpres, @@ -1716,7 +1724,6 @@ def gchp_vs_gcc_ops_budg(mon): "plot_options"]["by_hco_cat"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1733,6 +1740,7 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, cmpres=cmpres, subdst=bmk_mon_yr_strs_dev[mon], @@ -1743,7 +1751,6 @@ def gchp_vs_gcc_ops_budg(mon): "plot_options"]["by_hco_cat"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1773,6 +1780,7 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, refmet=refmet, devmet=devmet, dst=gchp_vs_gchp_resultsdir, @@ -1780,7 +1788,6 @@ def gchp_vs_gcc_ops_budg(mon): dev_interval=sec_per_month_dev, benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1814,13 +1821,13 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, subdst='AnnualMean', cmpres=cmpres, time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1837,12 +1844,12 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], cmpres=cmpres, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1877,13 +1884,13 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, subdst="AnnualMean", cmpres=cmpres, time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1900,12 +1907,12 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], cmpres=cmpres, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1940,13 +1947,13 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, subdst="AnnualMean", cmpres=cmpres, time_mean=True, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species() ) @@ -1964,12 +1971,12 @@ def gchp_vs_gcc_ops_budg(mon): gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], cmpres=cmpres, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species() ) @@ -2034,11 +2041,11 @@ def gchp_vs_gchp_mass_table(mon): gchp_vs_gchp_refstr, devpath, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_tablesdir, subdst=bmk_mon_yr_strs_dev[mon], label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, - spcdb_dir=spcdb_dir, ref_met_extra=refareapath, dev_met_extra=devareapath ) @@ -2089,6 +2096,7 @@ def gchp_vs_gchp_ops_budg(mon): devpath, bmk_sec_per_month_ref[mon], bmk_sec_per_month_dev[mon], + spcdb_files, benchmark_type=bmk_type, label=f"at 01{bmk_mon_yr_strs_dev[mon]}", operations=[ @@ -2143,9 +2151,9 @@ def gchp_vs_gchp_ops_budg(mon): config["data"]["dev"]["gchp"]["version"], bmk_year_dev, days_per_month_dev, + spcdb_files, dst=gchp_vs_gchp_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, is_gchp=True, ) @@ -2161,9 +2169,9 @@ def gchp_vs_gchp_ops_budg(mon): gchp_vs_gchp_devdir, gchp_vs_gchp_devrstdir, bmk_year_dev, + spcdb_files, dst=gchp_vs_gchp_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] @@ -2195,9 +2203,9 @@ def gchp_vs_gchp_ops_budg(mon): config["data"]["ref"]["gchp"]["version"], dev, config["data"]["dev"]["gchp"]["version"], + spcdb_files, dst=gchp_vs_gchp_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -2291,6 +2299,8 @@ def gchp_vs_gchp_ops_budg(mon): if config["options"]["outputs"]["plot_conc"]: print("\n%%% Creating GCHP vs. GCC diff-of-diffs conc plots %%%") + # Diff of diffs species database files (use GCC) + spcdb_files = get_species_database_files(config, "gcc", "gcc") # -------------------------------------------------------------- # GCHP vs GCC diff-of-diff species concentration plots: @@ -2328,6 +2338,7 @@ def gchp_vs_gchp_ops_budg(mon): diff_of_diffs_refstr, gchp_ref, diff_of_diffs_devstr, + spcdb_files, dst=diff_of_diffs_resultsdir, subdst="AnnualMean", time_mean=True, @@ -2340,7 +2351,6 @@ def gchp_vs_gchp_ops_budg(mon): second_ref=gcc_dev, second_dev=gchp_dev, cats_in_ugm3=None, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -2357,6 +2367,7 @@ def gchp_vs_gchp_ops_budg(mon): diff_of_diffs_refstr, gchp_ref[mon_ind], diff_of_diffs_devstr, + spcdb_files, dst=diff_of_diffs_resultsdir, subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], @@ -2368,7 +2379,6 @@ def gchp_vs_gchp_ops_budg(mon): second_ref=gcc_dev[mon_ind], second_dev=gchp_dev[mon_ind], cats_in_ugm3=None, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) diff --git a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py index 31a473c8..42c0c763 100644 --- a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ run_1yr_tt_benchmark.py: Script containing code for creating benchmark plots and testing @@ -59,14 +59,13 @@ from joblib import Parallel, delayed from gcpy.util import copy_file_to_dir, get_filepath, get_filepaths from gcpy.benchmark.modules.benchmark_funcs import \ - get_species_database_dir, make_benchmark_conc_plots, \ - make_benchmark_wetdep_plots, make_benchmark_mass_tables, \ - make_benchmark_operations_budget + make_benchmark_conc_plots, make_benchmark_wetdep_plots, \ + make_benchmark_mass_tables, make_benchmark_operations_budget from gcpy.benchmark.modules.budget_tt import transport_tracers_budgets from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table from gcpy.benchmark.modules.benchmark_utils import \ gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \ - get_log_filepaths, print_benchmark_info + get_log_filepaths, get_species_database_files, print_benchmark_info from gcpy.benchmark.modules.benchmark_scrape_gcclassic_timers import \ make_benchmark_gcclassic_timing_table from gcpy.benchmark.modules.benchmark_scrape_gchp_timers import \ @@ -91,11 +90,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): bmk_mon_inds = [0, 3, 6, 9] bmk_n_months = len(bmk_mon_strs) - # ===================================================================== - # Path to species_database.yml - # ===================================================================== - spcdb_dir = get_species_database_dir(config) - # ====================================================================== # Data directories # For gchp_vs_gcc_refdir use config["data"]["dev"]["gcc"]["version"], not ref (mps, 6/27/19) @@ -116,7 +110,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # Log file directory paths s = "logs_subdir" gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) - gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) gchp_vs_gchp_reflogdir, gchp_vs_gchp_devlogdir = gchp_vs_gchp_dirs(config, s) # Directories where plots & tables will be created @@ -263,6 +256,9 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if config["options"]["comparisons"]["gcc_vs_gcc"]["run"]: + # GCC vs GCC species database files + spcdb_files = get_species_database_files(config, "gcc", "gcc") + # ================================================================== # GCC vs GCC filepaths for StateMet collection data # ================================================================== @@ -309,6 +305,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, collection=collection, refmet=refmet, devmet=devmet, @@ -319,7 +316,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, restrict_cats=restrict_cats, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -333,6 +329,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, + spcdb_files, collection=collection, refmet=refmet[mon_ind], devmet=devmet[mon_ind], @@ -342,7 +339,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, restrict_cats=restrict_cats, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -380,7 +376,8 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, - collection=collection, + collection, + spcdb_files, refmet=refmet, devmet=devmet, dst=gcc_vs_gcc_resultsdir, @@ -389,7 +386,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): weightsdir=config["paths"]["weights_dir"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -403,7 +399,8 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, - collection=collection, + collection, + spcdb_files, refmet=refmet[mon_ind], devmet=devmet[mon_ind], dst=gcc_vs_gcc_resultsdir, @@ -411,7 +408,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): weightsdir=config["paths"]["weights_dir"], benchmark_type=bmk_type, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -428,9 +424,9 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_refdir, gcc_vs_gcc_refrstdir, int(bmk_year_ref), + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # Dev @@ -439,9 +435,9 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gcc_vs_gcc_devdir, gcc_vs_gcc_devrstdir, int(bmk_year_dev), + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -473,11 +469,11 @@ def gcc_vs_gcc_mass_table(mon): gcc_vs_gcc_refstr, devpath, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_tablesdir, subdst=bmk_mon_yr_strs_dev[mon], label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, - spcdb_dir=spcdb_dir, ) # Create tables in parallel @@ -519,6 +515,7 @@ def gcc_vs_gcc_mass_table(mon): devs, sec_per_yr_ref, sec_per_yr_dev, + spcdb_files, benchmark_type=bmk_type, label=bmk_year_dev, operations=[ @@ -606,6 +603,9 @@ def gcc_vs_gcc_mass_table(mon): # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if config["options"]["comparisons"]["gchp_vs_gcc"]["run"]: + # GCHP vs GCC species database files + spcdb_files = get_species_database_files(config, "gcc", "gchp") + # ================================================================== # GCHP vs GCC filepaths for StateMet collection data # @@ -657,6 +657,7 @@ def gcc_vs_gcc_mass_table(mon): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, collection=collection, refmet=refmet, devmet=devmet, @@ -667,7 +668,6 @@ def gcc_vs_gcc_mass_table(mon): benchmark_type=bmk_type, restrict_cats=restrict_cats, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -681,6 +681,7 @@ def gcc_vs_gcc_mass_table(mon): gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, + spcdb_files, collection=collection, refmet=refmet[mon_ind], devmet=devmet[mon_ind], @@ -690,7 +691,6 @@ def gcc_vs_gcc_mass_table(mon): benchmark_type=bmk_type, restrict_cats=restrict_cats, overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -729,7 +729,8 @@ def gcc_vs_gcc_mass_table(mon): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, - collection=collection, + collection, + spcdb_files, devmet=devmet, dst=gchp_vs_gcc_resultsdir, datestr="AnnualMean", @@ -738,7 +739,6 @@ def gcc_vs_gcc_mass_table(mon): overwrite=True, benchmark_type=bmk_type, normalize_by_area=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -752,7 +752,8 @@ def gcc_vs_gcc_mass_table(mon): gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, - collection=collection, + collection, + spcdb_files, refmet=refmet[mon_ind], devmet=devmet[mon_ind], dst=gchp_vs_gcc_resultsdir, @@ -761,7 +762,6 @@ def gcc_vs_gcc_mass_table(mon): overwrite=True, benchmark_type=bmk_type, normalize_by_area=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -777,9 +777,9 @@ def gcc_vs_gcc_mass_table(mon): gchp_vs_gcc_refdir, gchp_vs_gcc_refrstdir, int(bmk_year_dev), + spcdb_files, dst=gchp_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # Dev @@ -788,12 +788,12 @@ def gcc_vs_gcc_mass_table(mon): gchp_vs_gcc_devdir, gchp_vs_gcc_devrstdir, int(bmk_year_dev), + spcdb_files, dst=gchp_vs_gcc_tablesdir, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"], overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -841,11 +841,11 @@ def gchp_vs_gcc_mass_table(mon): gchp_vs_gcc_refstr, devpath, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_tablesdir, subdst=bmk_mon_yr_strs_dev[mon], label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, - spcdb_dir=spcdb_dir, dev_met_extra=devareapath ) @@ -889,6 +889,7 @@ def gchp_vs_gcc_mass_table(mon): devs, sec_per_yr_ref, sec_per_yr_dev, + spcdb_files, benchmark_type=bmk_type, label=bmk_year_dev, operations=[ @@ -907,6 +908,9 @@ def gchp_vs_gcc_mass_table(mon): # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if config["options"]["comparisons"]["gchp_vs_gchp"]["run"]: + # GCHP vs GCHP species database files + spcdb_files = get_species_database_files(config, "gchp", "gchp") + # Option to specify grid resolution of comparison plots # This is a temporary hack until cs->cs regridding in GCPy is fixed cmpres="1x1.25" @@ -961,6 +965,7 @@ def gchp_vs_gcc_mass_table(mon): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, collection=collection, refmet=refmet, devmet=devmet, @@ -971,7 +976,6 @@ def gchp_vs_gcc_mass_table(mon): benchmark_type=bmk_type, restrict_cats=restrict_cats, overwrite=True, - spcdb_dir=spcdb_dir, cmpres=cmpres, n_job=config["options"]["n_cores"] ) @@ -986,6 +990,7 @@ def gchp_vs_gcc_mass_table(mon): gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, + spcdb_files, collection=collection, refmet=refmet[mon_ind], devmet=devmet[mon_ind], @@ -995,7 +1000,6 @@ def gchp_vs_gcc_mass_table(mon): benchmark_type=bmk_type, restrict_cats=restrict_cats, overwrite=True, - spcdb_dir=spcdb_dir, cmpres=cmpres, n_job=config["options"]["n_cores"] ) @@ -1036,7 +1040,8 @@ def gchp_vs_gcc_mass_table(mon): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, - collection=collection, + collection, + spcdb_files, refmet=refmet, devmet=devmet, dst=gchp_vs_gchp_resultsdir, @@ -1046,7 +1051,6 @@ def gchp_vs_gcc_mass_table(mon): overwrite=True, benchmark_type=bmk_type, normalize_by_area=True, - spcdb_dir=spcdb_dir, cmpres=cmpres, n_job=config["options"]["n_cores"] ) @@ -1061,7 +1065,8 @@ def gchp_vs_gcc_mass_table(mon): gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, - collection=collection, + collection, + spcdb_files, refmet=refmet[mon_ind], devmet=devmet[mon_ind], dst=gchp_vs_gchp_resultsdir, @@ -1070,7 +1075,6 @@ def gchp_vs_gcc_mass_table(mon): overwrite=True, benchmark_type=bmk_type, normalize_by_area=True, - spcdb_dir=spcdb_dir, cmpres=cmpres, n_job=config["options"]["n_cores"] ) @@ -1087,12 +1091,12 @@ def gchp_vs_gcc_mass_table(mon): gchp_vs_gchp_refdir, gchp_vs_gchp_refrstdir, int(bmk_year_ref), + spcdb_files, dst=gchp_vs_gchp_tablesdir, is_gchp=True, gchp_res=config["data"]["ref"]["gchp"]["resolution"], gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"], overwrite=True, - spcdb_dir=spcdb_dir ) # Dev @@ -1101,12 +1105,12 @@ def gchp_vs_gcc_mass_table(mon): gchp_vs_gchp_devdir, gchp_vs_gchp_devrstdir, int(bmk_year_dev), + spcdb_files, dst=gchp_vs_gchp_tablesdir, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"], overwrite=True, - spcdb_dir=spcdb_dir ) # ================================================================== @@ -1169,11 +1173,11 @@ def gchp_vs_gchp_mass_table(mon): gchp_vs_gchp_refstr, devpath, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_tablesdir, subdst=bmk_mon_yr_strs_dev[mon], label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, - spcdb_dir=spcdb_dir, ref_met_extra=refareapath, dev_met_extra=devareapath ) @@ -1219,6 +1223,7 @@ def gchp_vs_gchp_mass_table(mon): devs, sec_per_yr_ref, sec_per_yr_dev, + spcdb_files, benchmark_type=bmk_type, label=bmk_year_dev, operations=[ @@ -1261,12 +1266,15 @@ def gchp_vs_gchp_mass_table(mon): dst=gchp_vs_gchp_tablesdir, overwrite=True, ) - + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create mass conservations tables for GCC and GCHP # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if config["options"]["outputs"]["cons_table"]: + # GCC vs GCC species database files + spcdb_files = get_species_database_files(config, "gcc", "gcc") + # =================================================================== # Create mass conservation table for GCC vs GCC # =================================================================== @@ -1291,9 +1299,9 @@ def gchp_vs_gchp_mass_table(mon): config["data"]["ref"]["gcc"]["version"], dev_datafiles, config["data"]["dev"]["gcc"]["version"], + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # =================================================================== @@ -1302,6 +1310,9 @@ def gchp_vs_gchp_mass_table(mon): if config["options"]["comparisons"]["gchp_vs_gcc"]["run"]: print("\n%%% Creating GCHP vs GCC mass conservation tables %%%") + # GCHP vs GCC species database files + spcdb_files = get_species_database_files(config, "gcc", "gchp") + # Filepaths ref_datafiles = get_filepaths( gchp_vs_gcc_refrstdir, @@ -1336,9 +1347,9 @@ def gchp_vs_gchp_mass_table(mon): config["data"]["dev"]["gcc"]["version"], dev_datafiles, config["data"]["dev"]["gchp"]["version"], + spcdb_files, dst=gchp_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, dev_areapath=dev_areapath, ) @@ -1348,6 +1359,9 @@ def gchp_vs_gchp_mass_table(mon): if config["options"]["comparisons"]["gchp_vs_gchp"]["run"]: print("\n%%% Creating GCHP dev mass conservation table %%%") + # GCHP vs GCHP species database files + spcdb_files = get_species_database_files(config, "gchp", "gchp") + # Filepaths ref_datafiles = get_filepaths( gchp_vs_gchp_refrstdir, @@ -1393,9 +1407,9 @@ def gchp_vs_gchp_mass_table(mon): config["data"]["ref"]["gchp"]["version"], dev_datafiles, config["data"]["dev"]["gchp"]["version"], + spcdb_files, dst=gchp_vs_gchp_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ref_areapath=ref_areapath, dev_areapath=dev_areapath, ) diff --git a/gcpy/benchmark/modules/ste_flux.py b/gcpy/benchmark/modules/ste_flux.py index ee947d72..ba564185 100644 --- a/gcpy/benchmark/modules/ste_flux.py +++ b/gcpy/benchmark/modules/ste_flux.py @@ -8,14 +8,13 @@ # Imports etc. # ====================================================================== -import os from calendar import monthrange, month_abbr import warnings import gc import numpy as np import pandas as pd import xarray as xr -import gcpy.constants as physconsts +from gcpy.constants import ENCODING, SKIP_THESE_VARS from gcpy.util import make_directory, replace_whitespace # Suppress harmless run-time warnings (mostly about underflow in division) @@ -65,7 +64,7 @@ def __init__(self, devstr, files, dst, year, self.overwrite = overwrite self.species = species self.month = month - self.is_TransportTracers = "TransportTracers" in bmk_type + self.is_transport_tracers = "TransportTracers" in bmk_type # ------------------------------ # Benchmark year @@ -86,35 +85,20 @@ def __init__(self, devstr, files, dst, year, # Vertical flux diagnostics # Return a single Dataset containing data from all files. - # NOTE: Need to add combine="nested" and concat_dim="time" - # for xarray 0.15 and higher!!! - v = xr.__version__.split(".") - if int(v[0]) == 0 and int(v[1]) >= 15: - try: - self.ds_flx = xr.open_mfdataset( - files, - drop_variables=physconsts.SKIP_THESE_VARS, - combine="nested", - concat_dim="time" - ) - except FileNotFoundError as exc: - msg = f"Could not find one or more files in {files}" - raise FileNotFoundError(msg) from exc - else: - try: - self.ds_flx = xr.open_mfdataset( - files, - drop_variables=physconsts.SKIP_THESE_VARS, - ) - except FileNotFoundError as exc: - msg = f"Could not find one or more files in {files}" - raise FileNotFoundError(msg) from exc + try: + self.ds_flx = xr.open_mfdataset( + files, + drop_variables=SKIP_THESE_VARS, + ) + except FileNotFoundError as exc: + msg = f"Could not find one or more files in {files}" + raise FileNotFoundError(msg) from exc # Set a flag to denote if this data is from GCHP self.is_gchp = "nf" in self.ds_flx.dims.keys() # 100 hPa level (assumes GEOS-FP/MERRA2 72-level grid) - self.hPa_100_lev = 35 + self.hpa_100_lev = 35 # Set a flag to denote if this is a 1-year benchmark self.is_1yr = self.month is None @@ -125,17 +109,16 @@ def __init__(self, devstr, files, dst, year, # ----------------------------------- # Months and days: 1-year benchmark # ----------------------------------- - self.N_MONTHS = 12 - self.N_MONTHS_FLOAT = self.N_MONTHS * 1.0 + self.n_months = 12 # Days per month in the benchmark year - self.d_per_mon = np.zeros(self.N_MONTHS) - for t in range(self.N_MONTHS): + self.d_per_mon = np.zeros(self.n_months) + for t in range(self.n_months): self.d_per_mon[t] = monthrange(self.y0, t + 1)[1] * 1.0 # Month names self.mon_name = [] - for t in range(self.N_MONTHS): + for t in range(self.n_months): self.mon_name.append(f"{ month_abbr[t + 1]} {self.y0_str}") self.mon_name.append("Annual Mean") @@ -147,8 +130,7 @@ def __init__(self, devstr, files, dst, year, # ----------------------------------- # Months and days: 1-month benchmark # ----------------------------------- - self.N_MONTHS = 1 - self.N_MONTHS_FLOAT = self.N_MONTHS * 1.0 + self.n_months = 1 # Days per month in the benchmark year self.d_per_mon = [monthrange(self.y0, self.month)[1] * 1.0] @@ -166,7 +148,7 @@ def __init__(self, devstr, files, dst, year, # ------------------------------ # kg/s --> Tg/yr - kg_s_to_Tg_yr = 1.0e-9 * 86400.0 * self.d_per_yr + kg_s_to_tg_yr = 1.0e-9 * 86400.0 * self.d_per_yr # kg/s --> g/yr kg_s_to_g_yr = 1000.0 * 86400.0 * self.d_per_yr @@ -174,10 +156,10 @@ def __init__(self, devstr, files, dst, year, # Define the conversion factor dict self.conv_factor = {} for spc in species: - if self.is_TransportTracers: + if self.is_transport_tracers: self.conv_factor[spc] = kg_s_to_g_yr else: - self.conv_factor[spc] = kg_s_to_Tg_yr + self.conv_factor[spc] = kg_s_to_tg_yr # ====================================================================== @@ -199,13 +181,13 @@ def compute_ste(globvars): """ # 100 hPa level index (convert to Python array notation) - lev = globvars.hPa_100_lev - 1 + lev = globvars.hpa_100_lev - 1 # 1-year benchmarks also include the annual mean row if globvars.is_1yr: - n_rows = globvars.N_MONTHS + 1 + n_rows = globvars.n_months + 1 else: - n_rows = globvars.N_MONTHS + n_rows = globvars.n_months # Create a dictionary to define a DataFrame df_dict = {} @@ -217,7 +199,7 @@ def compute_ste(globvars): # Compute monthly strat-trop exchange fluxes, # taken as the vertical flux across the 100hPa level - for t in range(globvars.N_MONTHS): + for t in range(globvars.n_months): month = globvars.mon_name[t] for spc in globvars.species: var = globvars.data_vars[spc] @@ -256,11 +238,11 @@ def print_ste(globvars, df): pd.options.display.float_format = '{:11.4f}'.format # Open filename for output - with open(filename, "w+") as f: + with open(filename, "w+", encoding=ENCODING) as f: # Print header print("%" * 79, file=f) - if globvars.is_TransportTracers: + if globvars.is_transport_tracers: print(f" Table 4. Strat-trop exchange in {globvars.devstr} for year {globvars.y0_str}", file=f) print(" (i.e. species flux across 100 hPa)", file=f) print("\n Units: g/yr", file=f) @@ -279,12 +261,16 @@ def print_ste(globvars, df): print(df, file=f) -def make_benchmark_ste_table(devstr, files, year, - dst='./1yr_benchmark', - bmk_type="FullChemBenchmark", - species=["O3"], - overwrite=True, - month=None): +def make_benchmark_ste_table( + devstr, + files, + year, + dst='./1yr_benchmark', + bmk_type="FullChemBenchmark", + species=None, + overwrite=True, + month=None +): """ Driver program. Computes and prints strat-trop exchange for the selected species and benchmark year. @@ -311,6 +297,9 @@ def make_benchmark_ste_table(devstr, files, year, Default: None (denotes a 1-year benchmark) """ + if species is None: + species = ["O3"] + # Initialize a private class with required global variables globvars = _GlobVars(devstr, files, dst, int(year), bmk_type, species, overwrite, month) diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py index ff8ac355..13c88564 100755 --- a/gcpy/benchmark/run_benchmark.py +++ b/gcpy/benchmark/run_benchmark.py @@ -48,11 +48,11 @@ import warnings from datetime import datetime import numpy as np -from gcpy.util import copy_file_to_dir, get_filepath, read_config_file +from gcpy.util import \ + copy_file_to_dir, get_filepath, read_config_file from gcpy.date_time import add_months, is_full_year from gcpy.benchmark.modules.benchmark_funcs import \ - diff_of_diffs_toprow_title, get_species_database_dir, \ - create_benchmark_summary_table, \ + diff_of_diffs_toprow_title, create_benchmark_summary_table, \ create_benchmark_sanity_check_table, \ make_benchmark_conc_plots, make_benchmark_emis_plots, \ make_benchmark_emis_tables, make_benchmark_jvalue_plots, \ @@ -70,9 +70,9 @@ import run_benchmark as run_1yr_tt_benchmark from gcpy.benchmark.modules.benchmark_utils import \ gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \ - get_log_filepaths, print_benchmark_info -from gcpy.benchmark.modules.benchmark_drydep \ - import drydepvel_species, make_benchmark_drydep_plots + get_log_filepaths, get_species_database_files, print_benchmark_info +from gcpy.benchmark.modules.benchmark_drydep import \ + drydepvel_species, make_benchmark_drydep_plots from gcpy.benchmark.modules.benchmark_scrape_gcclassic_timers import \ make_benchmark_gcclassic_timing_table from gcpy.benchmark.modules.benchmark_scrape_gchp_timers import \ @@ -137,9 +137,6 @@ def run_benchmark_default(config): config : dict Contains configuration for 1mon benchmark from yaml file. """ - # Folder in which the species_database.yml file is found - spcdb_dir = get_species_database_dir(config) - # ===================================================================== # Data directories # For gchp_vs_gcc_refdir use config["data"]["dev"]["gcc"]["version"], @@ -317,8 +314,10 @@ def run_benchmark_default(config): if config["options"]["comparisons"]["gcc_vs_gcc"]["run"]: # ================================================================== - # GCC vs GCC error checks + # GCC vs GCC initialization & error checks # ================================================================== + spcdb_files = get_species_database_files(config, "gcc", "gcc") + if not np.equal(gcc_ref_sec_diff, gcc_dev_sec_diff): print("Skipping GCC vs GCC emissions tables and operations") print("budget tables because months are different lengths") @@ -369,6 +368,7 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, refmet=refmet, devmet=devmet, dst=gcc_vs_gcc_resultsdir, @@ -378,7 +378,6 @@ def run_benchmark_default(config): benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -398,6 +397,7 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"]["plot_options"][ @@ -407,7 +407,6 @@ def run_benchmark_default(config): benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -427,12 +426,12 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, ref_interval=[gcc_ref_sec_diff], dev_interval=[gcc_dev_sec_diff], benchmark_type=config["options"]["bmk_type"], overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -451,11 +450,11 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -477,12 +476,12 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, colname=colname, var_prefix=colname, dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -504,11 +503,11 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, colname=colname, dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -524,12 +523,12 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, devmet, gcc_vs_gcc_devstr, + spcdb_files, colname="StateMet", var_prefix='Met', dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -545,12 +544,12 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, devmet, gcc_vs_gcc_devstr, + spcdb_files, colname="StateMet", var_prefix='Met', dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -570,11 +569,11 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -594,11 +593,11 @@ def run_benchmark_default(config): gcc_vs_gcc_refstr, dev, gcc_vs_gcc_devstr, + spcdb_files, dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species() ) @@ -619,9 +618,9 @@ def run_benchmark_default(config): config["data"]["ref"]["gcc"]["version"], dev, config["data"]["dev"]["gcc"]["version"], + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -654,9 +653,9 @@ def run_benchmark_default(config): deve, config["data"]["dev"]["gcc"]["version"], devperiod, + spcdb_files, overwrite=True, dst=gcc_vs_gcc_tablesdir, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -698,9 +697,9 @@ def run_benchmark_default(config): config["data"]["ref"]["gcc"]["version"], dev, config["data"]["dev"]["gcc"]["version"], + spcdb_files, dst=gcc_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -822,8 +821,10 @@ def run_benchmark_default(config): if config["options"]["comparisons"]["gchp_vs_gcc"]["run"]: # ================================================================== - # GCHP vs GCC error checks + # GCHP vs GCC initialization & error checks # ================================================================== + spcdb_files = get_species_database_files(config, "gcc", "gchp") + if not np.equal(gcc_dev_sec_diff, gchp_dev_sec_diff): print("Skipping GCHP vs GCC emissions tables and operations") print("budget tables because months are different lengths") @@ -885,6 +886,7 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, refmet=refmet, devmet=devmet, dst=gchp_vs_gcc_resultsdir, @@ -894,7 +896,6 @@ def run_benchmark_default(config): benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -919,6 +920,7 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"]["plot_options"][ @@ -930,7 +932,6 @@ def run_benchmark_default(config): benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -955,13 +956,13 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, ref_interval=[gcc_dev_sec_diff], dev_interval=[gchp_dev_sec_diff], benchmark_type=config["options"]["bmk_type"], overwrite=True, devmet=devmet, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -985,11 +986,11 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1016,12 +1017,12 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, colname=colname, var_prefix=colname, dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1048,11 +1049,11 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, colname=colname, dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1068,12 +1069,12 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, devmet, gchp_vs_gcc_devstr, + spcdb_files, colname="StateMet", var_prefix='Met', dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1089,12 +1090,12 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, devmet, gchp_vs_gcc_devstr, + spcdb_files, colname="StateMet", var_prefix='Met', dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1105,7 +1106,11 @@ def run_benchmark_default(config): print("\n%%% Creating GCHP vs. GCC column AOD plots %%%") # Filepaths - ref = get_filepath(gchp_vs_gcc_refdir, "Aerosols", gcc_dev_date) + ref = get_filepath( + gchp_vs_gcc_refdir, + "Aerosols", + gcc_dev_date + ) dev = get_filepath( gchp_vs_gcc_devdir, "Aerosols", @@ -1119,11 +1124,11 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1148,11 +1153,11 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species(), ) @@ -1184,9 +1189,9 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1241,9 +1246,9 @@ def run_benchmark_default(config): deve, config["data"]["dev"]["gchp"]["version"], devperiod, + spcdb_files, overwrite=True, dst=gchp_vs_gcc_tablesdir, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1303,9 +1308,9 @@ def run_benchmark_default(config): gchp_vs_gcc_refstr, dev, gchp_vs_gcc_devstr, + spcdb_files, dst=gchp_vs_gcc_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1353,8 +1358,10 @@ def run_benchmark_default(config): if config["options"]["comparisons"]["gchp_vs_gchp"]["run"]: # ================================================================== - # GCHP vs GCHP error checks + # GCC vs GCC initialization & error checks # ================================================================== + spcdb_files = get_species_database_files(config, "gchp", "gchp") + if not np.equal(gchp_ref_sec_diff, gchp_dev_sec_diff): print("Skipping GCHP vs GCHP emissions tables and operations") print("budget tables because months are different lengths") @@ -1428,6 +1435,7 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, refmet=refmet, devmet=devmet, dst=gchp_vs_gchp_resultsdir, @@ -1437,7 +1445,6 @@ def run_benchmark_default(config): benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1467,6 +1474,7 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"]["plot_options"][ @@ -1476,7 +1484,6 @@ def run_benchmark_default(config): benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1506,6 +1513,7 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, ref_interval=[gchp_ref_sec_diff], dev_interval=[gchp_dev_sec_diff], @@ -1513,7 +1521,6 @@ def run_benchmark_default(config): overwrite=True, refmet=refmet, devmet=devmet, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1542,11 +1549,11 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1578,12 +1585,12 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, colname=colname, var_prefix=colname, dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1615,11 +1622,11 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, colname=colname, dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1635,12 +1642,12 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, devmet, gchp_vs_gchp_devstr, + spcdb_files, colname="StateMet", var_prefix='Met', dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1656,12 +1663,12 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, devmet, gchp_vs_gchp_devstr, + spcdb_files, colname="StateMet", var_prefix='Met', dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1691,11 +1698,11 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) @@ -1725,11 +1732,11 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], varlist=drydepvel_species() ) @@ -1764,9 +1771,9 @@ def run_benchmark_default(config): gchp_vs_gchp_refstr, dev, gchp_vs_gchp_devstr, + spcdb_files, dst=gchp_vs_gchp_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1827,9 +1834,9 @@ def run_benchmark_default(config): deve, config["data"]["dev"]["gchp"]["version"], devperiod, + spcdb_files, overwrite=True, dst=gchp_vs_gchp_tablesdir, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -1899,9 +1906,9 @@ def run_benchmark_default(config): config["data"]["ref"]["gchp"]["version"], dev, config["data"]["dev"]["gchp"]["version"], + spcdb_files, dst=gchp_vs_gchp_tablesdir, overwrite=True, - spcdb_dir=spcdb_dir, ) # ================================================================== @@ -2008,9 +2015,20 @@ def run_benchmark_default(config): if config["options"]["outputs"]["plot_conc"]: print("\n%%% Creating GCHP vs. GCC diff-of-diffs conc plots %%%") + # Species database files (just use the gcc ones) + spcdb_files = get_species_database_files(config, "gcc", "gcc") + # Filepaths - gcc_ref = get_filepath(gcc_vs_gcc_refdir, "SpeciesConc", gcc_ref_date) - gcc_dev = get_filepath(gcc_vs_gcc_devdir, "SpeciesConc", gcc_dev_date) + gcc_ref = get_filepath( + gcc_vs_gcc_refdir, + "SpeciesConc", + gcc_ref_date + ) + gcc_dev = get_filepath( + gcc_vs_gcc_devdir, + "SpeciesConc", + gcc_dev_date + ) gchp_ref = get_filepath( gchp_vs_gchp_refdir, "SpeciesConc", @@ -2031,6 +2049,7 @@ def run_benchmark_default(config): diff_of_diffs_refstr, gchp_ref, diff_of_diffs_devstr, + spcdb_files, dst=diff_of_diffs_resultsdir, weightsdir=config["paths"]["weights_dir"], benchmark_type=config["options"]["bmk_type"], @@ -2039,7 +2058,6 @@ def run_benchmark_default(config): second_ref=gcc_dev, second_dev=gchp_dev, cats_in_ugm3=None, - spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"] ) diff --git a/gcpy/cstools.py b/gcpy/cstools.py index 5bd3650f..cec06ab1 100644 --- a/gcpy/cstools.py +++ b/gcpy/cstools.py @@ -174,7 +174,7 @@ def face_area( xyz_corner = np.zeros((4,3)) for i_vert in range(4): x_lon = i_lon + (i_vert > 1) - x_lat = i_lat + i_vert in (0, 3) + x_lat = i_lat + (i_vert in (0, 3)) lon_corner[i_vert] = lon_b_rad[x_lon,x_lat] lat_corner[i_vert] = lat_b_rad[x_lon,x_lat] for i_vert in range(4): diff --git a/gcpy/plot/compare_single_level.py b/gcpy/plot/compare_single_level.py index 3d80de21..4219f935 100644 --- a/gcpy/plot/compare_single_level.py +++ b/gcpy/plot/compare_single_level.py @@ -20,9 +20,10 @@ from pypdf import PdfReader, PdfWriter from gcpy.grid import get_grid_extents, call_make_grid from gcpy.regrid import regrid_comparison_data, create_regridders -from gcpy.util import reshape_MAPL_CS, get_diff_of_diffs, \ +from gcpy.util import \ + reshape_MAPL_CS, get_diff_of_diffs, get_molwt_from_metadata, \ all_zero_or_nan, slice_by_lev_and_time, compare_varnames, \ - read_config_file, verify_variable_type + read_species_metadata, verify_variable_type from gcpy.units import check_units, data_unit_is_mol_per_mol from gcpy.constants import MW_AIR_g, NO_STRETCH_SG_PARAMS from gcpy.plot.core import gcpy_style, six_panel_subplot_names, \ @@ -64,7 +65,7 @@ def compare_single_level( sigdiff_list=None, second_ref=None, second_dev=None, - spcdb_dir=os.path.dirname(__file__), + spcdb_files=None, ll_plot_func='imshow', **extra_plot_args ): @@ -127,6 +128,10 @@ def compare_single_level( convert_to_ugm3: bool Whether to convert data units to ug/m3 for plotting. Default value: False + spcdb_files: str | list + A single species_database.yml file or a list of files + (e.g. for Ref & Dev). Only used when convert_ugm3=True. + Default value: None flip_ref: bool Set this flag to True to flip the vertical dimension of 3D variables in the Ref dataset. @@ -172,9 +177,6 @@ def compare_single_level( A dataset of the same model type / grid as devdata, to be used in diff-of-diffs plotting. Default value: None - spcdb_dir: str - Directory containing species_database.yml file. - Default value: Path of GCPy code repository ll_plot_func: str Function to use for lat/lon single level plotting with possible values 'imshow' and 'pcolormesh'. imshow is much @@ -230,15 +232,18 @@ def compare_single_level( savepdf = True if pdfname == "": savepdf = False + + # If converting to ug/m3, read species database file(s) to obtain + # molecular weights. if convert_to_ugm3: - properties = read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True + if spcdb_files is None: + msg = "You must pass 'spcdb_files' when convert_to_ugm3=True!" + raise ValueError(msg) + ref_metadata, dev_metadata = read_species_metadata( + spcdb_files, + quiet= True ) - + # Get stretched grid info, if any. # Parameter order is stretch factor, target longitude, target latitude. # Stretch factor 1 corresponds with no stretch. @@ -425,36 +430,42 @@ def compare_single_level( False ) - # Get a list of properties for the given species + # Get the species molecular weights from Ref & Dev metadata spc_name = varname.replace(varname.split("_")[0] + "_", "") - species_properties = properties.get(spc_name) - - # If no properties are found, then exit with an error. - # Otherwise, get the molecular weight in g/mol. - if species_properties is None: - # Hack lumped species until we implement a solution - if spc_name in ["Simple_SOA", "Complex_SOA"]: - spc_mw_g = 150.0 - else: - msg = f"No properties found for {spc_name}. Cannot convert" \ - + " to ug/m3." - raise ValueError(msg) - else: - spc_mw_g = species_properties.get("MW_g") - if spc_mw_g is None: - msg = f"Molecular weight not found for species {spc_name}!" \ - + " Cannot convert to ug/m3." - raise ValueError(msg) + ref_spc_mw_g = get_molwt_from_metadata(ref_metadata, spc_name) + dev_spc_mw_g = get_molwt_from_metadata(dev_metadata, spc_name) + + # Skip if the species has no molecular weight in + # both Ref & Dev species metadata + if ref_spc_mw_g is None and dev_spc_mw_g is None: + msg = f"Cannot convert {spc_name} to ug/m3! " + msg +="no molecular weight found in Ref & Dev metadata!" + continue + + # If only one of the species has no molecular weight + # print a warning message but allow comparison to proceed + if ref_spc_mw_g is None or dev_spc_mw_g is None: + msg = f"Cannot convert {spc_name} to ug/m3!, " + msg +="no molecular weight was found!" + print(msg) # Convert values from ppb to ug/m3: - # ug/m3 = mol/mol * mol/g air * kg/m3 air * 1e3g/kg + # ug/m3 = 1e-9ppb * mol/g air * kg/m3 air * 1e3g/kg # * g/mol spc * 1e6ug/g # = ppb * air density * (spc MW / air MW) - ds_refs[i].values = ds_refs[i].values * ref_airden.values \ - * (spc_mw_g / MW_AIR_g) - ds_devs[i].values = ds_devs[i].values * dev_airden.values \ - * (spc_mw_g / MW_AIR_g) - + # + # If mol. wt. is missing, then set data to NaN + if ref_spc_mw_g is not None: + ds_refs[i].values *= \ + ref_airden.values * (ref_spc_mw_g / MW_AIR_g) + else: + ds_refs[i].values *= np.nan + if dev_spc_mw_g is not None: + ds_devs[i].values *= \ + dev_airden.values * (dev_spc_mw_g / MW_AIR_g) + else: + ds_devs[i].values *= np.nan + # Update units string ds_refs[i].attrs["units"] = "\u03BCg/m3" # ug/m3 using mu ds_devs[i].attrs["units"] = "\u03BCg/m3" diff --git a/gcpy/plot/compare_zonal_mean.py b/gcpy/plot/compare_zonal_mean.py index 6adb651d..c8857f62 100644 --- a/gcpy/plot/compare_zonal_mean.py +++ b/gcpy/plot/compare_zonal_mean.py @@ -20,9 +20,10 @@ pad_pressure_edges, convert_lev_to_pres from gcpy.regrid import regrid_comparison_data, create_regridders, gen_xmat, \ regrid_vertical -from gcpy.util import reshape_MAPL_CS, get_diff_of_diffs, \ +from gcpy.util import \ + get_molwt_from_metadata, reshape_MAPL_CS, get_diff_of_diffs, \ all_zero_or_nan, compare_varnames, \ - read_config_file, verify_variable_type + read_species_metadata, verify_variable_type from gcpy.units import check_units, data_unit_is_mol_per_mol from gcpy.constants import MW_AIR_g, NO_STRETCH_SG_PARAMS from gcpy.plot.core import gcpy_style, six_panel_subplot_names, \ @@ -53,6 +54,7 @@ def compare_zonal_mean( normalize_by_area=False, enforce_units=True, convert_to_ugm3=False, + spcdb_files=None, flip_ref=False, flip_dev=False, use_cmap_RdBu=False, @@ -64,7 +66,6 @@ def compare_zonal_mean( sigdiff_list=None, second_ref=None, second_dev=None, - spcdb_dir=os.path.dirname(__file__), ref_vert_params=None, dev_vert_params=None, **extra_plot_args @@ -130,6 +131,10 @@ def compare_zonal_mean( convert_to_ugm3: str Whether to convert data units to ug/m3 for plotting. Default value: False + spcdb_files: str | list + A single species_database.yml file or a list of files + (e.g. for Ref & Dev). Only used when convert_to_ugm3=True. + Default value: None flip_ref: bool Set this flag to True to flip the vertical dimension of 3D variables in the Ref dataset. @@ -174,9 +179,6 @@ def compare_zonal_mean( A dataset of the same model type / grid as devdata, to be used in diff-of-diffs plotting. Default value: None - spcdb_dir: str - Directory containing species_database.yml file. - Default value: Path of GCPy code repository ref_vert_params: list(AP, BP) of list-like types Hybrid grid parameter A in hPa and B (unitless). Needed if ref grid is not 47 or 72 levels. @@ -243,14 +245,16 @@ def compare_zonal_mean( savepdf = True if pdfname == "": savepdf = False - # If converting to ug/m3, load the species database + + # If converting to ug/m3, read species database file(s) so that + # we can obtain molecular weights. if convert_to_ugm3: - properties = read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True + if spcdb_files is None: + msg = "You must pass 'spcdb_files' when convert_to_ugm3=True!" + raise ValueError(msg) + ref_metadata, dev_metadata = read_species_metadata( + spcdb_files, + quiet= True ) # Get mid-point pressure and edge pressures for this grid @@ -453,36 +457,41 @@ def compare_zonal_mean( else: dev_airden = devmet["Met_AIRDEN"].isel(lev=dev_pmid_ind) - # Get a list of properties for the given species + # Get the species molecular weights from Ref & Dev metadata spc_name = varname.replace(varname.split("_")[0] + "_", "") - species_properties = properties.get(spc_name) - - # If no properties are found, then exit with an error. - # Otherwise, get the molecular weight in g/mol. - if species_properties is None: - # Hack lumped species until we implement a solution - if spc_name in ["Simple_SOA", "Complex_SOA"]: - spc_mw_g = 150.0 - else: - msg = f"No properties found for {spc_name}. Cannot convert" \ - + " to ug/m3." - raise ValueError(msg) - else: - # Get the species molecular weight in g/mol - spc_mw_g = species_properties.get("MW_g") - if spc_mw_g is None: - msg = f"Molecular weight not found for for species {spc_name}!" \ - + " Cannot convert to ug/m3." - raise ValueError(msg) + ref_spc_mw_g = get_molwt_from_metadata(ref_metadata, spc_name) + dev_spc_mw_g = get_molwt_from_metadata(dev_metadata, spc_name) + + # Skip if the species has no molecular weight in + # both Ref & Dev species metadata + if ref_spc_mw_g is None and dev_spc_mw_g is None: + msg = f"Cannot convert {spc_name} to ug/m3! " + msg +="no molecular weight found in Ref & Dev metadata!" + continue + + # If only one of the species has no molecular weight + # print a warning message but allow comparison to proceed + if ref_spc_mw_g is None or dev_spc_mw_g is None: + msg = f"Cannot convert {spc_name} to ug/m3!, " + msg +="no molecular weight was found!" + print(msg) # Convert values from ppb to ug/m3: # ug/m3 = 1e-9ppb * mol/g air * kg/m3 air * 1e3g/kg # * g/mol spc * 1e6ug/g # = ppb * air density * (spc MW / air MW) - ds_refs[i].values = ds_refs[i].values * ref_airden.values \ - * (spc_mw_g / MW_AIR_g) - ds_devs[i].values = ds_devs[i].values * dev_airden.values \ - * (spc_mw_g / MW_AIR_g) + # + # If mol. wt. is missing, then set data to NaN + if ref_spc_mw_g is not None: + ds_refs[i].values *= \ + ref_airden.values * (ref_spc_mw_g / MW_AIR_g) + else: + ds_refs[i].values *= np.nan + if dev_spc_mw_g is not None: + ds_devs[i].values *= \ + dev_airden.values * (dev_spc_mw_g / MW_AIR_g) + else: + ds_devs[i].values *= np.nan # Update units string ds_refs[i].attrs["units"] = "\u03BCg/m3" # ug/m3 using mu diff --git a/gcpy/util.py b/gcpy/util.py index 551a4166..fae975e2 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -50,6 +50,7 @@ def convert_lon( """ verify_variable_type(data, (xr.DataArray, xr.Dataset)) + roll_len = 0 data_copy = data.copy() lon = data_copy[dim].values @@ -2269,7 +2270,7 @@ def get_element_of_series(series, element): Returns a specified element of a pd.Series object. Args - serie : pd.Series : A pd.Series object + series : pd.Series : A pd.Series object element : int : Element of the pd.Series object to return Returns @@ -2279,3 +2280,63 @@ def get_element_of_series(series, element): verify_variable_type(element, int) return list(series)[element] + + +def read_species_metadata(files, quiet=True): + """ + Reads species metadata from multiple files and returns a dict + containing metadata for the union of species. + + Args + files : str|list : Species database file(s) to read + + Keyword Args + quiet : bool : Quiet (true) or verbose (false) printout + + Returns + ref_spcdb : dict : Species metadata for the Ref model + dev_spcdb : dict : Species metadata for the Dev model + """ + + # 1 file is passed, return the same metadata for Ref & Dev + if isinstance(files, str): + dev_spcdb = read_config_file(files, quiet=quiet) + ref_spcdb = dev_spcdb + return ref_spcdb, dev_spcdb + + # 2 files are passed, return Ref & Dev metadata separately + if isinstance(files, list): + ref_spcdb = read_config_file(files[0], quiet=quiet) + dev_spcdb = read_config_file(files[1], quiet=quiet) + return ref_spcdb, dev_spcdb + + raise ValueError("Argument 'files' must be of type 'str' or 'list'!") + + +def get_molwt_from_metadata(metadata, spc_name): + """ + Extracts molecular weight [g/mol] from a dictionary + containing species metadata. + + Args + metadata : dict : Metadata for GEOS-Chem species + spc_name : str : Name of the desired species + + Returns + spc_mw_g : float|None : Species molecular weight [g/mol] + """ + # Extract the metadata for the given species + species_metadata = metadata.get(spc_name) + + # If no properties are found, check if this is one of the SOA + # species. If so, return 150 g/mol, otherwise exit w/ error. + if species_metadata is None: + if spc_name not in ["Simple_SOA", "Complex_SOA"]: + return None + return 150.0 + + # Otherwise return the mol. wt. as listed in the species metadata + spc_mw_g = species_metadata.get("MW_g") + if spc_mw_g is None: + return None + return spc_mw_g