Skip to content

Commit 1350ff9

Browse files
committed
Merge PR #397 (Fix errors in annual mean diff-of-diffs plots)
This merge brings PR #397 (Fix errors in annual mean GCHP vs GC-Classic "diff-of-diffs" plots, by @yantosca) into the GCPy 1.7.0 development stream. In PR #397 We have done the following: 1. Updated functions compare_single_level and compare_zonal_mean so as to not take the time mean of data if we are generating diff-of-diff plots. 2. Simplified function get_diff_of_diffs (in gcpy/util.py) so that a single algorithm is used for both GC-Classic and GCHP. 3. Now take the time mean of absolute and fractional differences in get_diff_of_diffs 4. Added cosmetic changes (comments) Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
2 parents fb063ef + 86c23f4 commit 1350ff9

File tree

5 files changed

+95
-85
lines changed

5 files changed

+95
-85
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5252
- Fixed the restart regridding for stretched GCHP when target lat/lon is exactly 0.0 in `gcpy/regrid_restart_file.py`
5353
- Fixed computation of the global AOD benchmark table caused by hardwired species names
5454
- Fixed error in `create_benchmark_emissions_table` where all species were assumed to be in Ref and Dev even if they were not
55+
- Fixed error that caused the GCHP vs GCC diff-of-diffs AnnualMean plots to be computed as a difference of means instead of a mean of differences
5556

5657
### Removed
5758
- Removed `PdfMerger()` from `compare_single_level` and `compare_zonal_mean`, it has been removed in pypdf >= 5.0.0
5859
- Removed `.load()` statements from xarray Datasets to improve performance
5960
- Removed `paths:spcdb_dir` YAML tag in benchmark configuration files
6061
- Removed `st_Ox` from `benchmark_categories.yml`; this species is no longer used in TransportTracers simulations
62+
- Removed special data handling for files generated with MAPL versions prior to 1.0.0 in function `get_diff_of_diffs` (located in `gcpy/util.py`)
6163

6264
## [1.6.2] - 2025-06-12
6365
### Added

gcpy/benchmark/modules/benchmark_funcs.py

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,21 +1060,33 @@ def make_benchmark_conc_plots(
10601060
devmetds = reader(devmet, drop_variables=SKIP_THESE_VARS)
10611061

10621062
# Determine if doing diff-of-diffs
1063-
diff_of_diffs = False
1064-
if second_ref is not None and second_dev is not None:
1065-
diff_of_diffs = True
1063+
diff_of_diffs = second_ref is not None and second_dev is not None
10661064

1067-
1068-
# Open second datasets if passed as arguments (used for diff of diffs)
1069-
# Regrid to same horz grid resolution if two refs or two devs do not match.
10701065
if diff_of_diffs:
1066+
1067+
# --------------------------------------------------------------
1068+
# %%%%% We are plotting diff-of-diffs %%%%%
1069+
#
1070+
# Open the second Ref and Dev datasets, if they have been
1071+
# passed as keyword arguments, as these are needed for the
1072+
# diff-of-diffs plots. Regrid to the same # horizontal grid
1073+
# resolution if the two Refs or two Devs are not on the same
1074+
# grid.
1075+
#
1076+
# Also, do not take the time mean (e.g. Annual Mean) of the
1077+
# datasets, as this will compute the the difference of means.
1078+
# Instead, we need to compute the mean of differences. This
1079+
# will be done in the plotting functions compare_single_level
1080+
# and compare_zonal_mean.
1081+
# --------------------------------------------------------------
10711082
second_refds = reader(second_ref, drop_variables=SKIP_THESE_VARS)
10721083
second_devds = reader(second_dev, drop_variables=SKIP_THESE_VARS)
1073-
1074-
print('\nPrinting second_refds (dev of ref for diff-of-diffs)\n')
1075-
print(second_refds)
1076-
print('\nPrinting second_devds (dev of dev for diff-of-diffs)\n')
1077-
print(second_devds)
1084+
1085+
if verbose:
1086+
print('\nPrinting second_refds (dev of ref for diff-of-diffs)\n')
1087+
print(second_refds)
1088+
print('\nPrinting second_devds (dev of dev for diff-of-diffs)\n')
1089+
print(second_devds)
10781090

10791091
# Only regrid if ref grid resolutions differ.
10801092
# Assume only may differ if both cubed-sphere.
@@ -1088,6 +1100,7 @@ def make_benchmark_conc_plots(
10881100
if "Xdim" in devds.dims and "Xdim" in second_devds.dims:
10891101
if devds['Xdim'].size != second_devds['Xdim'].size:
10901102
regrid_dev = True
1103+
10911104
if regrid_ref or regrid_dev:
10921105
# Assume regridding C24 to C48 to compute the difference at C48
10931106
regridfile=os.path.join(weightsdir,'regrid_weights_c24_to_c48.nc')
@@ -1118,17 +1131,20 @@ def make_benchmark_conc_plots(
11181131
print('\nRegrid complete\n')
11191132

11201133
else:
1134+
1135+
# --------------------------------------------------------------
1136+
# %%%%% We are not plotting diff-of-diffs %%%%%
1137+
#
1138+
# We do not need the second Ref and Dev data.
1139+
# We can also compute the time mean (e.g. Annual Mean) here.
1140+
# --------------------------------------------------------------
11211141
second_refds = None
11221142
second_devds = None
1123-
1124-
# Compute the annual mean of datasets (if necessary)
1125-
if time_mean:
1126-
refds = dataset_mean(refds)
1127-
devds = dataset_mean(devds)
1128-
refmetds = dataset_mean(refmetds)
1129-
devmetds = dataset_mean(devmetds)
1130-
second_ref = dataset_mean(second_refds)
1131-
second_dev = dataset_mean(second_devds)
1143+
if time_mean:
1144+
refds = dataset_mean(refds)
1145+
devds = dataset_mean(devds)
1146+
refmetds = dataset_mean(refmetds)
1147+
devmetds = dataset_mean(devmetds)
11321148

11331149
# Create regridding files if necessary while not in parallel loop
11341150
[_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)]

gcpy/plot/compare_single_level.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -205,17 +205,18 @@ def compare_single_level(
205205
# Prepare diff-of-diffs datasets if needed
206206
if diff_of_diffs:
207207

208-
# # If needed, use fake time dim in case dates are different
209-
# # in datasets. This needs more work for case of single versus
210-
# # multiple times.
211-
# aligned_time = [np.datetime64('2000-01-01')] * refdata.dims['time']
212-
# refdata = refdata.assign_coords({'time': aligned_time})
213-
# devdata = devdata.assign_coords({'time': aligned_time})
214-
# second_ref = second_ref.assign_coords({'time': aligned_time})
215-
# second_dev = second_dev.assign_coords({'time': aligned_time})
208+
## If needed, use fake time dim in case dates are different
209+
## in datasets. This needs more work for case of single versus
210+
## multiple times.
211+
#aligned_time = [np.datetime64('2000-01-01')] * refdata.dims['time']
212+
#refdata = refdata.assign_coords({'time': aligned_time})
213+
#devdata = devdata.assign_coords({'time': aligned_time})
214+
#second_ref = second_ref.assign_coords({'time': aligned_time})
215+
#second_dev = second_dev.assign_coords({'time': aligned_time})
216216

217217
refdata, fracrefdata = get_diff_of_diffs(refdata, second_ref)
218218
devdata, fracdevdata = get_diff_of_diffs(devdata, second_dev)
219+
219220
frac_refstr = 'GCC_dev / GCC_ref'
220221
frac_devstr = 'GCHP_dev / GCHP_ref'
221222
# If no varlist is passed, plot all (surface only for 3D)

gcpy/plot/compare_zonal_mean.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -212,13 +212,13 @@ def compare_zonal_mean(
212212
# Prepare diff-of-diffs datasets if needed
213213
if diff_of_diffs:
214214

215-
# # If needed, use fake time dim in case dates are different in datasets.
216-
# # This needs more work for case of single versus multiple times.
217-
# aligned_time = np.datetime64('2000-01-01')
218-
# refdata = refdata.assign_coords({'time' : [aligned_time]})
219-
# devdata = devdata.assign_coords({'time' : [aligned_time]})
220-
# second_ref = second_ref.assign_coords({'time' : [aligned_time]})
221-
# second_dev = second_dev.assign_coords({'time' : [aligned_time]})
215+
## If needed, use fake time dim in case dates are different in datasets.
216+
## This needs more work for case of single versus multiple times.
217+
#aligned_time = np.datetime64('2000-01-01')
218+
#refdata = refdata.assign_coords({'time' : [aligned_time]})
219+
#devdata = devdata.assign_coords({'time' : [aligned_time]})
220+
#second_ref = second_ref.assign_coords({'time' : [aligned_time]})
221+
#second_dev = second_dev.assign_coords({'time' : [aligned_time]})
222222

223223
refdata, fracrefdata = get_diff_of_diffs(refdata, second_ref)
224224
devdata, fracdevdata = get_diff_of_diffs(devdata, second_dev)

gcpy/util.py

Lines changed: 41 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -634,70 +634,61 @@ def get_diff_of_diffs(
634634
dev
635635
):
636636
"""
637-
Generate datasets containing differences between two datasets
637+
Generate datasets containing differences between two datasets.
638638
639639
Args:
640-
ref: xarray Dataset
641-
The "Reference" (aka "Ref") dataset.
642-
dev: xarray Dataset
643-
The "Development" (aka "Dev") dataset
640+
ref : xr.Dataset : The Ref (aka "Reference") dataset
641+
dev : xr.Dataset : The Dev" (aka "Development") dataset
644642
645-
Returns:
646-
absdiffs: xarray Dataset
647-
Dataset containing dev-ref values
648-
fracdiffs: xarray Dataset
649-
Dataset containing dev/ref values
650-
"""
643+
Returns
644+
absdiffs : xr.Dataset : Absolute differences (Dev - Ref)
645+
fracdiffs : xr.Dataset : Fractional differences (Dev / Ref)
646+
"""
647+
# ------------------------------------------------------------------
648+
# Throw an error unless Ref and Dev are on the same grid
649+
# ------------------------------------------------------------------
650+
if not ref.sizes == dev.sizes:
651+
msg = "Diff-of-diffs plot supports only identical grid types "
652+
msg += "(lat/lon or cubed-sphere) within each Ref & Dev pair!"
653+
raise ValueError(msg)
651654

652-
# get diff of diffs datasets for 2 datasets
653-
# limit each pair to be the same type of output (GEOS-Chem Classic or GCHP)
654-
# and same resolution / extent
655+
# ------------------------------------------------------------------
656+
# Include only the common fields in Ref and Dev
657+
# ------------------------------------------------------------------
655658
vardict = compare_varnames(ref, dev, quiet=True)
656659
varlist = vardict["commonvars"]
657-
# Select only common fields between the Ref and Dev datasets
658660
ref = ref[varlist]
659661
dev = dev[varlist]
660-
if 'nf' not in ref.dims and 'nf' not in dev.dims:
662+
663+
# ------------------------------------------------------------------
664+
# Compute absolute (Dev-Ref) and fractional (Dev/Ref) differences
665+
# ------------------------------------------------------------------
666+
with xr.set_options(keep_attrs=True):
667+
661668
# if the coords do not align then set time dimensions equal
662669
try:
663670
xr.align(dev, ref, join='exact')
664671
except BaseException:
665672
ref.coords["time"] = dev.coords["time"]
666-
with xr.set_options(keep_attrs=True):
667-
absdiffs = dev - ref
668-
fracdiffs = dev / ref
669-
for var in dev.data_vars.keys():
670-
# Ensure the diffs Dataset includes attributes
671-
absdiffs[var].attrs = dev[var].attrs
672-
fracdiffs[var].attrs = dev[var].attrs
673-
elif 'nf' in ref.dims and 'nf' in dev.dims:
674-
675-
# Include special handling if cubed sphere grid dimension names are different
676-
# since they changed in MAPL v1.0.0.
677-
if "lat" in ref.dims and "Xdim" in dev.dims:
678-
ref_newdimnames = dev.copy()
679-
for var in dev.data_vars.keys():
680-
if "Xdim" in dev[var].dims:
681-
ref_newdimnames[var].values = ref[var].values.reshape(
682-
dev[var].values.shape)
683-
# NOTE: the reverse conversion is gchp_dev[v].stack(lat=("nf","Ydim")).transpose(
684-
# "time","lev","lat","Xdim").values
685673

686-
with xr.set_options(keep_attrs=True):
687-
absdiffs = dev.copy()
688-
fracdiffs = dev.copy()
689-
for var in dev.data_vars.keys():
690-
if "Xdim" in dev[var].dims or "lat" in dev[var].dims:
691-
absdiffs[var].values = dev[var].values - ref[var].values
692-
fracdiffs[var].values = dev[var].values / ref[var].values
693-
# NOTE: The diffs Datasets are created without variable
694-
# attributes; we have to reattach them
695-
absdiffs[var].attrs = dev[var].attrs
696-
fracdiffs[var].attrs = dev[var].attrs
697-
else:
698-
print('Diff-of-diffs plot supports only identical grid types (lat/lon or cubed-sphere)' + \
699-
' within each dataset pair')
700-
raise ValueError
674+
# Compute absolute and fractional differences
675+
# Also make sure to preserve variable attributes
676+
absdiffs = dev.copy()
677+
fracdiffs = dev.copy()
678+
for var in varlist:
679+
absdiffs[var].values = dev[var].values - ref[var].values
680+
fracdiffs[var].values = dev[var].values / ref[var].values
681+
absdiffs[var].attrs = dev[var].attrs
682+
fracdiffs[var].attrs = dev[var].attrs
683+
684+
# If there are more than one time point, then take the
685+
# time mean (e.g. Annual Mean) of differences
686+
if "time" in absdiffs.coords:
687+
if len(absdiffs.coords["time"]) > 1:
688+
absdiffs = dataset_mean(absdiffs)
689+
if "time" in fracdiffs.coords:
690+
if len(fracdiffs.coords["time"]) > 1:
691+
fracdiffs = dataset_mean(fracdiffs)
701692

702693
return absdiffs, fracdiffs
703694

0 commit comments

Comments
 (0)