Skip to content

Commit e337595

Browse files
committed
Merge PR #395 (Remove xr.Dataset.load() commands to improve performance)
This merge brings PR #395 (Remove xr.Dataset.load() commands to improve performance, by @yantosca) into the GCPy 1.7.0 development stream. PR #395 removes the .load() calls that were appended to the statements that open xarray Dataset objects. Calling .load() reads the entire Dataset into memory, which is unnecessary because xarray can handle the memory management of large files "under the hood". Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
2 parents 4fea48b + 6876f06 commit e337595

File tree

9 files changed

+17
-21
lines changed

9 files changed

+17
-21
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
4949

5050
### Removed
5151
- Removed `PdfMerger()` from `compare_single_level` and `compare_zonal_mean`, because it was removed in pypdf >= 5.0.0
52+
- Removed `.load()` statements from xarray Datasets to improve performance
5253

5354
## [1.6.2] - 2025-06-12
5455
### Added

gcpy/append_grid_corners.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
})
4747

4848
# Write to netCDF
49-
ds.load()
5049
ds.close()
5150
ds.to_netcdf(args.filein)
5251

gcpy/benchmark/modules/benchmark_funcs.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,8 +1054,8 @@ def make_benchmark_conc_plots(
10541054
reader = util.dataset_reader(time_mean, verbose=verbose)
10551055

10561056
# Open datasets
1057-
refds = reader(ref, drop_variables=SKIP_THESE_VARS).load()
1058-
devds = reader(dev, drop_variables=SKIP_THESE_VARS).load()
1057+
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
1058+
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
10591059

10601060
# Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new
10611061
# naming introduced in GEOS-Chem 14.1.0
@@ -1080,9 +1080,9 @@ def make_benchmark_conc_plots(
10801080
refmetds = None
10811081
devmetds = None
10821082
if refmet:
1083-
refmetds = reader(refmet, drop_variables=SKIP_THESE_VARS).load()
1083+
refmetds = reader(refmet, drop_variables=SKIP_THESE_VARS)
10841084
if devmet:
1085-
devmetds = reader(devmet, drop_variables=SKIP_THESE_VARS).load()
1085+
devmetds = reader(devmet, drop_variables=SKIP_THESE_VARS)
10861086

10871087
# Determine if doing diff-of-diffs
10881088
diff_of_diffs = False
@@ -1093,8 +1093,8 @@ def make_benchmark_conc_plots(
10931093
# Open second datasets if passed as arguments (used for diff of diffs)
10941094
# Regrid to same horz grid resolution if two refs or two devs do not match.
10951095
if diff_of_diffs:
1096-
second_refds = reader(second_ref, drop_variables=SKIP_THESE_VARS).load()
1097-
second_devds = reader(second_dev, drop_variables=SKIP_THESE_VARS).load()
1096+
second_refds = reader(second_ref, drop_variables=SKIP_THESE_VARS)
1097+
second_devds = reader(second_dev, drop_variables=SKIP_THESE_VARS)
10981098

10991099
print('\nPrinting second_refds (dev of ref for diff-of-diffs)\n')
11001100
print(second_refds)
@@ -5695,7 +5695,7 @@ def create_benchmark_summary_table(
56955695
is_gchp=ref_gchp
56965696
),
56975697
drop_variables=skip_vars
5698-
).load()
5698+
)
56995699

57005700
# Get Dev data
57015701
devdata = reader(
@@ -5706,7 +5706,7 @@ def create_benchmark_summary_table(
57065706
is_gchp=dev_gchp
57075707
),
57085708
drop_variables=skip_vars
5709-
).load()
5709+
)
57105710

57115711
# Make sure that Ref and Dev datasets have the same variables.
57125712
# Variables that are in Ref but not in Dev will be added to Dev
@@ -5962,7 +5962,7 @@ def create_benchmark_sanity_check_table(
59625962
dset = reader(
59635963
file_name,
59645964
drop_variables=skip_vars
5965-
).load()
5965+
)
59665966

59675967
# Determine which variables are all zeroes or NaN
59685968
all_zeros_or_nans = []

gcpy/benchmark/modules/benchmark_mass_cons_table.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_area(
4444
# Otherwise read the data from the supplied area_path)
4545
reader = dataset_reader(multi_files=False, verbose=False)
4646
return get_area_from_dataset(
47-
reader(area_path, drop_variables=SKIP_THESE_VARS).load()
47+
reader(area_path, drop_variables=SKIP_THESE_VARS)
4848
)
4949

5050

@@ -371,11 +371,11 @@ def make_benchmark_mass_conservation_table(
371371
ref_data = reader(
372372
ref_files[t_idx],
373373
drop_variables=SKIP_THESE_VARS
374-
).load()
374+
)
375375
dev_data = reader(
376376
dev_files[t_idx],
377377
drop_variables=SKIP_THESE_VARS
378-
).load()
378+
)
379379
ref_area = get_area(ref_areapath, ref_data)
380380
dev_area = get_area(dev_areapath, dev_data)
381381
ref_delta_prs = get_delta_pressure(ref_data)

gcpy/benchmark/modules/benchmark_models_vs_obs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def read_model_data(
208208

209209
# Read data and rename SpeciesConc_ to SpeciesConcVV_, if necessary
210210
# (needed for backwards compatibility with older versions.)
211-
dataset = reader(filepaths,drop_variables=SKIP_THESE_VARS).load()
211+
dataset = reader(filepaths,drop_variables=SKIP_THESE_VARS)
212212
dataset = rename_speciesconc_to_speciesconcvv(dataset)
213213

214214
# Create a DataArray object and convert to ppbv (if necessary)

gcpy/examples/diagnostics/compare_diags.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def read_data(config):
9191
refdata = reader(
9292
ref_file,
9393
drop_variables=SKIP_THESE_VARS
94-
).load()
94+
)
9595
except FileNotFoundError as exc:
9696
msg = "Error reading " + ref_file
9797
raise FileNotFoundError(msg) from exc
@@ -101,7 +101,7 @@ def read_data(config):
101101
devdata = reader(
102102
dev_file,
103103
drop_variables=SKIP_THESE_VARS
104-
).load()
104+
)
105105
except FileNotFoundError as exc:
106106
msg = "Error reading " + dev_file
107107
raise FileNotFoundError(msg) from exc

gcpy/file_regrid.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def file_regrid(
117117
filein,
118118
decode_cf=False,
119119
engine="netcdf4"
120-
).load()
120+
)
121121
cs_res_in = get_cubed_sphere_res(dset)
122122

123123
# Verbose printout of inputs

gcpy/plot/compare_single_level.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,6 @@ def compare_single_level(
202202

203203
# Prepare diff-of-diffs datasets if needed
204204
if diff_of_diffs:
205-
refdata, devdata = refdata.load(), devdata.load()
206-
second_ref, second_dev = second_ref.load(), second_dev.load()
207205

208206
# # If needed, use fake time dim in case dates are different
209207
# # in datasets. This needs more work for case of single versus

gcpy/plot/compare_zonal_mean.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,6 @@ def compare_zonal_mean(
209209

210210
# Prepare diff-of-diffs datasets if needed
211211
if diff_of_diffs:
212-
refdata, devdata = refdata.load(), devdata.load()
213-
second_ref, second_dev = second_ref.load(), second_dev.load()
214212

215213
# # If needed, use fake time dim in case dates are different in datasets.
216214
# # This needs more work for case of single versus multiple times.

0 commit comments

Comments
 (0)