Skip to content

Commit 6876f06

Browse files
committed
Removed xr.Dataset.load commands to improve performance
append_grid_corners.py, file_regrid.py, plot/compare_single_level.py, plot/compare_zonal_mean.py, benchmark/modules/benchmark_funcs.py, benchmark/modules/benchmark_mass_cons_table.py, benchmark/modules/benchmark_models_vs_obs.py, examples/diagnostics/compare_diags.py: Removed .load() commands for xarray Dataset objects. We can let xarray decide when to load data into memory. This should speed up data I/O significantly. CHANGELOG.md: Updated accordingly. Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
1 parent 4fea48b commit 6876f06

File tree

9 files changed

+17
-21
lines changed

9 files changed

+17
-21
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
4949

5050
### Removed
5151
- Removed `PdfMerger()` from `compare_single_level` and `compare_zonal_mean`, because it was removed in pypdf >= 5.0.0
52+
- Removed `.load()` statements from xarray Datasets to improve performance
5253

5354
## [1.6.2] - 2025-06-12
5455
### Added

gcpy/append_grid_corners.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
})
4747

4848
# Write to netCDF
49-
ds.load()
5049
ds.close()
5150
ds.to_netcdf(args.filein)
5251

gcpy/benchmark/modules/benchmark_funcs.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,8 +1054,8 @@ def make_benchmark_conc_plots(
10541054
reader = util.dataset_reader(time_mean, verbose=verbose)
10551055

10561056
# Open datasets
1057-
refds = reader(ref, drop_variables=SKIP_THESE_VARS).load()
1058-
devds = reader(dev, drop_variables=SKIP_THESE_VARS).load()
1057+
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
1058+
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
10591059

10601060
# Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new
10611061
# naming introduced in GEOS-Chem 14.1.0
@@ -1080,9 +1080,9 @@ def make_benchmark_conc_plots(
10801080
refmetds = None
10811081
devmetds = None
10821082
if refmet:
1083-
refmetds = reader(refmet, drop_variables=SKIP_THESE_VARS).load()
1083+
refmetds = reader(refmet, drop_variables=SKIP_THESE_VARS)
10841084
if devmet:
1085-
devmetds = reader(devmet, drop_variables=SKIP_THESE_VARS).load()
1085+
devmetds = reader(devmet, drop_variables=SKIP_THESE_VARS)
10861086

10871087
# Determine if doing diff-of-diffs
10881088
diff_of_diffs = False
@@ -1093,8 +1093,8 @@ def make_benchmark_conc_plots(
10931093
# Open second datasets if passed as arguments (used for diff of diffs)
10941094
# Regrid to same horz grid resolution if two refs or two devs do not match.
10951095
if diff_of_diffs:
1096-
second_refds = reader(second_ref, drop_variables=SKIP_THESE_VARS).load()
1097-
second_devds = reader(second_dev, drop_variables=SKIP_THESE_VARS).load()
1096+
second_refds = reader(second_ref, drop_variables=SKIP_THESE_VARS)
1097+
second_devds = reader(second_dev, drop_variables=SKIP_THESE_VARS)
10981098

10991099
print('\nPrinting second_refds (dev of ref for diff-of-diffs)\n')
11001100
print(second_refds)
@@ -5695,7 +5695,7 @@ def create_benchmark_summary_table(
56955695
is_gchp=ref_gchp
56965696
),
56975697
drop_variables=skip_vars
5698-
).load()
5698+
)
56995699

57005700
# Get Dev data
57015701
devdata = reader(
@@ -5706,7 +5706,7 @@ def create_benchmark_summary_table(
57065706
is_gchp=dev_gchp
57075707
),
57085708
drop_variables=skip_vars
5709-
).load()
5709+
)
57105710

57115711
# Make sure that Ref and Dev datasets have the same variables.
57125712
# Variables that are in Ref but not in Dev will be added to Dev
@@ -5962,7 +5962,7 @@ def create_benchmark_sanity_check_table(
59625962
dset = reader(
59635963
file_name,
59645964
drop_variables=skip_vars
5965-
).load()
5965+
)
59665966

59675967
# Determine which variables are all zeroes or NaN
59685968
all_zeros_or_nans = []

gcpy/benchmark/modules/benchmark_mass_cons_table.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_area(
4444
# Otherwise read the data from the supplied area_path
4545
reader = dataset_reader(multi_files=False, verbose=False)
4646
return get_area_from_dataset(
47-
reader(area_path, drop_variables=SKIP_THESE_VARS).load()
47+
reader(area_path, drop_variables=SKIP_THESE_VARS)
4848
)
4949

5050

@@ -371,11 +371,11 @@ def make_benchmark_mass_conservation_table(
371371
ref_data = reader(
372372
ref_files[t_idx],
373373
drop_variables=SKIP_THESE_VARS
374-
).load()
374+
)
375375
dev_data = reader(
376376
dev_files[t_idx],
377377
drop_variables=SKIP_THESE_VARS
378-
).load()
378+
)
379379
ref_area = get_area(ref_areapath, ref_data)
380380
dev_area = get_area(dev_areapath, dev_data)
381381
ref_delta_prs = get_delta_pressure(ref_data)

gcpy/benchmark/modules/benchmark_models_vs_obs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def read_model_data(
208208

209209
# Read data and rename SpeciesConc_ to SpeciesConcVV_, if necessary
210210
# (needed for backwards compatibility with older versions.)
211-
dataset = reader(filepaths,drop_variables=SKIP_THESE_VARS).load()
211+
dataset = reader(filepaths,drop_variables=SKIP_THESE_VARS)
212212
dataset = rename_speciesconc_to_speciesconcvv(dataset)
213213

214214
# Create a DataArray object and convert to ppbv (if necessary)

gcpy/examples/diagnostics/compare_diags.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def read_data(config):
9191
refdata = reader(
9292
ref_file,
9393
drop_variables=SKIP_THESE_VARS
94-
).load()
94+
)
9595
except FileNotFoundError as exc:
9696
msg = "Error reading " + ref_file
9797
raise FileNotFoundError(msg) from exc
@@ -101,7 +101,7 @@ def read_data(config):
101101
devdata = reader(
102102
dev_file,
103103
drop_variables=SKIP_THESE_VARS
104-
).load()
104+
)
105105
except FileNotFoundError as exc:
106106
msg = "Error reading " + dev_file
107107
raise FileNotFoundError(msg) from exc

gcpy/file_regrid.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def file_regrid(
117117
filein,
118118
decode_cf=False,
119119
engine="netcdf4"
120-
).load()
120+
)
121121
cs_res_in = get_cubed_sphere_res(dset)
122122

123123
# Verbose printout of inputs

gcpy/plot/compare_single_level.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,6 @@ def compare_single_level(
202202

203203
# Prepare diff-of-diffs datasets if needed
204204
if diff_of_diffs:
205-
refdata, devdata = refdata.load(), devdata.load()
206-
second_ref, second_dev = second_ref.load(), second_dev.load()
207205

208206
# # If needed, use fake time dim in case dates are different
209207
# # in datasets. This needs more work for case of single versus

gcpy/plot/compare_zonal_mean.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,6 @@ def compare_zonal_mean(
209209

210210
# Prepare diff-of-diffs datasets if needed
211211
if diff_of_diffs:
212-
refdata, devdata = refdata.load(), devdata.load()
213-
second_ref, second_dev = second_ref.load(), second_dev.load()
214212

215213
# # If needed, use fake time dim in case dates are different in datasets.
216214
# # This needs more work for case of single versus multiple times.

0 commit comments

Comments
 (0)