Skip to content

Commit 317de8f

Browse files
committed
Removed xr.Dataset.load commands to improve performance
Files affected:
  append_grid_corners.py
  file_regrid.py
  plot/compare_single_level.py
  plot/compare_zonal_mean.py
  benchmark/modules/benchmark_funcs.py
  benchmark/modules/benchmark_mass_cons_table.py
  benchmark/modules/benchmark_models_vs_obs.py
  examples/diagnostics/compare_diags.py
- Removed .load() commands for xarray Dataset objects. We can let xarray decide when to load data into memory. This should speed up data I/O significantly.
CHANGELOG.md
- Updated accordingly
Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
1 parent 600ee3b commit 317de8f

File tree

7 files changed

+8
-13
lines changed

7 files changed

+8
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5050

5151
### Removed
5252
- Removed `PdfMerger()` from `compare_single_level` and `compare_zonal_mean`, because it has been removed in pypdf >= 5.0.0
53+
- Removed `.load()` statements from xarray Datasets to improve performance
5354

5455
## [1.6.2] - 2025-06-12
5556
### Added

gcpy/benchmark/modules/benchmark_mass_cons_table.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_area(
4444
# Otherwise read the data from the supplied area_path
4545
reader = dataset_reader(multi_files=False, verbose=False)
4646
return get_area_from_dataset(
47-
reader(area_path, drop_variables=SKIP_THESE_VARS).load()
47+
reader(area_path, drop_variables=SKIP_THESE_VARS)
4848
)
4949

5050

@@ -371,11 +371,11 @@ def make_benchmark_mass_conservation_table(
371371
ref_data = reader(
372372
ref_files[t_idx],
373373
drop_variables=SKIP_THESE_VARS
374-
).load()
374+
)
375375
dev_data = reader(
376376
dev_files[t_idx],
377377
drop_variables=SKIP_THESE_VARS
378-
).load()
378+
)
379379
ref_area = get_area(ref_areapath, ref_data)
380380
dev_area = get_area(dev_areapath, dev_data)
381381
ref_delta_prs = get_delta_pressure(ref_data)

gcpy/benchmark/modules/benchmark_models_vs_obs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def read_model_data(
208208

209209
# Read data and rename SpeciesConc_ to SpeciesConcVV_, if necessary
210210
# (needed for backwards compatibility with older versions.)
211-
dataset = reader(filepaths,drop_variables=SKIP_THESE_VARS).load()
211+
dataset = reader(filepaths,drop_variables=SKIP_THESE_VARS)
212212
dataset = rename_speciesconc_to_speciesconcvv(dataset)
213213

214214
# Create a DataArray object and convert to ppbv (if necessary)

gcpy/examples/diagnostics/compare_diags.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def read_data(config):
9191
refdata = reader(
9292
ref_file,
9393
drop_variables=SKIP_THESE_VARS
94-
).load()
94+
)
9595
except FileNotFoundError as exc:
9696
msg = "Error reading " + ref_file
9797
raise FileNotFoundError(msg) from exc
@@ -101,7 +101,7 @@ def read_data(config):
101101
devdata = reader(
102102
dev_file,
103103
drop_variables=SKIP_THESE_VARS
104-
).load()
104+
)
105105
except FileNotFoundError as exc:
106106
msg = "Error reading " + dev_file
107107
raise FileNotFoundError(msg) from exc

gcpy/file_regrid.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def file_regrid(
117117
filein,
118118
decode_cf=False,
119119
engine="netcdf4"
120-
).load()
120+
)
121121
cs_res_in = get_cubed_sphere_res(dset)
122122

123123
# Verbose printout of inputs

gcpy/plot/compare_single_level.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,6 @@ def compare_single_level(
202202

203203
# Prepare diff-of-diffs datasets if needed
204204
if diff_of_diffs:
205-
refdata, devdata = refdata.load(), devdata.load()
206-
second_ref, second_dev = second_ref.load(), second_dev.load()
207-
208205
# # If needed, use fake time dim in case dates are different
209206
# # in datasets. This needs more work for case of single versus
210207
# # multiple times.

gcpy/plot/compare_zonal_mean.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,6 @@ def compare_zonal_mean(
209209

210210
# Prepare diff-of-diffs datasets if needed
211211
if diff_of_diffs:
212-
refdata, devdata = refdata.load(), devdata.load()
213-
second_ref, second_dev = second_ref.load(), second_dev.load()
214-
215212
# # If needed, use fake time dim in case dates are different in datasets.
216213
# # This needs more work for case of single versus multiple times.
217214
# aligned_time = np.datetime64('2000-01-01')

0 commit comments

Comments
 (0)