Removed xr.Dataset.load commands to improve performance

yantosca · yantosca · commit 317de8f673aa · 2026-01-15T11:54:50.000-05:00
append_grid_corners.py
file_regrid.py
plot/compare_single_level.py
plot/compare_zonal_mean.py
benchmark/modules/benchmark_funcs.py
benchmark/modules/benchmark_mass_cons_table.py
benchmark/modules/benchmark_models_vs_obs.py
examples/diagnostics/compare_diags.py
- Removed .load() calls on xarray Dataset objects.  We can let
  xarray decide when to load data into memory.  This should speed up
  data I/O significantly.

CHANGELOG.md
- Updated accordingly

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ### Removed
 - Removed `PdfMerger()` from `compare_single_level` and `compare_zonal_mean`, it has been removed in pypdf >= 5.0.0
+- Removed `.load()` statements from xarray Datasets to improve performance
 
 ## [1.6.2] - 2025-06-12
 ### Added
diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py
@@ -44,7 +44,7 @@ def get_area(
     # Otherwise read the data from the supplied area_path)
     reader = dataset_reader(multi_files=False, verbose=False)
     return get_area_from_dataset(
-        reader(area_path, drop_variables=SKIP_THESE_VARS).load()
+        reader(area_path, drop_variables=SKIP_THESE_VARS)
     )
 
 
@@ -371,11 +371,11 @@ def make_benchmark_mass_conservation_table(
             ref_data = reader(
                 ref_files[t_idx],
                 drop_variables=SKIP_THESE_VARS
-            ).load()
+            )
             dev_data = reader(
                 dev_files[t_idx],
                 drop_variables=SKIP_THESE_VARS
-            ).load()
+            )
             ref_area = get_area(ref_areapath, ref_data)
             dev_area = get_area(dev_areapath, dev_data)
             ref_delta_prs = get_delta_pressure(ref_data)
diff --git a/gcpy/benchmark/modules/benchmark_models_vs_obs.py b/gcpy/benchmark/modules/benchmark_models_vs_obs.py
@@ -208,7 +208,7 @@ def read_model_data(
 
     # Read data and rename SpeciesConc_ to SpeciesConcVV_, if necessary
     # (needed for backwards compatibility with older versions.)
-    dataset = reader(filepaths,drop_variables=SKIP_THESE_VARS).load()
+    dataset = reader(filepaths,drop_variables=SKIP_THESE_VARS)
     dataset = rename_speciesconc_to_speciesconcvv(dataset)
 
     # Create a DataArray object and convert to ppbv (if necessary)
diff --git a/gcpy/examples/diagnostics/compare_diags.py b/gcpy/examples/diagnostics/compare_diags.py
@@ -91,7 +91,7 @@ def read_data(config):
         refdata = reader(
             ref_file,
             drop_variables=SKIP_THESE_VARS
-        ).load()
+        )
     except FileNotFoundError as exc:
         msg = "Error reading " + ref_file
         raise FileNotFoundError(msg) from exc
@@ -101,7 +101,7 @@ def read_data(config):
         devdata = reader(
             dev_file,
             drop_variables=SKIP_THESE_VARS
-        ).load()
+        )
     except FileNotFoundError as exc:
         msg = "Error reading " + dev_file
         raise FileNotFoundError(msg) from exc
diff --git a/gcpy/file_regrid.py b/gcpy/file_regrid.py
@@ -117,7 +117,7 @@ def file_regrid(
         filein,
         decode_cf=False,
         engine="netcdf4"
-    ).load()
+    )
     cs_res_in = get_cubed_sphere_res(dset)
 
     # Verbose printout of inputs
diff --git a/gcpy/plot/compare_single_level.py b/gcpy/plot/compare_single_level.py
@@ -202,9 +202,6 @@ def compare_single_level(
 
     # Prepare diff-of-diffs datasets if needed
     if diff_of_diffs:
-        refdata, devdata = refdata.load(), devdata.load()
-        second_ref, second_dev = second_ref.load(), second_dev.load()
-
 #        # If needed, use fake time dim in case dates are different
 #        # in datasets.  This needs more work for case of single versus
 #        # multiple times.
diff --git a/gcpy/plot/compare_zonal_mean.py b/gcpy/plot/compare_zonal_mean.py
@@ -209,9 +209,6 @@ def compare_zonal_mean(
 
     # Prepare diff-of-diffs datasets if needed
     if diff_of_diffs:
-        refdata, devdata = refdata.load(), devdata.load()
-        second_ref, second_dev = second_ref.load(), second_dev.load()
-
 #        # If needed, use fake time dim in case dates are different in datasets.
 #        # This needs more work for case of single versus multiple times.
 #        aligned_time = np.datetime64('2000-01-01')