Make parsing of .amr files faster (#194)

wtbarnes · web-flow · commit 51f0464b69e6 · 2025-09-24T11:36:09.000-04:00
* faster amr file parsing

* faster indexing of strand

* more tests, better coverage

* fix read_trm_file docstring

* update codecov badge

* clarify comments
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 [![pydrad CI status](https://github.com/rice-solar-physics/pydrad/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/rice-solar-physics/pydrad/actions)
 [![Documentation Status](https://readthedocs.org/projects/pydrad/badge/?version=latest)](https://pydrad.readthedocs.io/en/latest/?badge=latest)
-[![codecov](https://codecov.io/gh/rice-solar-physics/pydrad/branch/master/graph/badge.svg)](https://codecov.io/gh/rice-solar-physics/pydrad)
+[![codecov](https://codecov.io/gh/rice-solar-physics/pydrad/graph/badge.svg?token=GZOGGHF2B0)](https://codecov.io/gh/rice-solar-physics/pydrad)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8411058.svg)](https://doi.org/10.5281/zenodo.8411058)
 
 Some Python tools to configure and parse output from the HYDrodynamics and RADiation (HYDRAD) code for field-aligned coronal loop physics.
diff --git a/conftest.py b/conftest.py
@@ -43,7 +43,7 @@ def get_configuration_dict():
             'write_file_physical': True,
             'write_file_timescales': True,
             'loop_length':  90.*u.Mm,
-            'total_time':  2.*u.s,
+            'total_time':  5.*u.s,
         },
         'grid': {
             'adapt': True,
diff --git a/pydrad/configure/tests/test_templates.py b/pydrad/configure/tests/test_templates.py
@@ -170,7 +170,7 @@ def test_heating_header(configuration):
 def test_hydrad_config(configuration):
     config = f"""Initial_Conditions/profiles/initial.amr
 Initial_Conditions/profiles/initial.amr.gravity
-2.0
+5.0
 1.0
 
 Configuration file generated by pydrad on {configuration.date}"""
@@ -180,7 +180,7 @@ def test_hydrad_config(configuration):
     config = f"""Initial_Conditions/profiles/initial.amr
 poly_fit.gravity
 poly_fit.magnetic_field
-2.0
+5.0
 1.0
 
 Configuration file generated by pydrad on {configuration.date}"""
@@ -189,7 +189,7 @@ def test_hydrad_config(configuration):
     config = f"""Results/profile10.amr
 poly_fit.gravity
 poly_fit.magnetic_field
-2.0
+5.0
 1.0
 
 Configuration file generated by pydrad on {configuration.date}"""
diff --git a/pydrad/parse/parse.py b/pydrad/parse/parse.py
@@ -83,9 +83,24 @@ def __getitem__(self, index):
                           master_time=self._master_time,
                           **self._profile_kwargs)
         else:
+            _index = None
+            # This is a nested conditional because two arrays of unequal shape cannot be compared elementwise
+            if self.time.shape == self._master_time.shape:
+                if (self.time==self._master_time).all():
+                    # NOTE: This is a shortcut that allows for much faster indexing if you are not taking a slice
+                    # from a slice. The index only maps to the correct index in the full-resolution time array
+                    # if you are slicing from the original time array ("master_time"). Otherwise, the index has
+                    # to be recomputed from the master time and the time at this slice, which is slower. For
+                    # example, in strand[10:15][0], the 0-index corresponds to 10 in the original indexing,
+                    # whereas in strand[10], 10 can be passed straight through to the underlying profile.
+                    # Negative indices are normalized first (e.g. strand[-1] -> len(strand)-1) because the
+                    # index is used verbatim to build the results filenames (e.g. profile{index}.scl).
+                    log.debug('Using explicit index to slice Strand')
+                    _index = np.arange(self.time.shape[0])[index]
             return Profile(self.hydrad_root,
                            self.time[index],
                            master_time=self._master_time,
+                           index=_index,
                            **self._profile_kwargs)
 
     def to_hdf5(self, filename, *variables):
@@ -257,10 +272,18 @@ def __init__(self, hydrad_root, time: u.s, **kwargs):
         if time.shape:
             raise ValueError('time must be a scalar')
         self.time = time
-        self._master_time = kwargs.get('master_time')
-        if self._master_time is None:
-            self._master_time = read_master_time(self.hydrad_root,
-                                                read_from_cfg=kwargs.get('read_from_cfg', False))
+        master_time = kwargs.get('master_time')
+        # NOTE: You should only be passing in the index explicitly if you are slicing from the original time array
+        if (index:=kwargs.get('index')) is None:
+            log.debug('Profile index is None. Calculating index from master time')
+            if master_time is None:
+                read_from_cfg = kwargs.get('read_from_cfg', False)
+                log.debug(f"Reading master time from {'cfg' if read_from_cfg else 'amr'} files.")
+                master_time = read_master_time(self.hydrad_root, read_from_cfg=read_from_cfg)
+            self._index = np.where(self.time == master_time)[0][0]
+        else:
+            log.debug(f'Using explicit index {index} while indexing Profile.')
+            self._index = index
         # Read results files
         self._read_amr()
         if kwargs.get('read_phy', True):
@@ -298,10 +321,6 @@ def _hstate_filename(self):
     def _scl_filename(self):
         return self.hydrad_root / 'Results' / f'profile{self._index:d}.scl'
 
-    @property
-    def _index(self):
-        return np.where(self.time == self._master_time)[0][0]
-
     def __repr__(self):
         return f"""HYDRAD Timestep Profile
 -----------------------
diff --git a/pydrad/parse/tests/test_strand.py b/pydrad/parse/tests/test_strand.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pytest
 
-from pydrad.parse import Strand
+from pydrad.parse import Profile, Strand
 
 VAR_NAMES = [
     'coordinate',
@@ -63,15 +63,24 @@
 def strand(hydrad):
     return Strand(hydrad)
 
+@pytest.fixture
+def strand_only_amr_time_cfg(hydrad):
+    return Strand(hydrad,
+                  read_from_cfg=True,
+                  read_phy=False,
+                  read_ine=False,
+                  read_trm=False,
+                  read_hstate=False,
+                  read_scl=False)
+
 @pytest.fixture
 def strand_only_amr(hydrad):
     return Strand(hydrad,
                   read_phy=False,
                   read_ine=False,
                   read_trm=False,
                   read_hstate=False,
-                  read_scl=False,
-                  )
+                  read_scl=False)
 
 
 def test_parse_initial_conditions(strand):
@@ -111,6 +120,15 @@ def test_time_arrays_same(hydrad, strand):
     assert u.allclose(strand.time, strand2.time, rtol=0.0, atol=1e-2*u.s)
 
 
+def test_strand_indexing(strand_only_amr):
+    # Make sure indexing is tracked correctly across repeated slicing
+    # of strand
+    assert strand_only_amr[2]._index == 2
+    strand_slice = strand_only_amr[1:4]
+    assert strand_slice[1]._index == 2
+    assert strand_slice[1]._amr_filename == strand_only_amr[2]._amr_filename
+
+
 def test_to_hdf5(strand, tmp_path):
     filename = tmp_path / 'hydrad_results.h5'
     strand.to_hdf5(filename, *VAR_NAMES)
@@ -130,6 +148,7 @@ def test_emission_measure(strand):
     assert isinstance(bins, u.Quantity)
     assert len(bins) == len(em) + 1
 
+
 def test_term_file_output(strand):
     for p in strand:
         # The electron energy equation's numerical viscosity term is always 0:
@@ -145,26 +164,45 @@ def test_term_file_output(strand):
             rtol=0.0,
         )
 
+
 def test_term_file_units(strand):
     assert strand[0].mass_advection.unit == u.Unit('g s-1 cm-3')
     assert strand[0].momentum_gravity.unit == u.Unit('dyne s-1 cm-3')
     assert strand[0].electron_viscous_stress.unit == u.Unit('erg s-1 cm-3')
     assert strand[0].hydrogen_collisions.unit == u.Unit('erg s-1 cm-3')
 
+
 def test_scale_file_output(strand):
     for p in strand:
         # all time-scales should be strictly greater than 0
         assert all(t > (0.0 * u.s) for t in p.radiative_timescale)
         assert all(t > (0.0 * u.s) for t in p.collisional_timescale)
         assert all(t > (0.0 * u.s) for t in p.ion_conductive_timescale)
 
+
 def test_scale_file_units(strand):
     assert strand[0].advective_timescale.unit == u.Unit('s')
     assert strand[0].electron_conductive_timescale.unit == u.Unit('s')
     assert strand[0].collisional_timescale.unit == u.Unit('s')
 
+
 def test_amr_file_units(strand, strand_only_amr):
     assert strand[0].mass_density.unit == u.Unit('g cm-3')
     assert strand_only_amr[0].mass_density.unit == u.Unit('g cm-3')
     assert strand[0].electron_mass_density.unit == u.Unit('g cm-3')
     assert strand_only_amr[0].electron_mass_density.unit == u.Unit('g cm-3')
+
+
+def test_profile_instantiation(strand_only_amr, strand_only_amr_time_cfg):
+    # Test various ways to instantiate a Profile
+    # No index, no master time
+    p = Profile(strand_only_amr.hydrad_root, strand_only_amr.time[1])
+    assert p._index == 1
+    # No index, no master time, read from cfg
+    # NOTE: This uses a different strand object as the original time array must also be read from the cfg
+    # file rather than the .amr files. Otherwise, the values will be slightly different.
+    p = Profile(strand_only_amr_time_cfg.hydrad_root, strand_only_amr_time_cfg.time[1], read_from_cfg=True)
+    assert p._index == 1
+    # No index, master time
+    p = Profile(strand_only_amr.hydrad_root, strand_only_amr.time[1], master_time=strand_only_amr._master_time)
+    assert p._index == 1
diff --git a/pydrad/parse/util.py b/pydrad/parse/util.py
@@ -90,28 +90,34 @@ def read_amr_file(filename):
         'electron_energy_density': 'erg cm-3',
         'hydrogen_energy_density': 'erg cm-3',
     }
-    table = astropy.table.QTable.read(
+    # NOTE: Purposefully using pandas explicitly as it seems to be faster
+    # than astropy.io.ascii.read for tables with this particular delimiter.
+    # I am not completely sure why this is the case but the difference is
+    # almost an order of magnitude.
+    table = read_csv(
         filename,
-        format='ascii',
-        data_start=4,
+        skiprows=4,
+        sep=r'\s+',
+        header=None,
+        engine='c',
     )
     # NOTE: The columns we care about are doubles in HYDRAD, while the
     # other columns are integers with information about the
-    # refinement level of the grid cell.  As a result, if electron
+    # refinement level of the grid cell. As a result, if electron
     # mass density is not present in the .amr file, then the
     # seventh column is an integer.
-    if table.dtype[len(columns)-1] == np.int64:
+    if table.dtypes[len(columns)-1] == np.int64:
         columns.remove('electron_mass_density')
         del units['electron_mass_density']
     # NOTE: This is done after creating the table because the
     # remaining number of columns can be variable and thus we
     # cannot assign all of the column names at once.
-    table.rename_columns(
-        table.colnames[:len(columns)],
-        columns,
+    table = table.truncate(
+        after=len(columns)-1,
+        axis='columns'
     )
-    for column in columns:
-        table[column].unit = units[column]
+    table.rename(columns={i:name for i, name in enumerate(columns)}, inplace=True)
+    table = astropy.table.QTable.from_pandas(table, units=units)
     return table
 
 
@@ -208,11 +214,12 @@ def read_trm_file(filename):
     Parse ``.trm`` files with hydrodynamic equation terms as a function of position.
 
     The files come in sets of 5 rows with variable number of columns:
-        -- Loop coordinate (1 column), and at each position:
-        -- Terms of mass equation (2 columns)
-        -- Terms of momentum equation (6 columns)
-        -- Terms of electron energy equation (11 columns)
-        -- Terms of hydrogen energy equation (11 columns)
+
+    * Loop coordinate (1 column), and at each position:
+    * Terms of mass equation (2 columns)
+    * Terms of momentum equation (6 columns)
+    * Terms of electron energy equation (11 columns)
+    * Terms of hydrogen energy equation (11 columns)
     """
     units = {
         'mass': 'g cm-3 s-1',
diff --git a/pydrad/tests/test_hydrad.py b/pydrad/tests/test_hydrad.py
@@ -37,4 +37,4 @@ def test_use_openmp(tmpdir_factory, configuration, hydrad_clean):
     omp_configuration.setup_simulation(hydrad_tmp, hydrad_clean, overwrite=True)
     run_shell_command(hydrad_tmp / 'HYDRAD.exe')
     strand = Strand(hydrad_tmp)
-    assert len(strand) == 3
+    assert len(strand) == 6