Improve performance of read_ine_file and read_phy_file (#195)

wtbarnes · web-flow · commit c175a02bba25 · 2025-10-14T19:16:32.000-04:00
* speedup ine parsing

* add ine parsing test

* Better Profile repr

* suggestions from code review

* fix template tests for new CHIANTI version

* speedup parsing of phy files

* some unit tests for file reading functions

* make read_ine_file test more robust
diff --git a/conftest.py b/conftest.py
@@ -83,15 +83,15 @@ def get_configuration_dict():
         },
         'radiation': {
             'abundance_dataset': 'asplund',
-            'decouple_ionization_state_solver': False,
+            'decouple_ionization_state_solver': True,
             'density_dependent_rates': False,
             'elements_equilibrium': [],
-            'elements_nonequilibrium': [],
-            'emissivity_dataset': 'chianti_v7',
+            'elements_nonequilibrium': ['hydrogen', 'carbon'],
+            'emissivity_dataset': 'chianti_v10',
             'nlte_chromosphere': False,
             'optically_thick_radiation': False,
             'ranges_dataset': 'ranges',
-            'rates_dataset': 'chianti_v7',
+            'rates_dataset': 'chianti_v10',
             'use_power_law_radiative_losses': True
         },
         'solver': {
diff --git a/pydrad/configure/tests/test_templates.py b/pydrad/configure/tests/test_templates.py
@@ -541,9 +541,9 @@ def test_radiation_config_equilibrium(configuration):
         'iron'
     ]
     config = f"""ranges
-chianti_v7
+chianti_v10
 asplund
-chianti_v7
+chianti_v10
 3
 h
 1
@@ -559,9 +559,9 @@ def test_radiation_config_equilibrium(configuration):
 def test_radiation_config_nonequilibrium(configuration):
     configuration.config['radiation']['elements_nonequilibrium'] = ['iron']
     config = f"""ranges
-chianti_v7
+chianti_v10
 asplund
-chianti_v7
+chianti_v10
 1
 fe
 26
diff --git a/pydrad/parse/parse.py b/pydrad/parse/parse.py
@@ -324,7 +324,8 @@ def _scl_filename(self):
     def __repr__(self):
         return f"""HYDRAD Timestep Profile
 -----------------------
-Filename: {self._phy_filename}
+Filename: {self._amr_filename}
+Time: {self.time}
 Timestep #: {self._index}"""
 
     def _read_amr(self):
diff --git a/pydrad/parse/tests/conftest.py b/pydrad/parse/tests/conftest.py
@@ -0,0 +1,11 @@
+"""
+Common fixtures for testing pydrad.parse
+"""
+import pytest
+
+from pydrad.parse import Strand
+
+
+@pytest.fixture
+def strand(hydrad):
+    return Strand(hydrad)
diff --git a/pydrad/parse/tests/test_strand.py b/pydrad/parse/tests/test_strand.py
@@ -4,6 +4,7 @@
 import astropy.units as u
 import h5py
 import numpy as np
+import plasmapy.particles
 import pytest
 
 from pydrad.parse import Profile, Strand
@@ -59,10 +60,6 @@
 ]
 
 
-@pytest.fixture
-def strand(hydrad):
-    return Strand(hydrad)
-
 @pytest.fixture
 def strand_only_amr_time_cfg(hydrad):
     return Strand(hydrad,
@@ -206,3 +203,13 @@ def test_profile_instantiation(strand_only_amr, strand_only_amr_time_cfg):
     # No index, master time
     p = Profile(strand_only_amr.hydrad_root, strand_only_amr.time[1], master_time=strand_only_amr._master_time)
     assert p._index == 1
+
+
+def test_ine_results(strand):
+    for profile in strand:
+        for element in strand.config['radiation']['elements_nonequilibrium']:
+            Z = plasmapy.particles.atomic_number(element)
+            for i_z in range(1,Z+2):
+                assert hasattr(profile, f'{element}_{i_z}')
+                ion_frac = getattr(profile, f'{element}_{i_z}')
+                assert ion_frac.shape == profile.coordinate.shape
diff --git a/pydrad/parse/tests/test_util.py b/pydrad/parse/tests/test_util.py
@@ -0,0 +1,20 @@
+"""
+Unit tests for reader functions
+"""
+import plasmapy.particles
+
+from pydrad.parse.util import read_ine_file, read_phy_file
+
+
+def test_read_phy_file(strand):
+    tab = read_phy_file(strand[0]._phy_filename)
+    assert len(tab.colnames) == 11
+    assert tab['coordinate'].shape == strand[0].grid_centers.shape
+
+
+def test_read_ine_file(strand):
+    tab = read_ine_file(strand[0]._ine_filename, strand[0].grid_centers.shape[0])
+    n_columns = sum([plasmapy.particles.atomic_number(el)+1
+                     for el in strand.config['radiation']['elements_nonequilibrium']])
+    assert len(tab.colnames) == n_columns  # NEI elements modeled are H and C: Z_H+1 + Z_C+1=9
+    assert tab['hydrogen_1'].shape == strand[0].grid_centers.shape
diff --git a/pydrad/parse/util.py b/pydrad/parse/util.py
@@ -22,6 +22,13 @@
 ]
 
 
+# Do this here as calling this each time adds significant overhead
+# when parsing a file.
+ELEMENT_NAME_MAPPING = {
+    z: plasmapy.particles.element_name(z) for z in range(1,31)
+}
+
+
 def read_master_time(hydrad_root, read_from_cfg=False):
     """
     Get array of times that correspond to each timestep for the entire simulation.
@@ -155,10 +162,14 @@ def read_phy_file(filename):
         'electron_heat_flux': 'erg s-1 cm-2',
         'hydrogen_heat_flux': 'erg s-1 cm-2',
     }
-    return astropy.table.QTable.read(
-        filename,
-        format='ascii',
-        names=columns,
+    return astropy.table.QTable.from_pandas(
+        read_csv(
+            filename,
+            sep=r'\s+',
+            header=None,
+            engine='c',
+            names=columns,
+        ),
         units=units,
     )
 
@@ -175,38 +186,35 @@ def read_ine_file(filename, n_s):
     ----------
     filename: path-like
     n_s: `int`
+        The number of grid cells in the snapshot corresponding to this file.
     """
-    # TODO: clean this up somehow? I've purposefully included
-    # a lot of comments because the format of this file makes
-    # the parsing code quite opaque
-    with pathlib.Path(filename).open() as f:
+    # This file is grouped into n_s groups each of length n_el + 1 (because the first entry is
+    # the spatial coordinate) such that the total number of lines is n_s*(n_el + 1).
+    # Each line in the group (except the first line) has Z+2 entries corresponding to Z followed
+    # by the ionization fraction of the Z+1 ionization stages of element Z at the spatial
+    # coordinate specified in the first line of the group.
+    # Because of the complexity of the structure of this file, we need to parse it line by line.
+    with filename.open(mode='r') as f:
         lines = f.readlines()
-    # First parse all of the population fraction arrays
-    # NOTE: Have to calculate the number of elements we have
-    # computed population fractions for as we do not necessarily
-    # know this ahead of time
-    n_e = int(len(lines)/n_s - 1)
-    # The file is arranged in n_s groups of n_e+1 lines each where the first
-    # line is the coordinate and the subsequent lines are the population fraction
-    # for each element, with each column corresponding to an ion of that element
-    # First, separate by coordinate
-    pop_lists = [lines[i*(n_e+1)+1:(i+1)*(n_e+1)] for i in range(n_s)]
-    # Convert each row of each group into a floating point array
-    pop_lists = [[np.array(l.split(), dtype=float) for l in p] for p in pop_lists]
-    # NOTE: each row has Z+2 entries as the first entry is the atomic number Z
-    # Get these from just the first group as the number of elements is the same
-    # for each
-    Z = np.array([p[0] for p in pop_lists[0]], dtype=int)
-    pop_arrays = [np.zeros((n_s, z+1)) for z in Z]
-    for i, p in enumerate(pop_lists):
-        for j, line in enumerate(p):
-            pop_arrays[j][i, :] = line[1:]  # Skip first entry, it is the atomic number
-    columns = []
+    n_el = int(len(lines)/n_s - 1)
+    # The innermost loop parses the ionization fraction for all ionization stages of a given element Z
+    # at all spatial coordinates and casts it to an array. This innermost array has dimensions (n_s,Z+1).
+    # The outermost array iterates over all elements. The result is a list of length n_el where each entry
+    # contains the ionization fractions at all ionization stages of a given element at all spatial coordinates.
+    data = [
+        np.asarray(
+            [lines[(1+n_el)*i_s+1+i_z].split()[1:] for i_s in range(n_s)],
+            dtype=np.float64
+        )
+        for i_z in range(n_el)
+    ]
+    Z = [x.shape[1]-1 for x in data]
+    colnames = []
     for z in Z:
-        el_name = plasmapy.particles.element_name(z)
-        columns += [f'{el_name}_{i+1}' for i in range(z+1)]
-    data = np.hstack([p for p in pop_arrays])
-    return astropy.table.QTable(data=data, names=columns)
+        # A precomputed mapping between Z and element name is used as calling plasmapy.particles.element_name
+        # each time leads to significant overhead.
+        colnames += [f'{ELEMENT_NAME_MAPPING[z]}_{i}' for i in range(1, z+2)]
+    return astropy.table.Table(data=np.hstack(data), names=colnames, copy=False)
 
 
 def read_trm_file(filename):

Original file line number	Diff line number	Diff line change
`@@ -541,9 +541,9 @@ def test_radiation_config_equilibrium(configuration):`
`541`	`541`	`'iron'`
`542`	`542`	`]`
`543`	`543`	`config = f"""ranges`
`544`		`-chianti_v7`
	`544`	`+chianti_v10`
`545`	`545`	`asplund`
`546`		`-chianti_v7`
	`546`	`+chianti_v10`
`547`	`547`	`3`
`548`	`548`	`h`
`549`	`549`	`1`
`@@ -559,9 +559,9 @@ def test_radiation_config_equilibrium(configuration):`
`559`	`559`	`def test_radiation_config_nonequilibrium(configuration):`
`560`	`560`	`configuration.config['radiation']['elements_nonequilibrium'] = ['iron']`
`561`	`561`	`config = f"""ranges`
`562`		`-chianti_v7`
	`562`	`+chianti_v10`
`563`	`563`	`asplund`
`564`		`-chianti_v7`
	`564`	`+chianti_v10`
`565`	`565`	`1`
`566`	`566`	`fe`
`567`	`567`	`26`