Skip to content

Commit c175a02

Browse files
authored
Improve performance of read_ine_file and read_phy_file (#195)
* speedup ine parsing * add ine parsing test * Better Profile repr * suggestions from code review * fix template tests for new CHIANTI verison * speedup parsing of phy files * some unit tests for file reading functions * make read_ine_file test more robust
1 parent 51f0464 commit c175a02

File tree

7 files changed

+93
-46
lines changed

7 files changed

+93
-46
lines changed

conftest.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,15 @@ def get_configuration_dict():
8383
},
8484
'radiation': {
8585
'abundance_dataset': 'asplund',
86-
'decouple_ionization_state_solver': False,
86+
'decouple_ionization_state_solver': True,
8787
'density_dependent_rates': False,
8888
'elements_equilibrium': [],
89-
'elements_nonequilibrium': [],
90-
'emissivity_dataset': 'chianti_v7',
89+
'elements_nonequilibrium': ['hydrogen', 'carbon'],
90+
'emissivity_dataset': 'chianti_v10',
9191
'nlte_chromosphere': False,
9292
'optically_thick_radiation': False,
9393
'ranges_dataset': 'ranges',
94-
'rates_dataset': 'chianti_v7',
94+
'rates_dataset': 'chianti_v10',
9595
'use_power_law_radiative_losses': True
9696
},
9797
'solver': {

pydrad/configure/tests/test_templates.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -541,9 +541,9 @@ def test_radiation_config_equilibrium(configuration):
541541
'iron'
542542
]
543543
config = f"""ranges
544-
chianti_v7
544+
chianti_v10
545545
asplund
546-
chianti_v7
546+
chianti_v10
547547
3
548548
h
549549
1
@@ -559,9 +559,9 @@ def test_radiation_config_equilibrium(configuration):
559559
def test_radiation_config_nonequilibrium(configuration):
560560
configuration.config['radiation']['elements_nonequilibrium'] = ['iron']
561561
config = f"""ranges
562-
chianti_v7
562+
chianti_v10
563563
asplund
564-
chianti_v7
564+
chianti_v10
565565
1
566566
fe
567567
26

pydrad/parse/parse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,8 @@ def _scl_filename(self):
324324
def __repr__(self):
325325
return f"""HYDRAD Timestep Profile
326326
-----------------------
327-
Filename: {self._phy_filename}
327+
Filename: {self._amr_filename}
328+
Time: {self.time}
328329
Timestep #: {self._index}"""
329330

330331
def _read_amr(self):

pydrad/parse/tests/conftest.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
"""
2+
Common fixtures for testing pydrad.parse
3+
"""
4+
import pytest
5+
6+
from pydrad.parse import Strand
7+
8+
9+
@pytest.fixture
10+
def strand(hydrad):
11+
return Strand(hydrad)

pydrad/parse/tests/test_strand.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import astropy.units as u
55
import h5py
66
import numpy as np
7+
import plasmapy.particles
78
import pytest
89

910
from pydrad.parse import Profile, Strand
@@ -59,10 +60,6 @@
5960
]
6061

6162

62-
@pytest.fixture
63-
def strand(hydrad):
64-
return Strand(hydrad)
65-
6663
@pytest.fixture
6764
def strand_only_amr_time_cfg(hydrad):
6865
return Strand(hydrad,
@@ -206,3 +203,13 @@ def test_profile_instantiation(strand_only_amr, strand_only_amr_time_cfg):
206203
# No index, master time
207204
p = Profile(strand_only_amr.hydrad_root, strand_only_amr.time[1], master_time=strand_only_amr._master_time)
208205
assert p._index == 1
206+
207+
208+
def test_ine_results(strand):
209+
for profile in strand:
210+
for element in strand.config['radiation']['elements_nonequilibrium']:
211+
Z = plasmapy.particles.atomic_number(element)
212+
for i_z in range(1,Z+2):
213+
assert hasattr(profile, f'{element}_{i_z}')
214+
ion_frac = getattr(profile, f'{element}_{i_z}')
215+
assert ion_frac.shape == profile.coordinate.shape

pydrad/parse/tests/test_util.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"""
2+
Unit tests for reader functions
3+
"""
4+
import plasmapy.particles
5+
6+
from pydrad.parse.util import read_ine_file, read_phy_file
7+
8+
9+
def test_read_phy_file(strand):
10+
tab = read_phy_file(strand[0]._phy_filename)
11+
assert len(tab.colnames) == 11
12+
assert tab['coordinate'].shape == strand[0].grid_centers.shape
13+
14+
15+
def test_read_ine_file(strand):
16+
tab = read_ine_file(strand[0]._ine_filename, strand[0].grid_centers.shape[0])
17+
n_columns = sum([plasmapy.particles.atomic_number(el)+1
18+
for el in strand.config['radiation']['elements_nonequilibrium']])
19+
assert len(tab.colnames) == n_columns # NEI elements modeled are H and C: Z_H+1 + Z_C+1=9
20+
assert tab['hydrogen_1'].shape == strand[0].grid_centers.shape

pydrad/parse/util.py

Lines changed: 41 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@
2222
]
2323

2424

25+
# Do this here as calling this each time adds significant overhead
26+
# when parsing a file.
27+
ELEMENT_NAME_MAPPING = {
28+
z: plasmapy.particles.element_name(z) for z in range(1,31)
29+
}
30+
31+
2532
def read_master_time(hydrad_root, read_from_cfg=False):
2633
"""
2734
Get array of times that correspond to each timestep for the entire simulation.
@@ -155,10 +162,14 @@ def read_phy_file(filename):
155162
'electron_heat_flux': 'erg s-1 cm-2',
156163
'hydrogen_heat_flux': 'erg s-1 cm-2',
157164
}
158-
return astropy.table.QTable.read(
159-
filename,
160-
format='ascii',
161-
names=columns,
165+
return astropy.table.QTable.from_pandas(
166+
read_csv(
167+
filename,
168+
sep=r'\s+',
169+
header=None,
170+
engine='c',
171+
names=columns,
172+
),
162173
units=units,
163174
)
164175

@@ -175,38 +186,35 @@ def read_ine_file(filename, n_s):
175186
----------
176187
filename: path-like
177188
n_s: `int`
189+
The number of grid cells in the snapshot corresponding to this file.
178190
"""
179-
# TODO: clean this up somehow? I've purposefully included
180-
# a lot of comments because the format of this file makes
181-
# the parsing code quite opaque
182-
with pathlib.Path(filename).open() as f:
191+
# This file is grouped into n_s groups each of length n_el + 1 (because the first entry is
192+
# the spatial coordinate) such that the total number of lines is n_s*(n_el + 1).
193+
# Each line in the group (except the first line) has Z+2 entries corresponding to Z followed
194+
# by the ionization fraction of the Z+1 ionization stages of element Z at the spatial
195+
# coordinate specified in the first line of the group.
196+
# Because of the complexity of the structure of this file, we need to parse it line by line.
197+
with filename.open(mode='r') as f:
183198
lines = f.readlines()
184-
# First parse all of the population fraction arrays
185-
# NOTE: Have to calculate the number of elements we have
186-
# computed population fractions for as we do not necessarily
187-
# know this ahead of time
188-
n_e = int(len(lines)/n_s - 1)
189-
# The file is arranged in n_s groups of n_e+1 lines each where the first
190-
# line is the coordinate and the subsequent lines are the population fraction
191-
# for each element, with each column corresponding to an ion of that element
192-
# First, separate by coordinate
193-
pop_lists = [lines[i*(n_e+1)+1:(i+1)*(n_e+1)] for i in range(n_s)]
194-
# Convert each row of each group into a floating point array
195-
pop_lists = [[np.array(l.split(), dtype=float) for l in p] for p in pop_lists]
196-
# NOTE: each row has Z+2 entries as the first entry is the atomic number Z
197-
# Get these from just the first group as the number of elements is the same
198-
# for each
199-
Z = np.array([p[0] for p in pop_lists[0]], dtype=int)
200-
pop_arrays = [np.zeros((n_s, z+1)) for z in Z]
201-
for i, p in enumerate(pop_lists):
202-
for j, line in enumerate(p):
203-
pop_arrays[j][i, :] = line[1:] # Skip first entry, it is the atomic number
204-
columns = []
199+
n_el = int(len(lines)/n_s - 1)
200+
# The innermost loop parses the ionization fraction for all ionization stages of a given element Z
201+
# at all spatial coordinates and casts it to an array. This innermost array has dimensions (n_s,Z+1).
202+
# The outermost array iterates over all elements. The result is a list of length n_el where each entry
203+
# contains the ionization fractions at all ionization stages of a given element at all spatial coordinates.
204+
data = [
205+
np.asarray(
206+
[lines[(1+n_el)*i_s+1+i_z].split()[1:] for i_s in range(n_s)],
207+
dtype=np.float64
208+
)
209+
for i_z in range(n_el)
210+
]
211+
Z = [x.shape[1]-1 for x in data]
212+
colnames = []
205213
for z in Z:
206-
el_name = plasmapy.particles.element_name(z)
207-
columns += [f'{el_name}_{i+1}' for i in range(z+1)]
208-
data = np.hstack([p for p in pop_arrays])
209-
return astropy.table.QTable(data=data, names=columns)
214+
# A precomputed mapping between Z and element name is used as calling plasmapy.particles.element_name
215+
# each time leads to significant overhead.
216+
colnames += [f'{ELEMENT_NAME_MAPPING[z]}_{i}' for i in range(1, z+2)]
217+
return astropy.table.Table(data=np.hstack(data), names=colnames, copy=False)
210218

211219

212220
def read_trm_file(filename):

0 commit comments

Comments
 (0)