Skip to content

Commit 51f0464

Browse files
authored
Make parsing of .amr files faster (#194)
* faster amr file parsing * faster indexing of strand * more tests, better coverage * fix read_trm_file docstring * update codecov badge * clarify comments
1 parent 4e85644 commit 51f0464

File tree

7 files changed

+96
-32
lines changed

7 files changed

+96
-32
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
[![pydrad CI status](https://github.com/rice-solar-physics/pydrad/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/rice-solar-physics/pydrad/actions)
44
[![Documentation Status](https://readthedocs.org/projects/pydrad/badge/?version=latest)](https://pydrad.readthedocs.io/en/latest/?badge=latest)
5-
[![codecov](https://codecov.io/gh/rice-solar-physics/pydrad/branch/master/graph/badge.svg)](https://codecov.io/gh/rice-solar-physics/pydrad)
5+
[![codecov](https://codecov.io/gh/rice-solar-physics/pydrad/graph/badge.svg?token=GZOGGHF2B0)](https://codecov.io/gh/rice-solar-physics/pydrad)
66
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8411058.svg)](https://doi.org/10.5281/zenodo.8411058)
77

88
Some Python tools to configure and parse output from the HYDrodynamics and RADiation (HYDRAD) code for field-aligned coronal loop physics.

conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def get_configuration_dict():
4343
'write_file_physical': True,
4444
'write_file_timescales': True,
4545
'loop_length': 90.*u.Mm,
46-
'total_time': 2.*u.s,
46+
'total_time': 5.*u.s,
4747
},
4848
'grid': {
4949
'adapt': True,

pydrad/configure/tests/test_templates.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def test_heating_header(configuration):
170170
def test_hydrad_config(configuration):
171171
config = f"""Initial_Conditions/profiles/initial.amr
172172
Initial_Conditions/profiles/initial.amr.gravity
173-
2.0
173+
5.0
174174
1.0
175175
176176
Configuration file generated by pydrad on {configuration.date}"""
@@ -180,7 +180,7 @@ def test_hydrad_config(configuration):
180180
config = f"""Initial_Conditions/profiles/initial.amr
181181
poly_fit.gravity
182182
poly_fit.magnetic_field
183-
2.0
183+
5.0
184184
1.0
185185
186186
Configuration file generated by pydrad on {configuration.date}"""
@@ -189,7 +189,7 @@ def test_hydrad_config(configuration):
189189
config = f"""Results/profile10.amr
190190
poly_fit.gravity
191191
poly_fit.magnetic_field
192-
2.0
192+
5.0
193193
1.0
194194
195195
Configuration file generated by pydrad on {configuration.date}"""

pydrad/parse/parse.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,24 @@ def __getitem__(self, index):
8383
master_time=self._master_time,
8484
**self._profile_kwargs)
8585
else:
86+
_index = None
87+
# This is a nested conditional because cannot compare two arrays of unequal shape
88+
if self.time.shape == self._master_time.shape:
89+
if (self.time==self._master_time).all():
90+
# NOTE: This is a shortcut that allows for much faster indexing if you are not taking a slice
91+
# from a slice. For example, index only will map to the correct index in the full-resolution
92+
# time array if you are slicing from the original time array ("master_time"). Otherwise, this
93+
# index no longer corresponds and you have to compute it from the master time and the time at
94+
# this slice. For example, in strand[10:15][0], the 0-index corresponds to 10 in the original
95+
# indexing. As such, the index has to be recomputed which is slower. Whereas in strand[10],
96+
# 10 corresponds to 10 in the original indexing and can just be passed straight through to
97+
# the underlying profile.
98+
log.debug('Using explicit index to slice Strand')
99+
_index = index
86100
return Profile(self.hydrad_root,
87101
self.time[index],
88102
master_time=self._master_time,
103+
index=_index,
89104
**self._profile_kwargs)
90105

91106
def to_hdf5(self, filename, *variables):
@@ -257,10 +272,18 @@ def __init__(self, hydrad_root, time: u.s, **kwargs):
257272
if time.shape:
258273
raise ValueError('time must be a scalar')
259274
self.time = time
260-
self._master_time = kwargs.get('master_time')
261-
if self._master_time is None:
262-
self._master_time = read_master_time(self.hydrad_root,
263-
read_from_cfg=kwargs.get('read_from_cfg', False))
275+
master_time = kwargs.get('master_time')
276+
# NOTE: You should only be passing in the index explicitly if you are slicing from the original time array
277+
if (index:=kwargs.get('index')) is None:
278+
log.debug('Profile index is None. Calculating index from master time')
279+
if master_time is None:
280+
read_from_cfg = kwargs.get('read_from_cfg', False)
281+
log.debug(f"Reading master time from {'cfg' if read_from_cfg else 'amr'} files.")
282+
master_time = read_master_time(self.hydrad_root, read_from_cfg=read_from_cfg)
283+
self._index = np.where(self.time == master_time)[0][0]
284+
else:
285+
log.debug(f'Using explicit index {index} while indexing Profile.')
286+
self._index = index
264287
# Read results files
265288
self._read_amr()
266289
if kwargs.get('read_phy', True):
@@ -298,10 +321,6 @@ def _hstate_filename(self):
298321
def _scl_filename(self):
299322
return self.hydrad_root / 'Results' / f'profile{self._index:d}.scl'
300323

301-
@property
302-
def _index(self):
303-
return np.where(self.time == self._master_time)[0][0]
304-
305324
def __repr__(self):
306325
return f"""HYDRAD Timestep Profile
307326
-----------------------

pydrad/parse/tests/test_strand.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77
import pytest
88

9-
from pydrad.parse import Strand
9+
from pydrad.parse import Profile, Strand
1010

1111
VAR_NAMES = [
1212
'coordinate',
@@ -63,15 +63,24 @@
6363
def strand(hydrad):
6464
return Strand(hydrad)
6565

66+
@pytest.fixture
67+
def strand_only_amr_time_cfg(hydrad):
68+
return Strand(hydrad,
69+
read_from_cfg=True,
70+
read_phy=False,
71+
read_ine=False,
72+
read_trm=False,
73+
read_hstate=False,
74+
read_scl=False)
75+
6676
@pytest.fixture
6777
def strand_only_amr(hydrad):
6878
return Strand(hydrad,
6979
read_phy=False,
7080
read_ine=False,
7181
read_trm=False,
7282
read_hstate=False,
73-
read_scl=False,
74-
)
83+
read_scl=False)
7584

7685

7786
def test_parse_initial_conditions(strand):
@@ -111,6 +120,15 @@ def test_time_arrays_same(hydrad, strand):
111120
assert u.allclose(strand.time, strand2.time, rtol=0.0, atol=1e-2*u.s)
112121

113122

123+
def test_strand_indexing(strand_only_amr):
124+
# Make sure indexing is tracked correctly across repeated slicing
125+
# of strand
126+
assert strand_only_amr[2]._index == 2
127+
strand_slice = strand_only_amr[1:4]
128+
assert strand_slice[1]._index == 2
129+
assert strand_slice[1]._amr_filename == strand_only_amr[2]._amr_filename
130+
131+
114132
def test_to_hdf5(strand, tmp_path):
115133
filename = tmp_path / 'hydrad_results.h5'
116134
strand.to_hdf5(filename, *VAR_NAMES)
@@ -130,6 +148,7 @@ def test_emission_measure(strand):
130148
assert isinstance(bins, u.Quantity)
131149
assert len(bins) == len(em) + 1
132150

151+
133152
def test_term_file_output(strand):
134153
for p in strand:
135154
# The electron energy equation's numerical viscosity term is always 0:
@@ -145,26 +164,45 @@ def test_term_file_output(strand):
145164
rtol=0.0,
146165
)
147166

167+
148168
def test_term_file_units(strand):
149169
assert strand[0].mass_advection.unit == u.Unit('g s-1 cm-3')
150170
assert strand[0].momentum_gravity.unit == u.Unit('dyne s-1 cm-3')
151171
assert strand[0].electron_viscous_stress.unit == u.Unit('erg s-1 cm-3')
152172
assert strand[0].hydrogen_collisions.unit == u.Unit('erg s-1 cm-3')
153173

174+
154175
def test_scale_file_output(strand):
155176
for p in strand:
156177
# all time-scales should be strictly greater than 0
157178
assert all(t > (0.0 * u.s) for t in p.radiative_timescale)
158179
assert all(t > (0.0 * u.s) for t in p.collisional_timescale)
159180
assert all(t > (0.0 * u.s) for t in p.ion_conductive_timescale)
160181

182+
161183
def test_scale_file_units(strand):
162184
assert strand[0].advective_timescale.unit == u.Unit('s')
163185
assert strand[0].electron_conductive_timescale.unit == u.Unit('s')
164186
assert strand[0].collisional_timescale.unit == u.Unit('s')
165187

188+
166189
def test_amr_file_units(strand, strand_only_amr):
167190
assert strand[0].mass_density.unit == u.Unit('g cm-3')
168191
assert strand_only_amr[0].mass_density.unit == u.Unit('g cm-3')
169192
assert strand[0].electron_mass_density.unit == u.Unit('g cm-3')
170193
assert strand_only_amr[0].electron_mass_density.unit == u.Unit('g cm-3')
194+
195+
196+
def test_profile_instantiation(strand_only_amr, strand_only_amr_time_cfg):
197+
# Test various ways to instantiate a Profile
198+
# No index, no master time
199+
p = Profile(strand_only_amr.hydrad_root, strand_only_amr.time[1])
200+
assert p._index == 1
201+
# No index, no master time, read from cfg
202+
# NOTE: This uses a different strand object as the original time array must also be read from the cfg
203+
# file rather than the array file. Otherwise, the values will be slightly different.
204+
p = Profile(strand_only_amr_time_cfg.hydrad_root, strand_only_amr_time_cfg.time[1], read_from_cfg=True)
205+
assert p._index == 1
206+
# No index, master time
207+
p = Profile(strand_only_amr.hydrad_root, strand_only_amr.time[1], master_time=strand_only_amr._master_time)
208+
assert p._index == 1

pydrad/parse/util.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -90,28 +90,34 @@ def read_amr_file(filename):
9090
'electron_energy_density': 'erg cm-3',
9191
'hydrogen_energy_density': 'erg cm-3',
9292
}
93-
table = astropy.table.QTable.read(
93+
# NOTE: Purposefully using pandas explicitly as it seems to be faster
94+
# than astropy.io.ascii.read for tables with this particular delimiter.
95+
# I am not completely sure why this is the case but the difference is
96+
# almost an order of magnitude.
97+
table = read_csv(
9498
filename,
95-
format='ascii',
96-
data_start=4,
99+
skiprows=4,
100+
sep=r'\s+',
101+
header=None,
102+
engine='c',
97103
)
98104
# NOTE: The columns we care about are doubles in HYDRAD, while the
99105
# other columns are integers with information about the
100-
# refinement level of the grid cell. As a result, if electron
106+
# refinement level of the grid cell. As a result, if electron
101107
# mass density is not present in the .amr file, then the
102108
# seventh column is an integer.
103-
if table.dtype[len(columns)-1] == np.int64:
109+
if table.dtypes[len(columns)-1] == np.int64:
104110
columns.remove('electron_mass_density')
105111
del units['electron_mass_density']
106112
# NOTE: This is done after creating the table because the
107113
# remaining number of columns can be variable and thus we
108114
# cannot assign all of the column names at once.
109-
table.rename_columns(
110-
table.colnames[:len(columns)],
111-
columns,
115+
table = table.truncate(
116+
after=len(columns)-1,
117+
axis='columns'
112118
)
113-
for column in columns:
114-
table[column].unit = units[column]
119+
table.rename(columns={i:name for i, name in enumerate(columns)}, inplace=True)
120+
table = astropy.table.QTable.from_pandas(table, units=units)
115121
return table
116122

117123

@@ -208,11 +214,12 @@ def read_trm_file(filename):
208214
Parse ``.trm`` files with hydrodynamic equation terms as a function of position.
209215
210216
The files come in sets of 5 rows with variable number of columns:
211-
-- Loop coordinate (1 column), and at each position:
212-
-- Terms of mass equation (2 columns)
213-
-- Terms of momentum equation (6 columns)
214-
-- Terms of electron energy equation (11 columns)
215-
-- Terms of hydrogen energy equation (11 columns)
217+
218+
* Loop coordinate (1 column), and at each position:
219+
* Terms of mass equation (2 columns)
220+
* Terms of momentum equation (6 columns)
221+
* Terms of electron energy equation (11 columns)
222+
* Terms of hydrogen energy equation (11 columns)
216223
"""
217224
units = {
218225
'mass': 'g cm-3 s-1',

pydrad/tests/test_hydrad.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,4 @@ def test_use_openmp(tmpdir_factory, configuration, hydrad_clean):
3737
omp_configuration.setup_simulation(hydrad_tmp, hydrad_clean, overwrite=True)
3838
run_shell_command(hydrad_tmp / 'HYDRAD.exe')
3939
strand = Strand(hydrad_tmp)
40-
assert len(strand) == 3
40+
assert len(strand) == 6

0 commit comments

Comments
 (0)