Skip to content

Commit f886be0

Browse files
committed
rprof is only accessible through the step object
The huge pandas DataFrame grouping all rprof data no longer exists. This paves the way for step-by-step reading of the HDF5 file, and allows us to read rprof data even if the number of profiles increases with time.
1 parent 13467f5 commit f886be0

File tree

7 files changed: +45 -73 lines changed

stagpy/_step.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -442,11 +442,11 @@ def timeinfo(self):
442442
def rprof(self):
443443
"""Radial profiles data of the time step.
444444
445-
Set to None if no radial profiles data is available for this time step.
445+
This is a :class:`pandas.DataFrame` with iz as index and variable names
446+
as columns. Set to None if no radial profiles data is available for
447+
this time step.
446448
"""
447-
if self.istep not in self.sdat.rprof.index.levels[0]:
448-
return None
449-
return self.sdat.rprof.loc[self.istep]
449+
return self.sdat._rprof_and_times[0].get(self.istep)
450450

451451
@property
452452
def isnap(self):

stagpy/plates.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -593,8 +593,7 @@ def main_plates(sdat):
593593
"""Plot several plates information."""
594594
# calculating averaged horizontal surface velocity
595595
# needed for redimensionalisation
596-
ilast = sdat.rprof.index.levels[0][-1]
597-
rlast = sdat.rprof.loc[ilast]
596+
rlast = sdat.snaps[-1].rprof
598597
nprof = 0
599598
uprof_averaged = rlast.loc[:, 'vhrms'] * 0
600599
for step in sdat.walk.filter(rprof=True):

stagpy/rprof.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -205,8 +205,6 @@ def cmd():
205205
conf.core
206206
"""
207207
sdat = StagyyData()
208-
if sdat.rprof is None:
209-
return
210208

211209
if conf.rprof.grid:
212210
for step in sdat.walk.filter(rprof=True):

stagpy/stagyydata.py

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -601,7 +601,7 @@ def _rprof_and_times(self):
601601
rproffile = self.filename('rprof.h5')
602602
self._stagdat['rprof'] = stagyyparsers.rprof_h5(
603603
rproffile, list(phyvars.RPROF.keys()))
604-
if self._stagdat['rprof'][0] is not None:
604+
if self._stagdat['rprof'][1] is not None:
605605
return self._stagdat['rprof']
606606
rproffile = self.filename('rprof.dat')
607607
if self.hdf5 and not rproffile.is_file():
@@ -611,15 +611,6 @@ def _rprof_and_times(self):
611611
rproffile, list(phyvars.RPROF.keys()))
612612
return self._stagdat['rprof']
613613

614-
@property
615-
def rprof(self):
616-
"""Radial profiles data.
617-
618-
This is a :class:`pandas.DataFrame` with a 2-level index (istep and iz)
619-
and variable names as columns.
620-
"""
621-
return self._rprof_and_times[0]
622-
623614
@property
624615
def rtimes(self):
625616
"""Radial profiles times.

stagpy/stagyyparsers.py

Lines changed: 33 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
of :class:`~stagpy.stagyydata.StagyyData`.
77
"""
88
from functools import partial
9-
from itertools import product, repeat
9+
from itertools import product
1010
from operator import itemgetter
1111
from xml.etree import ElementTree as xmlET
1212
import re
@@ -112,11 +112,10 @@ def time_series_h5(timefile, colnames):
112112
return pdf.loc[~pdf.index.duplicated(keep='last')]
113113

114114

115-
def _extract_rsnap_isteps(rproffile):
116-
"""Extract istep and compute list of rows to delete."""
115+
def _extract_rsnap_isteps(rproffile, data):
116+
"""Extract istep, time and build separate rprof df."""
117117
step_regex = re.compile(r'^\*+step:\s*(\d+) ; time =\s*(\S+)')
118-
isteps = [] # list of (istep, time, nz)
119-
rows_to_del = set()
118+
isteps = [] # list of (istep, time, df)
120119
line = ' '
121120
with rproffile.open() as stream:
122121
while line[0] != '*':
@@ -128,22 +127,19 @@ def _extract_rsnap_isteps(rproffile):
128127
iline = 0
129128
for line in stream:
130129
if line[0] == '*':
131-
isteps.append((istep, time, nlines))
130+
isteps.append((istep, time, data.iloc[iline - nlines:iline]))
132131
match = step_regex.match(line)
133132
istep = int(match.group(1))
134133
time = float(match.group(2))
135134
nlines = 0
136135
# remove useless lines produced when run is restarted
137-
nrows_to_del = 0
138136
while isteps and istep <= isteps[-1][0]:
139-
nrows_to_del += isteps.pop()[-1]
140-
rows_to_del = rows_to_del.union(
141-
range(iline - nrows_to_del, iline))
137+
isteps.pop()
142138
else:
143139
nlines += 1
144140
iline += 1
145-
isteps.append((istep, time, nlines))
146-
return isteps, rows_to_del
141+
isteps.append((istep, time, data.iloc[iline - nlines:iline]))
142+
return isteps
147143

148144

149145
def rprof(rproffile, colnames):
@@ -156,39 +152,32 @@ def rprof(rproffile, colnames):
156152
Args:
157153
rproffile (:class:`pathlib.Path`): path of the rprof.dat file.
158154
colnames (list of names): names of the variables expected in
159-
:data:`rproffile` (may be modified).
155+
:data:`rproffile`.
160156
161157
Returns:
162-
tuple of :class:`pandas.DataFrame`: (profs, times)
163-
:data:`profs` are the radial profiles, with the variables in
164-
columns and rows double-indexed with the time step and the radial
165-
index of numerical cells.
158+
tuple: (profs, times)
159+
:data:`profs` is a dict mapping istep to radial profiles
160+
:class:`pandas.DataFrame`.
166161
167162
:data:`times` is the dimensionless time indexed by time steps.
168163
"""
169164
if not rproffile.is_file():
170-
return None, None
165+
return {}, None
171166
data = pd.read_csv(rproffile, delim_whitespace=True, dtype=str,
172167
header=None, comment='*', skiprows=1,
173168
engine='c', memory_map=True,
174169
error_bad_lines=False, warn_bad_lines=False)
175170
data = data.apply(pd.to_numeric, raw=True, errors='coerce')
176171

177-
isteps, rows_to_del = _extract_rsnap_isteps(rproffile)
178-
if rows_to_del:
179-
rows_to_keep = set(range(len(data))) - rows_to_del
180-
data = data.take(list(rows_to_keep))
172+
isteps = _extract_rsnap_isteps(rproffile, data)
181173

182-
id_arr = [[], []]
183-
for istep, _, n_z in isteps:
184-
id_arr[0].extend(repeat(istep, n_z))
185-
id_arr[1].extend(range(n_z))
186-
187-
data.index = id_arr
188-
189-
ncols = data.shape[1]
190-
_tidy_names(colnames, ncols)
191-
data.columns = colnames
174+
data = {}
175+
for istep, _, step_df in isteps:
176+
step_df.index = range(step_df.shape[0]) # check whether necessary
177+
step_cols = list(colnames)
178+
_tidy_names(step_cols, step_df.shape[1])
179+
step_df.columns = step_cols
180+
data[istep] = step_df
192181

193182
df_times = pd.DataFrame(list(map(itemgetter(1), isteps)),
194183
index=map(itemgetter(0), isteps))
@@ -207,38 +196,32 @@ def rprof_h5(rproffile, colnames):
207196
:data:`rproffile`.
208197
209198
Returns:
210-
tuple of :class:`pandas.DataFrame`: (profs, times)
211-
:data:`profs` are the radial profiles, with the variables in
212-
columns and rows double-indexed with the time step and the radial
213-
index of numerical cells.
199+
tuple: (profs, times)
200+
:data:`profs` is a dict mapping istep to radial profiles
201+
:class:`pandas.DataFrame`.
214202
215203
:data:`times` is the dimensionless time indexed by time steps.
216204
"""
217205
if not rproffile.is_file():
218-
return None, None
206+
return {}, None
219207
isteps = []
208+
data = {}
220209
with h5py.File(rproffile, 'r') as h5f:
221210
dnames = sorted(dname for dname in h5f.keys()
222211
if dname.startswith('rprof_'))
223-
ncols = h5f['names'].shape[0]
224212
h5names = map(bytes.decode, h5f['names'][len(colnames):])
225-
_tidy_names(colnames, ncols, h5names)
226-
data = np.zeros((0, ncols))
227213
for dname in dnames:
228214
dset = h5f[dname]
229-
data = np.concatenate((data, dset[()]))
230-
isteps.append((dset.attrs['istep'], dset.attrs['time'],
231-
dset.shape[0]))
232-
233-
id_arr = [[], []]
234-
for istep, _, n_z in isteps:
235-
id_arr[0].extend(repeat(istep, n_z))
236-
id_arr[1].extend(range(n_z))
215+
arr = dset[()]
216+
istep = dset.attrs['istep']
217+
step_cols = list(colnames)
218+
_tidy_names(step_cols, arr.shape[1], h5names) # check shape
219+
data[istep] = pd.DataFrame(arr, columns=step_cols)
220+
isteps.append((istep, dset.attrs['time']))
237221

238-
df_data = pd.DataFrame(data, index=id_arr, columns=colnames)
239222
df_times = pd.DataFrame(list(map(itemgetter(1), isteps)),
240223
index=map(itemgetter(0), isteps))
241-
return df_data, df_times
224+
return data, df_times
242225

243226

244227
def _clean_names_refstate(names):

tests/test_parsers.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,13 @@ def test_time_series_invalid_prs():
1616
def test_rprof_prs(sdat):
1717
names = ['aa', 'bb', 'cc']
1818
data, time = prs.rprof(sdat.filename('rprof.dat'), list(names))
19-
assert (data.columns[:3] == names).all()
20-
assert (data.columns[3:] == list(range(data.shape[1] - 3))).all()
19+
assert all((df.columns[:3] == names).all() for df in data.values())
20+
assert all((df.columns[3:] == list(range(df.shape[1] - 3))).all()
21+
for df in data.values())
2122

2223

2324
def test_rprof_invalid_prs():
24-
assert prs.rprof(pathlib.Path('dummy'), []) == (None, None)
25+
assert prs.rprof(pathlib.Path('dummy'), []) == ({}, None)
2526

2627

2728
def test_fields_prs(sdat):

tests/test_stagyydata.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,8 @@ def test_sdat_tseries(sdat):
3838
assert isinstance(sdat.tseries, pandas.DataFrame)
3939

4040

41-
def test_sdat_rprof(sdat):
42-
assert isinstance(sdat.rprof, pandas.DataFrame)
41+
def test_sdat_rtimes(sdat):
42+
assert isinstance(sdat.rtimes, pandas.DataFrame)
4343

4444

4545
def test_sdat_walk_dflt(sdat):

0 commit comments

Comments
 (0)