66 of :class:`~stagpy.stagyydata.StagyyData`.
77"""
88from functools import partial
9- from itertools import product , repeat
9+ from itertools import product
1010from operator import itemgetter
1111from xml .etree import ElementTree as xmlET
1212import re
@@ -112,11 +112,10 @@ def time_series_h5(timefile, colnames):
112112 return pdf .loc [~ pdf .index .duplicated (keep = 'last' )]
113113
114114
115- def _extract_rsnap_isteps (rproffile ):
116- """Extract istep and compute list of rows to delete ."""
115+ def _extract_rsnap_isteps (rproffile , data ):
116+ """Extract istep, time and build separate rprof df ."""
117117 step_regex = re .compile (r'^\*+step:\s*(\d+) ; time =\s*(\S+)' )
118- isteps = [] # list of (istep, time, nz)
119- rows_to_del = set ()
118+ isteps = [] # list of (istep, time, df)
120119 line = ' '
121120 with rproffile .open () as stream :
122121 while line [0 ] != '*' :
@@ -128,22 +127,19 @@ def _extract_rsnap_isteps(rproffile):
128127 iline = 0
129128 for line in stream :
130129 if line [0 ] == '*' :
131- isteps .append ((istep , time , nlines ))
130+ isteps .append ((istep , time , data . iloc [ iline - nlines : iline ] ))
132131 match = step_regex .match (line )
133132 istep = int (match .group (1 ))
134133 time = float (match .group (2 ))
135134 nlines = 0
136135 # remove useless lines produced when run is restarted
137- nrows_to_del = 0
138136 while isteps and istep <= isteps [- 1 ][0 ]:
139- nrows_to_del += isteps .pop ()[- 1 ]
140- rows_to_del = rows_to_del .union (
141- range (iline - nrows_to_del , iline ))
137+ isteps .pop ()
142138 else :
143139 nlines += 1
144140 iline += 1
145- isteps .append ((istep , time , nlines ))
146- return isteps , rows_to_del
141+ isteps .append ((istep , time , data . iloc [ iline - nlines : iline ] ))
142+ return isteps
147143
148144
149145def rprof (rproffile , colnames ):
@@ -156,39 +152,32 @@ def rprof(rproffile, colnames):
156152 Args:
157153 rproffile (:class:`pathlib.Path`): path of the rprof.dat file.
158154 colnames (list of names): names of the variables expected in
159- :data:`rproffile` (may be modified) .
155+ :data:`rproffile`.
160156
161157 Returns:
162- tuple of :class:`pandas.DataFrame`: (profs, times)
163- :data:`profs` are the radial profiles, with the variables in
164- columns and rows double-indexed with the time step and the radial
165- index of numerical cells.
158+ tuple: (profs, times)
159+ :data:`profs` is a dict mapping istep to radial profiles
160+ :class:`pandas.DataFrame`.
166161
167162 :data:`times` is the dimensionless time indexed by time steps.
168163 """
169164 if not rproffile .is_file ():
170- return None , None
165+ return {} , None
171166 data = pd .read_csv (rproffile , delim_whitespace = True , dtype = str ,
172167 header = None , comment = '*' , skiprows = 1 ,
173168 engine = 'c' , memory_map = True ,
174169 error_bad_lines = False , warn_bad_lines = False )
175170 data = data .apply (pd .to_numeric , raw = True , errors = 'coerce' )
176171
177- isteps , rows_to_del = _extract_rsnap_isteps (rproffile )
178- if rows_to_del :
179- rows_to_keep = set (range (len (data ))) - rows_to_del
180- data = data .take (list (rows_to_keep ))
172+ isteps = _extract_rsnap_isteps (rproffile , data )
181173
182- id_arr = [[], []]
183- for istep , _ , n_z in isteps :
184- id_arr [0 ].extend (repeat (istep , n_z ))
185- id_arr [1 ].extend (range (n_z ))
186-
187- data .index = id_arr
188-
189- ncols = data .shape [1 ]
190- _tidy_names (colnames , ncols )
191- data .columns = colnames
174+ data = {}
175+ for istep , _ , step_df in isteps :
176+ step_df .index = range (step_df .shape [0 ]) # check whether necessary
177+ step_cols = list (colnames )
178+ _tidy_names (step_cols , step_df .shape [1 ])
179+ step_df .columns = step_cols
180+ data [istep ] = step_df
192181
193182 df_times = pd .DataFrame (list (map (itemgetter (1 ), isteps )),
194183 index = map (itemgetter (0 ), isteps ))
@@ -207,38 +196,32 @@ def rprof_h5(rproffile, colnames):
207196 :data:`rproffile`.
208197
209198 Returns:
210- tuple of :class:`pandas.DataFrame`: (profs, times)
211- :data:`profs` are the radial profiles, with the variables in
212- columns and rows double-indexed with the time step and the radial
213- index of numerical cells.
199+ tuple: (profs, times)
200+ :data:`profs` is a dict mapping istep to radial profiles
201+ :class:`pandas.DataFrame`.
214202
215203 :data:`times` is the dimensionless time indexed by time steps.
216204 """
217205 if not rproffile .is_file ():
218- return None , None
206+ return {} , None
219207 isteps = []
208+ data = {}
220209 with h5py .File (rproffile , 'r' ) as h5f :
221210 dnames = sorted (dname for dname in h5f .keys ()
222211 if dname .startswith ('rprof_' ))
223- ncols = h5f ['names' ].shape [0 ]
224212 h5names = map (bytes .decode , h5f ['names' ][len (colnames ):])
225- _tidy_names (colnames , ncols , h5names )
226- data = np .zeros ((0 , ncols ))
227213 for dname in dnames :
228214 dset = h5f [dname ]
229- data = np .concatenate ((data , dset [()]))
230- isteps .append ((dset .attrs ['istep' ], dset .attrs ['time' ],
231- dset .shape [0 ]))
232-
233- id_arr = [[], []]
234- for istep , _ , n_z in isteps :
235- id_arr [0 ].extend (repeat (istep , n_z ))
236- id_arr [1 ].extend (range (n_z ))
215+ arr = dset [()]
216+ istep = dset .attrs ['istep' ]
217+ step_cols = list (colnames )
218+ _tidy_names (step_cols , arr .shape [1 ], h5names ) # check shape
219+ data [istep ] = pd .DataFrame (arr , columns = step_cols )
220+ isteps .append ((istep , dset .attrs ['time' ]))
237221
238- df_data = pd .DataFrame (data , index = id_arr , columns = colnames )
239222 df_times = pd .DataFrame (list (map (itemgetter (1 ), isteps )),
240223 index = map (itemgetter (0 ), isteps ))
241- return df_data , df_times
224+ return data , df_times
242225
243226
244227def _clean_names_refstate (names ):
0 commit comments