11import math
2+ import numpy as np
23
34
45class NcsSections :
@@ -7,7 +8,7 @@ class NcsSections:
78 Methods of NcsSectionsFactory perform parsing of this information from an Ncs file and
89 produce these where the sections are discontiguous in time and in temporal order.
910
10- TODO: This class will likely need __eq__, __ne__, and __hash__ to be useful in
11+ TODO: This class will likely need __ne__ to be useful in
1112 more sophisticated segment construction algorithms.
1213
1314 """
@@ -16,6 +17,16 @@ def __init__(self):
1617 self .sampFreqUsed = 0 # actual sampling frequency of samples
1718 self .microsPerSampUsed = 0 # microseconds per sample
1819
def __eq__(self, other):
    """
    Compare two section collections for equality.

    Two objects are equal when their `sampFreqUsed`, `microsPerSampUsed`
    and `sects` attributes all compare equal.

    Returns NotImplemented (instead of raising AttributeError) when `other`
    is not an instance of this class, so that Python can try the reflected
    comparison and otherwise treat the objects as unequal.
    """
    if not isinstance(other, type(self)):
        return NotImplemented
    return (self.sampFreqUsed == other.sampFreqUsed
            and self.microsPerSampUsed == other.microsPerSampUsed
            and self.sects == other.sects)
25+
def __hash__(self):
    """
    Return a hash that is consistent with `__eq__`.

    The hash is taken over a tuple of the compared attributes rather than
    over their string representation: values that compare equal but print
    differently (for example 32000 and 32000.0) must hash identically,
    which a formatted string cannot guarantee.

    NOTE: the object is mutable; changing any of these attributes while the
    object is stored in a set or used as a dict key invalidates the lookup.
    """
    return hash((self.sampFreqUsed, self.microsPerSampUsed, tuple(self.sects)))
29+
1930
2031class NcsSection :
2132 """
@@ -37,11 +48,23 @@ def __init__(self):
3748 self .endTime = - 1 # end time of last record, that is, the end time of the last
3849 # sampling period contained in the last record of the section
3950
def __init__(self, sb, st, eb, et, ns=-1):
    """
    Create a section from explicit record bounds and times.

    sb -- index of the first record of the section (stored as startRec)
    st -- start time of the first record (stored as startTime)
    eb -- index of the last record of the section (stored as endRec)
    et -- end time of the last record (stored as endTime)
    ns -- number of samples in the section (stored as n_samples)

    `ns` defaults to -1, the same "not yet known" placeholder that callers
    pass explicitly before filling the value in later; the default keeps
    the earlier four-argument call form working.
    """
    self.startRec = sb
    self.startTime = st
    self.endRec = eb
    self.endTime = et
    self.n_samples = ns
57+
def __eq__(self, other):
    """
    Compare two sections for equality.

    Sections are equal when startRec, startTime, endRec, endTime and
    n_samples all compare equal.

    Returns NotImplemented (instead of raising AttributeError) when `other`
    is not an instance of this class, so that Python can try the reflected
    comparison and otherwise treat the objects as unequal.
    """
    if not isinstance(other, type(self)):
        return NotImplemented
    return (self.startRec == other.startRec
            and self.startTime == other.startTime
            and self.endRec == other.endRec
            and self.endTime == other.endTime
            and self.n_samples == other.n_samples)
64+
def __hash__(self):
    """
    Return a hash that is consistent with `__eq__`.

    The hash is taken over a tuple of the compared attributes rather than
    over their string representation: values that compare equal but print
    differently (for example 200 and 200.0) must hash identically, which a
    formatted string cannot guarantee.

    NOTE: the object is mutable; changing any of these attributes while the
    object is stored in a set or used as a dict key invalidates the lookup.
    """
    return hash((self.startRec, self.startTime, self.endRec,
                 self.endTime, self.n_samples))
4568
4669 def before_time (self , rhb ):
4770 """
@@ -124,32 +147,38 @@ def _parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePred
124147 NcsSections object with block locations marked
125148 """
126149 startBlockPredTime = blkOnePredTime
127- blkLen = 0
150+ blk_len = 0
128151 curBlock = ncsSects .sects [0 ]
129152 for recn in range (1 , ncsMemMap .shape [0 ]):
130- if ncsMemMap ['channel_id' ][recn ] != chanNum or \
131- ncsMemMap ['sample_rate' ][recn ] != reqFreq :
153+ timestamp = ncsMemMap ['timestamp' ][recn ]
154+ channel_id = ncsMemMap ['channel_id' ][recn ]
155+ sample_rate = ncsMemMap ['sample_rate' ][recn ]
156+ nb_valid = ncsMemMap ['nb_valid' ][recn ]
157+
158+ if channel_id != chanNum or sample_rate != reqFreq :
132159 raise IOError ('Channel number or sampling frequency changed in ' +
133160 'records within file' )
134161 predTime = NcsSectionsFactory .calc_sample_time (ncsSects .sampFreqUsed ,
135- startBlockPredTime , blkLen )
136- ts = ncsMemMap ['timestamp' ][recn ]
137- nValidSamps = ncsMemMap ['nb_valid' ][recn ]
138- if ts != predTime :
162+ startBlockPredTime , blk_len )
163+ nValidSamps = nb_valid
164+ if timestamp != predTime :
139165 curBlock .endRec = recn - 1
140166 curBlock .endTime = predTime
141- curBlock = NcsSection (recn , ts , - 1 , - 1 )
167+ curBlock .n_samples = blk_len
168+ curBlock = NcsSection (recn , timestamp , - 1 , - 1 , - 1 )
142169 ncsSects .sects .append (curBlock )
143170 startBlockPredTime = NcsSectionsFactory .calc_sample_time (
144- ncsSects .sampFreqUsed , ts , nValidSamps )
145- blkLen = 0
171+ ncsSects .sampFreqUsed ,
172+ timestamp ,
173+ nValidSamps )
174+ blk_len = 0
146175 else :
147- blkLen += nValidSamps
176+ blk_len += nValidSamps
148177
149178 curBlock .endRec = ncsMemMap .shape [0 ] - 1
150179 endTime = NcsSectionsFactory .calc_sample_time (ncsSects .sampFreqUsed ,
151180 startBlockPredTime ,
152- blkLen )
181+ blk_len )
153182 curBlock .endTime = endTime
154183
155184 return ncsSects
@@ -199,15 +228,16 @@ def _buildGivenActualFrequency(ncsMemMap, actualSampFreq, reqFreq):
199228 ncsMemMap ['sample_rate' ][lastBlkI ] == reqFreq and \
200229 lts == predLastBlockStartTime :
201230 lastBlkEndTime = NcsSectionsFactory .calc_sample_time (actualSampFreq , lts , lnb )
202- curBlock = NcsSection (0 , ts0 , lastBlkI , lastBlkEndTime )
231+ n_samples = NcsSection ._RECORD_SIZE * lastBlkI
232+ curBlock = NcsSection (0 , ts0 , lastBlkI , lastBlkEndTime , n_samples )
203233
204234 nb .sects .append (curBlock )
205235 return nb
206236
207237 # otherwise need to scan looking for breaks
208238 else :
209239 blkOnePredTime = NcsSectionsFactory .calc_sample_time (actualSampFreq , ts0 , nb0 )
210- curBlock = NcsSection (0 , ts0 , - 1 , - 1 )
240+ curBlock = NcsSection (0 , ts0 , - 1 , - 1 , - 1 )
211241 nb .sects .append (curBlock )
212242 return NcsSectionsFactory ._parseGivenActualFrequency (ncsMemMap , nb , chanNum , reqFreq ,
213243 blkOnePredTime )
@@ -233,60 +263,72 @@ def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen):
233263 largest block
234264 """
235265
236- # track frequency of each block and use estimate with longest block
237- maxBlkLen = 0
238- maxBlkFreqEstimate = 0
239-
240- # Parse the record sequence, finding blocks of continuous time with no more than
241- # maxGapLength and same channel number
242266 chanNum = ncsMemMap ['channel_id' ][0 ]
243-
244- startBlockTime = ncsMemMap ['timestamp' ][0 ]
245- blkLen = ncsMemMap ['nb_valid' ][0 ]
246- lastRecTime = startBlockTime
247- lastRecNumSamps = blkLen
248267 recFreq = ncsMemMap ['sample_rate' ][0 ]
249268
250- curBlock = NcsSection (0 , startBlockTime , - 1 , - 1 )
251- ncsSects .sects .append (curBlock )
252- for recn in range (1 , ncsMemMap .shape [0 ]):
253- if ncsMemMap ['channel_id' ][recn ] != chanNum or \
254- ncsMemMap ['sample_rate' ][recn ] != recFreq :
255- raise IOError ('Channel number or sampling frequency changed in ' +
256- 'records within file' )
257- predTime = NcsSectionsFactory .calc_sample_time (ncsSects .sampFreqUsed , lastRecTime ,
258- lastRecNumSamps )
259- ts = ncsMemMap ['timestamp' ][recn ]
260- nb = ncsMemMap ['nb_valid' ][recn ]
261- if abs (ts - predTime ) > maxGapLen :
262- curBlock .endRec = recn - 1
263- curBlock .endTime = predTime
264- curBlock = NcsSection (recn , ts , - 1 , - 1 )
265- ncsSects .sects .append (curBlock )
266- if blkLen > maxBlkLen :
267- maxBlkLen = blkLen
268- maxBlkFreqEstimate = (blkLen - lastRecNumSamps ) * 1e6 / \
269- (lastRecTime - startBlockTime )
270- startBlockTime = ts
271- blkLen = nb
272- else :
273- blkLen += nb
274- lastRecTime = ts
275- lastRecNumSamps = nb
276-
277- if blkLen > maxBlkLen :
278- maxBlkFreqEstimate = (blkLen - lastRecNumSamps ) * 1e6 / \
279- (lastRecTime - startBlockTime )
280-
281- curBlock .endRec = ncsMemMap .shape [0 ] - 1
282- endTime = NcsSectionsFactory .calc_sample_time (ncsSects .sampFreqUsed , lastRecTime ,
283- lastRecNumSamps )
284- curBlock .endTime = endTime
269+ # check for consistent channel_ids and sampling rates
270+ ncsMemMap ['channel_id' ]
271+ if not (ncsMemMap ['channel_id' ] == chanNum ).all ():
272+ raise IOError ('Channel number changed in records within file' )
273+
274+ if not all (ncsMemMap ['sample_rate' ] == recFreq ):
275+ raise IOError ('Sampling frequency changed in records within file' )
276+
277+ # find most frequent number of samples
278+ exp_nb_valid = np .argmax (np .bincount (ncsMemMap ['nb_valid' ]))
279+ # detect records with incomplete number of samples
280+ gap_rec_ids = list (np .where (ncsMemMap ['nb_valid' ] != exp_nb_valid )[0 ])
281+
282+ rec_duration = 1e6 / ncsSects .sampFreqUsed * ncsMemMap ['nb_valid' ]
283+ pred_times = np .rint (ncsMemMap ['timestamp' ] + rec_duration ).astype (np .int64 )
284+ max_pred_times = pred_times + maxGapLen
285+ # data records that start later than the predicted time (including the
286+ # maximal accepted gap length) are considered delayed and a gap is
287+ # registered.
288+ delayed_recs = list (np .where (max_pred_times [:- 1 ] < ncsMemMap ['timestamp' ][1 :])[0 ])
289+ gap_rec_ids .extend (delayed_recs )
290+
291+ # cleaning extracted gap ids
292+ # last record can not be the beginning of a gap
293+ last_rec_id = len (ncsMemMap ['timestamp' ]) - 1
294+ if last_rec_id in gap_rec_ids :
295+ gap_rec_ids .remove (last_rec_id )
296+
297+ # gap ids can only be listed once
298+ gap_rec_ids = sorted (set (gap_rec_ids ))
299+
300+ # create recording segments from identified gaps
301+ ncsSects .sects .append (NcsSection (0 , ncsMemMap ['timestamp' ][0 ], - 1 , - 1 , - 1 ))
302+ for gap_rec_id in gap_rec_ids :
303+ curr_sec = ncsSects .sects [- 1 ]
304+ curr_sec .endRec = gap_rec_id
305+ curr_sec .endTime = pred_times [gap_rec_id ]
306+ n_samples = np .sum (ncsMemMap ['nb_valid' ][curr_sec .startRec :gap_rec_id + 1 ])
307+ curr_sec .n_samples = n_samples
308+
309+ next_sec = NcsSection (gap_rec_id + 1 ,
310+ ncsMemMap ['timestamp' ][gap_rec_id + 1 ], - 1 , - 1 , - 1 )
311+ ncsSects .sects .append (next_sec )
312+
313+ curr_sec = ncsSects .sects [- 1 ]
314+ curr_sec .endRec = len (ncsMemMap ['timestamp' ]) - 1
315+ curr_sec .endTime = pred_times [- 1 ]
316+ n_samples = np .sum (ncsMemMap ['nb_valid' ][curr_sec .startRec :])
317+ curr_sec .n_samples = n_samples
318+
319+ # calculate the estimated frequency of the block spanning the most records
320+ max_blk_idx = np .argmax ([bl .endRec - bl .startRec for bl in ncsSects .sects ])
321+ max_blk = ncsSects .sects [max_blk_idx ]
322+
323+ maxBlkFreqEstimate = (max_blk .n_samples - ncsMemMap ['nb_valid' ][max_blk .endRec ]) * 1e6 / \
324+ (ncsMemMap ['timestamp' ][max_blk .endRec ] - max_blk .startTime )
285325
286326 ncsSects .sampFreqUsed = maxBlkFreqEstimate
287327 ncsSects .microsPerSampUsed = NcsSectionsFactory .get_micros_per_samp_for_freq (
288328 maxBlkFreqEstimate )
289-
329+ # free memory that is unnecessarily occupied by the memmap
330+ # (see https://github.com/numpy/numpy/issues/19340)
331+ del ncsMemMap
290332 return ncsSects
291333
292334 @staticmethod
@@ -325,7 +367,7 @@ def _buildForMaxGap(ncsMemMap, nomFreq):
325367 freqInFile = math .floor (nomFreq )
326368 if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile :
327369 endTime = NcsSectionsFactory .calc_sample_time (nomFreq , lts , lnb )
328- curBlock = NcsSection (0 , ts0 , lastBlkI , endTime )
370+ curBlock = NcsSection (0 , ts0 , lastBlkI , endTime , numSampsForPred )
329371 nb .sects .append (curBlock )
330372 nb .sampFreqUsed = numSampsForPred / (lts - ts0 ) * 1e6
331373 nb .microsPerSampUsed = NcsSectionsFactory .get_micros_per_samp_for_freq (nb .sampFreqUsed )
0 commit comments