1818 _spike_channel_dtype ,
1919 _event_channel_dtype ,
2020)
21+
22+ import numpy as np
23+ import json
24+ import warnings
2125from neo .core import NeoReadWriteError
2226
2327
@@ -29,6 +33,14 @@ class BiocamRawIO(BaseRawIO):
2933 ----------
3034 filename: str, default: ''
3135 The *.h5 file to be read
36+ fill_gaps_strategy: "zeros" | "synthetic_noise" | None, default: None
37+ The strategy to fill the gaps in the data when using event-based
38+ compression. If None and the file is event-based compressed,
39+ you need to specify a fill gaps strategy:
40+
41+ * "zeros": the gaps are filled with unsigned 0s (2048). This value is the "0" of the unsigned 12 bits
42+ representation of the data.
43+ * "synthetic_noise": the gaps are filled with synthetic noise.
3244
3345 Examples
3446 --------
@@ -49,9 +61,10 @@ class BiocamRawIO(BaseRawIO):
4961 extensions = ["h5" , "brw" ]
5062 rawmode = "one-file"
5163
52- def __init__ (self , filename = "" ):
64+ def __init__ (self , filename = "" , fill_gaps_strategy = "zeros" ):
5365 BaseRawIO .__init__ (self )
5466 self .filename = filename
67+ self ._fill_gaps_strategy = fill_gaps_strategy
5568
5669 def _source_name (self ):
5770 return self .filename
@@ -130,7 +143,24 @@ def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, strea
130143 i_stop = self ._num_frames
131144
132145 # read functions are different based on the version of biocam
133- data = self ._read_function (self ._filehandle , i_start , i_stop , self ._num_channels )
146+ if self ._read_function is readHDF5t_brw4_sparse :
147+ if self ._fill_gaps_strategy is None :
148+ raise ValueError (
149+ "Please set `fill_gaps_strategy` to 'zeros' or 'synthetic_noise'."
150+ )
151+ if self ._fill_gaps_strategy == "synthetic_noise" :
152+ warnings .warn ("Event-based compression : gaps will be filled with synthetic noise. "
153+ "Set `fill_gaps_strategy` to 'zeros' to fill gaps with 0s." )
154+ use_synthetic_noise = True
155+ elif self ._fill_gaps_strategy == "zeros" :
156+ use_synthetic_noise = False
157+ else :
158+ raise ValueError ("`fill_gaps_strategy` must be 'zeros' or 'synthetic_noise'" )
159+
160+ data = self ._read_function (self ._filehandle , i_start , i_stop , self ._num_channels ,
161+ use_synthetic_noise = use_synthetic_noise )
162+ else :
163+ data = self ._read_function (self ._filehandle , i_start , i_stop , self ._num_channels )
134164
135165 # older style data returns array of (n_samples, n_channels), should be a view
136166 # but if memory issues come up we should doublecheck out how the file is being stored
@@ -243,15 +273,21 @@ def open_biocam_file_header(filename) -> dict:
243273 min_digital = experiment_settings ["ValueConverter" ]["MinDigitalValue" ]
244274 scale_factor = experiment_settings ["ValueConverter" ]["ScaleFactor" ]
245275 sampling_rate = experiment_settings ["TimeConverter" ]["FrameRate" ]
276+ num_frames = rf ['TOC' ][- 1 ,- 1 ]
246277
247278 num_channels = None
248- for key in rf :
249- if key [:5 ] == "Well_" :
250- num_channels = len (rf [key ]["StoredChIdxs" ])
251- if len (rf [key ]["Raw" ]) % num_channels :
252- raise NeoReadWriteError (f"Length of raw data array is not multiple of channel number in { key } " )
253- num_frames = len (rf [key ]["Raw" ]) // num_channels
254- break
279+ well_ID = None
280+ for well_ID in rf :
281+ if well_ID .startswith ("Well_" ):
282+ num_channels = len (rf [well_ID ]["StoredChIdxs" ])
283+ if "Raw" in rf [well_ID ]:
284+ if len (rf [well_ID ]["Raw" ]) % num_channels :
285+ raise NeoReadWriteError (f"Length of raw data array is not multiple of channel number in { well_ID } " )
286+ num_frames = len (rf [well_ID ]["Raw" ]) // num_channels
287+ break
288+ elif "EventsBasedSparseRaw" in rf [well_ID ]:
289+ # Not sure how to check for this with sparse data
290+ pass
255291
256292 if num_channels is not None :
257293 num_channels_x = num_channels_y = int (np .sqrt (num_channels ))
@@ -264,7 +300,10 @@ def open_biocam_file_header(filename) -> dict:
264300
265301 gain = scale_factor * (max_uv - min_uv ) / (max_digital - min_digital )
266302 offset = min_uv
267- read_function = readHDF5t_brw4
303+ if "Raw" in rf [well_ID ]:
304+ read_function = readHDF5t_brw4
305+ elif "EventsBasedSparseRaw" in rf [well_ID ]:
306+ read_function = readHDF5t_brw4_sparse
268307
269308 return dict (
270309 file_handle = rf ,
@@ -302,5 +341,120 @@ def readHDF5t_101_i(rf, t0, t1, nch):
302341
303342def readHDF5t_brw4 (rf , t0 , t1 , nch ):
304343 for key in rf :
305- if key [: 5 ] == "Well_" :
344+ if key . startswith ( "Well_" ) :
306345 return rf [key ]["Raw" ][nch * t0 : nch * t1 ]
346+
347+
348+ def readHDF5t_brw4_sparse (rf , t0 , t1 , nch , use_synthetic_noise = False ):
349+
350+ # noise_std = None
351+ start_frame = t0
352+ num_frames = t1 - t0
353+ for well_ID in rf :
354+ if well_ID .startswith ("Well_" ):
355+ break
356+ # initialize an empty (fill with zeros) data collection
357+ data = np .zeros ((nch , num_frames ), dtype = np .uint16 )
358+ if not use_synthetic_noise :
359+ # Will read as 0s after 12 bits signed conversion
360+ data .fill (2048 )
361+ else :
362+ # fill the data collection with Gaussian noise if requested
363+ data = generate_synthetic_noise (rf , data , well_ID , start_frame , num_frames ) #, std=noise_std)
364+ # fill the data collection with the decoded event based sparse raw data
365+ data = decode_event_based_raw_data (rf , data , well_ID , start_frame , num_frames )
366+
367+ return data .T
368+
369+
370+ def decode_event_based_raw_data (rf , data , well_ID , start_frame , num_frames ):
371+ # Source: Documentation by 3Brain
372+ # https://gin.g-node.org/NeuralEnsemble/ephy_testing_data/src/master/biocam/documentation_brw_4.x_bxr_3.x_bcmp_1.x_in_brainwave_5.x_v1.1.3.pdf
373+ # collect the TOCs
374+ toc = np .array (rf ["TOC" ])
375+ events_toc = np .array (rf [well_ID ]["EventsBasedSparseRawTOC" ])
376+ # from the given start position and duration in frames, localize the corresponding event positions
377+ # using the TOC
378+ toc_start_idx = np .searchsorted (toc [:, 1 ], start_frame )
379+ toc_end_idx = min (
380+ np .searchsorted (toc [:, 1 ], start_frame + num_frames , side = "right" ) + 1 ,
381+ len (toc ) - 1 )
382+ events_start_pos = events_toc [toc_start_idx ]
383+ events_end_pos = events_toc [toc_end_idx ]
384+ # decode all data for the given well ID and time interval
385+ binary_data = rf [well_ID ]["EventsBasedSparseRaw" ][events_start_pos :events_end_pos ]
386+ binary_data_length = len (binary_data )
387+ pos = 0
388+ while pos < binary_data_length :
389+ ch_idx = int .from_bytes (binary_data [pos :pos + 4 ], byteorder = "little" )
390+ pos += 4
391+ ch_data_length = int .from_bytes (binary_data [pos :pos + 4 ], byteorder = "little" )
392+ pos += 4
393+ ch_data_pos = pos
394+ while pos < ch_data_pos + ch_data_length :
395+ from_inclusive = int .from_bytes (binary_data [pos :pos + 8 ], byteorder = "little" )
396+ pos += 8
397+ to_exclusive = int .from_bytes (binary_data [pos :pos + 8 ], byteorder = "little" )
398+ pos += 8
399+ range_data_pos = pos
400+ for j in range (from_inclusive , to_exclusive ):
401+ if j >= start_frame + num_frames :
402+ break
403+ if j >= start_frame :
404+ data [ch_idx ][j - start_frame ] = int .from_bytes (
405+ binary_data [range_data_pos :range_data_pos + 2 ], byteorder = "little" )
406+ range_data_pos += 2
407+ pos += (to_exclusive - from_inclusive ) * 2
408+
409+ return data
410+
411+ def generate_synthetic_noise (rf , data , well_ID , start_frame , num_frames ):
412+ # Source: Documentation by 3Brain
413+ # https://gin.g-node.org/NeuralEnsemble/ephy_testing_data/src/master/biocam/documentation_brw_4.x_bxr_3.x_bcmp_1.x_in_brainwave_5.x_v1.1.3.pdf
414+ # collect the TOCs
415+ toc = np .array (rf ["TOC" ])
416+ noise_toc = np .array (rf [well_ID ]["NoiseTOC" ])
417+ # from the given start position in frames, localize the corresponding noise positions
418+ # using the TOC
419+ toc_start_idx = np .searchsorted (toc [:, 1 ], start_frame )
420+ noise_start_pos = noise_toc [toc_start_idx ]
421+ noise_end_pos = noise_start_pos
422+ for i in range (toc_start_idx + 1 , len (noise_toc )):
423+ next_pos = noise_toc [i ]
424+ if next_pos > noise_start_pos :
425+ noise_end_pos = next_pos
426+ break
427+ if noise_end_pos == noise_start_pos :
428+ for i in range (toc_start_idx - 1 , 0 , - 1 ):
429+ previous_pos = noise_toc [i ]
430+ if previous_pos < noise_start_pos :
431+ noise_end_pos = noise_start_pos
432+ noise_start_pos = previous_pos
433+ break
434+ # obtain the noise info at the start position
435+ noise_ch_idx = rf [well_ID ]["NoiseChIdxs" ][noise_start_pos :noise_end_pos ]
436+ noise_mean = rf [well_ID ]["NoiseMean" ][noise_start_pos :noise_end_pos ]
437+ noise_std = rf [well_ID ]["NoiseStdDev" ][noise_start_pos :noise_end_pos ]
438+
439+ noise_length = noise_end_pos - noise_start_pos
440+ noise_info = {}
441+ mean_collection = []
442+ std_collection = []
443+ for i in range (1 , noise_length ):
444+ noise_info [noise_ch_idx [i ]] = [noise_mean [i ], noise_std [i ]]
445+ mean_collection .append (noise_mean [i ])
446+ std_collection .append (noise_std [i ])
447+ # calculate the median mean and standard deviation of all channels to be used for
448+ # invalid channels
449+ median_mean = np .median (mean_collection )
450+ median_std = np .median (std_collection )
451+ # fill with Gaussian noise
452+ for ch_idx in range (len (data )):
453+ if ch_idx in noise_info :
454+ data [ch_idx ] = np .array (np .random .normal (noise_info [ch_idx ][0 ], noise_info [ch_idx ][1 ],
455+ num_frames ), dtype = np .uint16 )
456+ else :
457+ data [ch_idx ] = np .array (np .random .normal (median_mean , median_std , num_frames ),
458+ dtype = np .uint16 )
459+
460+ return data
0 commit comments