11import logging
22import re
3+ import shutil
34from pathlib import Path
4- from typing import Any , Literal , overload
5+ from typing import Literal , overload
56
6- import numpy as np
77import pandas as pd
8- from pandas .core .dtypes .concat import union_categoricals
98
109from ibldsp .utils import sync_timestamps
1110from ibllib .exceptions import SyncBpodFpgaException
1211from ibllib .io .extractors .ephys_fpga import BPOD_FPGA_DRIFT_THRESHOLD_PPM , get_protocol_period , get_sync_fronts
1312from ibllib .io .raw_daq_loaders import load_timeline_sync_and_chmap
1413from ibllib .pipes .base_tasks import BehaviourTask
15- from iblutil .io import jsonable
1614from one .alf import io as alfio
1715
logger = logging.getLogger('ibllib.' + __name__)
# Splits a Bpod event name into a 'Channel' group (non-digit prefix plus at
# most one digit, e.g. 'BNC2') and a 'Value' group holding the remainder after
# an optional underscore (e.g. 'BNC2High' -> Channel='BNC2', Value='High').
RE_PATTERN_EVENT = re.compile(r'^(?P<Channel>\D+\d?)_?(?P<Value>.*)$')
2220
2321
def bpod_session_data_to_dataframe(bpod_data: list[dict[str, Any]], existing_data: pd.DataFrame | None = None) -> pd.DataFrame:
    """Convert a session's list of raw Bpod trial dicts into one long-format DataFrame.

    Parameters
    ----------
    bpod_data : list of dict
        One raw Bpod data dict per trial.
    existing_data : pd.DataFrame, optional
        Previously converted trials; trial numbering continues after its last
        'Trial' value and the frame is prepended to the result.

    Returns
    -------
    pd.DataFrame
        All trials concatenated, with unified categorical columns.
    """
    # Continue trial numbering after the last existing trial, else start at 0.
    first_trial = 0
    if existing_data is not None and 'Trial' in existing_data:
        first_trial = existing_data.iloc[-1].Trial + 1
    trial_numbers = np.arange(len(bpod_data)) + first_trial
    frames = [existing_data] if existing_data is not None and len(existing_data) > 0 else []
    frames.extend(
        bpod_trial_data_to_dataframe(trial_dict, trial_number)
        for trial_dict, trial_number in zip(bpod_data, trial_numbers)
    )
    return concat_bpod_dataframes(frames)
32-
33-
def concat_bpod_dataframes(dataframes: list[pd.DataFrame]) -> pd.DataFrame:
    """Concatenate per-trial Bpod DataFrames, unifying their categorical columns.

    Each frame's 'Type', 'State', 'Event' and 'Channel' columns are reset (in
    place) to the union of that column's categories across all frames, so that
    ``pd.concat`` keeps the category dtype instead of degrading to object.

    Parameters
    ----------
    dataframes : list of pd.DataFrame
        Per-trial frames sharing the four categorical columns above.

    Returns
    -------
    pd.DataFrame
        The concatenation of all input frames.
    """
    for column in ('Type', 'State', 'Event', 'Channel'):
        # Union of this column's categories across every frame.
        unified = union_categoricals([df[column] for df in dataframes])
        for df in dataframes:
            df[column] = df[column].cat.set_categories(unified.categories)
    return pd.concat(dataframes)
45-
46-
def bpod_trial_data_to_dataframes(
    bpod_trial_data: list[dict[str, Any]], existing_data: list[pd.DataFrame] | None = None
) -> list[pd.DataFrame]:
    """Convert raw Bpod trial dicts into a list of per-trial DataFrames.

    Parameters
    ----------
    bpod_trial_data : list of dict
        One raw Bpod data dict per trial.
    existing_data : list of pd.DataFrame, optional
        Previously converted frames; new frames are appended to this list (in
        place) and trial numbering continues from its current length.

    Returns
    -------
    list of pd.DataFrame
        The (possibly extended) list of per-trial frames.
    """
    dataframes = [] if existing_data is None else existing_data
    # Trial numbers continue from however many frames already exist.
    for trial_number, single_trial_data in enumerate(bpod_trial_data, start=len(dataframes)):
        dataframes.append(bpod_trial_data_to_dataframe(bpod_trial_data=single_trial_data, trial=trial_number))
    return dataframes
56-
57-
def bpod_trial_data_to_dataframe(bpod_trial_data: dict[str, Any], trial: int) -> pd.DataFrame:
    """Convert one raw Bpod trial dict into a time-indexed event DataFrame.

    Parameters
    ----------
    bpod_trial_data : dict
        A single trial's raw Bpod data; must contain the keys
        'Trial start timestamp', 'Trial end timestamp', 'States timestamps'
        and 'Events timestamps'.
    trial : int
        The trial number written to every row's 'Trial' column.

    Returns
    -------
    pd.DataFrame
        One row per event, indexed by 'Time' (timedelta64[us]; trial-relative
        times shifted by the trial start timestamp), with categorical 'Type',
        'State', 'Event' and 'Channel' columns, an unsigned 'Trial' column and
        a nullable numeric 'Value' column.
    """
    trial_start = bpod_trial_data['Trial start timestamp']
    trial_end = bpod_trial_data['Trial end timestamp']
    state_times = bpod_trial_data['States timestamps'].items()
    event_times = bpod_trial_data['Events timestamps'].items()
    # Collect (time, type, state, event) tuples; TrialStart/TrialEnd bracket the rest.
    # States with NaN timestamps (states never entered) are dropped.
    event_list = [(0, 'TrialStart', pd.NA, pd.NA)]
    event_list += [(t, 'StateStart', state, pd.NA) for state, times in state_times for t, _ in times if not np.isnan(t)]
    event_list += [(t, 'InputEvent', pd.NA, event) for event, times in event_times for t in times]
    event_list += [(t, 'StateEnd', state, pd.NA) for state, times in state_times for _, t in times if not np.isnan(t)]
    event_list += [(trial_end - trial_start, 'TrialEnd', pd.NA, pd.NA)]
    # Sort chronologically (ties broken by the 'Type' string).
    event_list = sorted(event_list)
    df = pd.DataFrame(data=event_list, columns=['Time', 'Type', 'State', 'Event'])
    # Shift trial-relative times by the trial start and store as microsecond timedeltas.
    df.Time = np.array((df.Time + trial_start) * 1e6, dtype='timedelta64[us]')
    df.set_index('Time', inplace=True)
    df['Type'] = df['Type'].astype('category')
    # Forward-fill so rows between StateStart and StateEnd carry the active state.
    df['State'] = df['State'].astype('category').ffill()
    df['Event'] = df['Event'].astype('category')
    # Store the trial number using the smallest unsigned dtype that fits.
    df.insert(2, 'Trial', pd.to_numeric([trial], downcast='unsigned')[0])
    # Split each distinct event name into its channel and value parts
    # (e.g. 'BNC2High' -> Channel 'BNC2', Value 'High'); mapping over the
    # categories avoids re-parsing the name on every row.
    mappings = df['Event'].cat.categories.to_series().str.extract(RE_PATTERN_EVENT, expand=True)
    mappings['Channel'] = mappings['Channel'].astype('category')
    # Map textual TTL values to numeric: Low/Out -> 0, High/In -> 1;
    # anything else becomes NA via errors='coerce' below.
    mappings['Value'] = mappings['Value'].replace({'Low': '0', 'High': '1', 'Out': '0', 'In': '1'})
    mappings['Value'] = pd.to_numeric(mappings['Value'], errors='coerce', downcast='unsigned', dtype_backend='numpy_nullable')
    df['Channel'] = df['Event'].map(mappings['Channel'])
    df['Value'] = df['Event'].map(mappings['Value'])
    return df
83-
84-
def create_dataframe(jsonable_file: Path) -> pd.DataFrame:
    """Build the tonotopic trials table from a raw iblrig task jsonable file.

    Parameters
    ----------
    jsonable_file : Path
        Path to a file named '_iblrig_taskData.raw.jsonable'.

    Returns
    -------
    pd.DataFrame
        Audio TTL events (channel 'BNC2') with columns
        'Trial', 'Stimulus', 'Value', 'Frequency' and 'Attenuation'.

    Raises
    ------
    ValueError
        If the file has the wrong name or contains no audio TTL events.
    """
    if jsonable_file.name != '_iblrig_taskData.raw.jsonable':
        raise ValueError('Input file must be named `_iblrig_taskData.raw.jsonable`')
    bpod_dicts = jsonable.load_task_jsonable(jsonable_file)[1]
    session_data = bpod_session_data_to_dataframe(bpod_dicts)
    # Keep only audio TTL events, which arrive on channel BNC2.
    audio = session_data[session_data['Channel'].eq('BNC2')].copy()
    if len(audio) == 0:
        raise ValueError('No audio TTLs found in the provided file')
    # Parse the state name, e.g. '3_4000Hz_-20dB' -> stimulus 3, 4000 Hz, -20 dB.
    extracted = ['Stimulus', 'Frequency', 'Attenuation']
    audio[extracted] = audio['State'].str.extract(r'^(\d+)_(\d+|WN)[^-\d]+([-\d]+)dB$')
    # White noise ('WN') has no single frequency; encode it as -1.
    audio.replace({'Frequency': 'WN'}, '-1', inplace=True)
    audio[extracted] = audio[extracted].astype('Int64')
    return audio[['Trial', 'Stimulus', 'Value', 'Frequency', 'Attenuation']]
97-
98-
9922class TonotopicMappingBpod (BehaviourTask ):
10023 """Extract data from tonotopic mapping task - bpod time."""
10124
10225 @property
10326 def signature (self ):
10427 signature = super ().signature
10528 signature ['input_files' ] = [
106- ('_iblrig_taskData.raw.jsonable ' , self .collection , True , True ),
29+ ('_iblrig_taskData.raw.pqt ' , self .collection , True , True ),
10730 ('_iblrig_taskSettings.raw.json' , self .collection , True , True ),
10831 ]
10932 signature ['output_files' ] = [('_sp_tonotopic.trials.pqt' , self .output_collection , True )]
@@ -116,13 +39,13 @@ def extract_behaviour(self, save: bool = Literal[True]) -> tuple[pd.DataFrame, l
    # NOTE(review): `save: bool = Literal[False]` assigns the typing construct
    # Literal[False] as the parameter's *default value*; the conventional
    # @overload spelling is `save: Literal[False]` (annotation, no default).
    # Confirm against the companion Literal[True] overload and fix.
    def extract_behaviour(self, save: bool = Literal[False]) -> tuple[pd.DataFrame, None]: ...
11740
    def extract_behaviour(self, save: bool = True) -> tuple[pd.DataFrame, list[Path] | None]:
        """Load the pre-extracted trials table and optionally copy it to the output collection.

        Parameters
        ----------
        save : bool
            When True, copy the raw parquet file to the task's first output
            file location, creating parent directories as needed.

        Returns
        -------
        pd.DataFrame
            The trials table read from '_iblrig_taskData.raw.pqt'.
        list of pathlib.Path
            The output file path(s); an empty list when ``save`` is False.
            NOTE(review): the annotation (``list[Path] | None``) and the
            save=False overload promise ``None``, but the code always returns
            a list — confirm which is intended.
        """
        filename_in = self.session_path.joinpath(self.collection, '_iblrig_taskData.raw.pqt').absolute()
        data = pd.read_parquet(filename_in)
        filename_out = []
        if save:
            filename_out.append(self.session_path / self.output_files[0].glob_pattern)
            filename_out[0].parent.mkdir(exist_ok=True, parents=True)
            # Input is already parquet, so copy it verbatim instead of re-serializing.
            shutil.copy(filename_in, filename_out[0])
        return data, filename_out
12750
12851 def _run (self , overwrite : bool = False , save : bool = True ) -> list [Path ]:
0 commit comments