Merge pull request #954 from int-brain-lab/manual_curation_ssloader

mayofaulkner · web-flow · commit 20c4de332473 · 2025-03-17T20:35:36.000+01:00
Manual curation ssloader
diff --git a/brainbox/io/one.py b/brainbox/io/one.py
@@ -5,14 +5,15 @@
 import re
 import os
 from pathlib import Path
+from collections import defaultdict
 
 import numpy as np
 import pandas as pd
 from scipy.interpolate import interp1d
 import matplotlib.pyplot as plt
 
 from one.api import ONE, One
-from one.alf.path import get_alf_path, full_path_parts
+from one.alf.path import get_alf_path, full_path_parts, filename_parts
 from one.alf.exceptions import ALFObjectNotFound, ALFMultipleCollectionsFound
 from one.alf import cache
 import one.alf.io as alfio
@@ -193,9 +194,9 @@ def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_ch
     for pname in pnames:
         probe_collection = _get_spike_sorting_collection(collections, pname)
         spikes[pname] = one.load_object(eid, collection=probe_collection, obj='spikes',
-                                        attribute=spike_attributes)
+                                        attribute=spike_attributes, namespace='')
         clusters[pname] = one.load_object(eid, collection=probe_collection, obj='clusters',
-                                          attribute=cluster_attributes)
+                                          attribute=cluster_attributes, namespace='')
     if return_channels:
         channels = _load_channels_locations_from_disk(
             eid, collection=collection, one=one, revision=revision, brain_regions=brain_regions)
@@ -1035,7 +1036,31 @@ def load_channels(self, **kwargs):
             self.histology = 'alf'
         return Bunch(channels)
 
-    def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_version=False, good_units=False, **kwargs):
+    @staticmethod
+    def filter_files_by_namespace(all_files, namespace):
+        """Pick one file per 'object.attribute': the `namespace` version if present, else the un-namespaced one."""
+        # Map each 'object.attribute' name to {namespace: file}; files without a namespace are stored under the key None
+        namespace_files = defaultdict(dict)
+        available_namespaces = []
+        for file in all_files:
+            fparts = filename_parts(file.name, as_dict=True)
+            fname = f"{fparts['object']}.{fparts['attribute']}"
+            nspace = fparts['namespace']
+            available_namespaces.append(nspace)
+            namespace_files[fname][nspace] = file
+
+        if namespace not in set(available_namespaces):
+            _logger.info(f'Could not find manual curation results for {namespace}, returning default'
+                         f' non manually curated spikesorting data')
+
+        # For each name take the file in the requested namespace, otherwise fall back to the un-namespaced file.
+        files = [f.get(namespace, f.get(None, None)) for f in namespace_files.values()]
+        # Drop names that have neither the requested namespace nor an un-namespaced file
+        files = [f for f in files if f]
+        return files
+
+    def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_version=False, good_units=False,
+                           namespace=None, **kwargs):
         """
         Loads spikes, clusters and channels
 
@@ -1053,6 +1078,8 @@ def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_ve
         :param enforce_version: if True, will raise an error if the spike sorting version and revision is not the expected one
         :param dataset_types: list of extra dataset types, for example: ['spikes.samples', 'spikes.templates']
         :param good_units: False, if True will load only the good units, possibly by downloading a smaller spikes table
+        :param namespace: None, if given will load the manually curated spikesorting with the given namespace,
+                         e.g. to load '_av_.clusters.depths' use namespace='av'
         :param kwargs: additional arguments to be passed to one.api.One.load_object
         :return:
         """
@@ -1061,13 +1088,21 @@ def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_ve
         self.files = {}
         self.spike_sorter = spike_sorter
         self.revision = revision
+
+        if good_units and namespace is not None:
+            _logger.info('Good units table does not exist for manually curated spike sorting. Pass in namespace with'
+                         'good_units=False and filter the spikes post hoc by the good clusters.')
+            return [None] * 3
         objects = ['passingSpikes', 'clusters', 'channels'] if good_units else None
         self.download_spike_sorting(spike_sorter=spike_sorter, revision=revision, objects=objects, **kwargs)
         channels = self.load_channels(spike_sorter=spike_sorter, revision=revision, **kwargs)
+        self.files['clusters'] = self.filter_files_by_namespace(self.files['clusters'], namespace)
         clusters = self._load_object(self.files['clusters'], wildcards=self.one.wildcards)
+
         if good_units:
             spikes = self._load_object(self.files['passingSpikes'], wildcards=self.one.wildcards)
         else:
+            self.files['spikes'] = self.filter_files_by_namespace(self.files['spikes'], namespace)
             spikes = self._load_object(self.files['spikes'], wildcards=self.one.wildcards)
         if enforce_version:
             self._assert_version_consistency()
diff --git a/ibllib/io/extractors/mesoscope.py b/ibllib/io/extractors/mesoscope.py
@@ -673,7 +673,8 @@ def _extract(self, sync=None, chmap=None, device_collection='raw_imaging_data',
             assert len(fov_time_shifts) == self.n_FOVs, f'unexpected number of FOVs for {collection}'
             ts = frame_times[np.logical_and(frame_times >= tmin, frame_times <= tmax)]
             assert ts.size >= imaging_data[
-                'times_scanImage'].size, f"fewer DAQ timestamps for {collection} than expected: DAQ/frames = {ts.size}/{imaging_data['times_scanImage'].size}"
+                'times_scanImage'].size, (f"fewer DAQ timestamps for {collection} than expected: "
+                                          f"DAQ/frames = {ts.size}/{imaging_data['times_scanImage'].size}")
             if ts.size > imaging_data['times_scanImage'].size:
                 _logger.warning(
                     'More DAQ frame times detected for %s than were found in the raw image data.\n'
diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py
@@ -252,7 +252,8 @@ def run_qc(self, camera_data=None, update=True):
         if camera_data is None:
             camera_data, _ = self.extract_camera(save=False)
         qc = CameraQC(
-            self.session_path, 'left', sync_type='bpod', sync_collection=self.collection, one=self.one)
+            self.session_path, 'left', sync_type='bpod', sync_collection=self.collection, one=self.one,
+            protocol=self.protocol)
         qc.run(update=update)
         return qc
 
diff --git a/ibllib/qc/camera.py b/ibllib/qc/camera.py
@@ -135,6 +135,7 @@ def __init__(self, session_path_or_eid, camera, **kwargs):
         self.n_samples = kwargs.pop('n_samples', 100)
         self.sync_collection = kwargs.pop('sync_collection', None)
         self.sync = kwargs.pop('sync_type', None)
+        self.protocol = kwargs.pop('protocol', None)
         super().__init__(session_path_or_eid, **kwargs)
 
         # Data
@@ -163,7 +164,10 @@ def __init__(self, session_path_or_eid, camera, **kwargs):
         self.outcome = spec.QC.NOT_SET
 
         # Specify any checks to remove
-        self.checks_to_remove = []
+        if self.protocol is not None and 'habituation' in self.protocol:
+            self.checks_to_remove = ['check_wheel_alignment']
+        else:
+            self.checks_to_remove = []
         self._type = None
 
     @property
@@ -271,8 +275,12 @@ def load_data(self, extract_times: bool = False, load_video: bool = True) -> Non
                 else:
                     raise NotImplementedError(f'Unknown namespace "{ns}"')
             else:
-                wheel_data = training_wheel.get_wheel_position(
-                    self.session_path, task_collection=task_collection)
+                if self.protocol is not None and 'habituation' in self.protocol:
+                    wheel_data = training_wheel.get_wheel_position(
+                        self.session_path, task_collection=task_collection)
+                else:
+                    wheel_data = [None, None]
+
             self.data['wheel'] = Bunch(zip(wheel_keys, wheel_data))
 
         # Find short period of wheel motion for motion correlation.