Skip to content

Commit 908f530

Browse files
authored
Merge pull request #741 from int-brain-lab/taskRegistration
Task registration
2 parents f4c02c8 + de2bf8e commit 908f530

18 files changed

+678
-391
lines changed

ibllib/io/extractors/biased_trials.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@
1717

1818

1919
class ContrastLR(BaseBpodTrialsExtractor):
20-
"""
21-
Get left and right contrasts from raw datafile.
22-
"""
20+
"""Get left and right contrasts from raw datafile."""
2321
save_names = ('_ibl_trials.contrastLeft.npy', '_ibl_trials.contrastRight.npy')
2422
var_names = ('contrastLeft', 'contrastRight')
2523

@@ -32,9 +30,7 @@ def _extract(self, **kwargs):
3230

3331

3432
class ProbaContrasts(BaseBpodTrialsExtractor):
35-
"""
36-
Bpod pre-generated values for probabilityLeft, contrastLR, phase, quiescence
37-
"""
33+
"""Bpod pre-generated values for probabilityLeft, contrastLR, phase, quiescence."""
3834
save_names = ('_ibl_trials.contrastLeft.npy', '_ibl_trials.contrastRight.npy', None, None,
3935
'_ibl_trials.probabilityLeft.npy', '_ibl_trials.quiescencePeriod.npy')
4036
var_names = ('contrastLeft', 'contrastRight', 'phase',
@@ -103,10 +99,12 @@ class TrialsTableBiased(BaseBpodTrialsExtractor):
10399
'wheelMoves_peakAmplitude', 'peakVelocity_times', 'is_final_movement')
104100

105101
def _extract(self, extractor_classes=None, **kwargs):
102+
extractor_classes = extractor_classes or []
106103
base = [Intervals, GoCueTimes, ResponseTimes, Choice, StimOnOffFreezeTimes, ContrastLR, FeedbackTimes, FeedbackType,
107104
RewardVolume, ProbabilityLeft, Wheel]
108-
out, _ = run_extractor_classes(base, session_path=self.session_path, bpod_trials=self.bpod_trials, settings=self.settings,
109-
save=False, task_collection=self.task_collection)
105+
out, _ = run_extractor_classes(
106+
base + extractor_classes, session_path=self.session_path, bpod_trials=self.bpod_trials,
107+
settings=self.settings, save=False, task_collection=self.task_collection)
110108

111109
table = AlfBunch({k: out.pop(k) for k in list(out.keys()) if k not in self.var_names})
112110
assert len(table.keys()) == 12
@@ -130,11 +128,13 @@ class TrialsTableEphys(BaseBpodTrialsExtractor):
130128
'phase', 'position', 'quiescence')
131129

132130
def _extract(self, extractor_classes=None, **kwargs):
131+
extractor_classes = extractor_classes or []
133132
base = [Intervals, GoCueTimes, ResponseTimes, Choice, StimOnOffFreezeTimes, ProbaContrasts,
134133
FeedbackTimes, FeedbackType, RewardVolume, Wheel]
135134
# Exclude from trials table
136-
out, _ = run_extractor_classes(base, session_path=self.session_path, bpod_trials=self.bpod_trials, settings=self.settings,
137-
save=False, task_collection=self.task_collection)
135+
out, _ = run_extractor_classes(
136+
base + extractor_classes, session_path=self.session_path, bpod_trials=self.bpod_trials,
137+
settings=self.settings, save=False, task_collection=self.task_collection)
138138
table = AlfBunch({k: v for k, v in out.items() if k not in self.var_names})
139139
assert len(table.keys()) == 12
140140

@@ -158,11 +158,13 @@ class BiasedTrials(BaseBpodTrialsExtractor):
158158
'phase', 'position', 'quiescence')
159159

160160
def _extract(self, extractor_classes=None, **kwargs) -> dict:
161+
extractor_classes = extractor_classes or []
161162
base = [GoCueTriggerTimes, StimOnTriggerTimes, ItiInTimes, StimOffTriggerTimes, StimFreezeTriggerTimes,
162163
ErrorCueTriggerTimes, TrialsTableBiased, IncludedTrials, PhasePosQuiescence]
163164
# Exclude from trials table
164-
out, _ = run_extractor_classes(base, session_path=self.session_path, bpod_trials=self.bpod_trials, settings=self.settings,
165-
save=False, task_collection=self.task_collection)
165+
out, _ = run_extractor_classes(
166+
base + extractor_classes, session_path=self.session_path, bpod_trials=self.bpod_trials,
167+
settings=self.settings, save=False, task_collection=self.task_collection)
166168
return {k: out[k] for k in self.var_names}
167169

168170

@@ -181,13 +183,15 @@ class EphysTrials(BaseBpodTrialsExtractor):
181183
'phase', 'position', 'quiescence')
182184

183185
def _extract(self, extractor_classes=None, **kwargs) -> dict:
186+
extractor_classes = extractor_classes or []
184187
base = [GoCueTriggerTimes, StimOnTriggerTimes, ItiInTimes, StimOffTriggerTimes, StimFreezeTriggerTimes,
185188
ErrorCueTriggerTimes, TrialsTableEphys, IncludedTrials, PhasePosQuiescence]
186189
# Get all detected TTLs. These are stored for QC purposes
187190
self.frame2ttl, self.audio = raw.load_bpod_fronts(self.session_path, data=self.bpod_trials)
188191
# Exclude from trials table
189-
out, _ = run_extractor_classes(base, session_path=self.session_path, bpod_trials=self.bpod_trials, settings=self.settings,
190-
save=False, task_collection=self.task_collection)
192+
out, _ = run_extractor_classes(
193+
base + extractor_classes, session_path=self.session_path, bpod_trials=self.bpod_trials,
194+
settings=self.settings, save=False, task_collection=self.task_collection)
191195
return {k: out[k] for k in self.var_names}
192196

193197

ibllib/io/extractors/extractor_types.json

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
{"ksocha_ephysOptoStimulation": "ephys_passive_opto",
1+
{"THIS FILE": "SHOULD NO LONGER BE USED!",
2+
"SEE": "https://github.com/int-brain-lab/project_extraction?tab=readme-ov-file#project_extraction",
3+
"********": "*******************************",
4+
"ksocha_ephysOptoStimulation": "ephys_passive_opto",
25
"ksocha_ephysOptoChoiceWorld": "ephys_biased_opto",
36
"passiveChoiceWorld": "ephys_replay",
47
"opto_ephysChoiceWorld": "ephys_biased_opto",
@@ -17,5 +20,7 @@
1720
"_habituationChoiceWorld": "habituation",
1821
"_trainingChoiceWorld": "training",
1922
"ephysMockChoiceWorld": "mock_ephys",
20-
"ephys_certification": "sync_ephys"
23+
"ephys_certification": "sync_ephys",
24+
"trainingPhaseChoiceWorld": "training",
25+
"************": "*********************"
2126
}

ibllib/io/extractors/task_extractor_map.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
{"ephysChoiceWorld": "EphysTrials",
1+
{"!!THIS FILE": "SHOULD NOT BE EDITED...",
2+
"SEE": "PROJECT EXTRACTION REPO!!",
3+
"************": "**********************",
4+
"ephysChoiceWorld": "EphysTrials",
25
"_biasedChoiceWorld": "BiasedTrials",
36
"_habituationChoiceWorld": "HabituationTrials",
47
"_trainingChoiceWorld": "TrainingTrials",

ibllib/oneibl/data_handlers.py

Lines changed: 56 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,14 @@ def __init__(self, session_path, signature, one=None):
3434
self.session_path = session_path
3535
self.signature = signature
3636
self.one = one
37+
self.processed = {} # Map of filepaths and their processed records (e.g. upload receipts or Alyx records)
3738

3839
def setUp(self):
3940
"""Function to optionally overload to download required data to run task."""
4041
pass
4142

4243
def getData(self, one=None):
43-
"""
44-
Finds the datasets required for task based on input signatures
45-
:return:
46-
"""
44+
"""Finds the datasets required for task based on input signatures."""
4745
if self.one is None and one is None:
4846
return
4947

@@ -60,6 +58,22 @@ def getData(self, one=None):
6058
df = df.droplevel(level='eid')
6159
return df
6260

61+
def getOutputFiles(self):
62+
assert self.session_path
63+
from one.alf.io import iter_datasets
64+
# Next convert datasets to frame
65+
from one.alf.cache import DATASETS_COLUMNS, _get_dataset_info
66+
# Create dataframe of all ALF datasets
67+
dsets = iter_datasets(self.session_path)
68+
records = [_get_dataset_info(self.session_path, dset, compute_hash=False) for dset in dsets]
69+
df = pd.DataFrame(records, columns=DATASETS_COLUMNS)
70+
from functools import partial
71+
filt = partial(filter_datasets, df, wildcards=True, assert_unique=False)
72+
# Filter outputs
73+
dids = pd.concat(filt(filename=file[0], collection=file[1]).index for file in self.signature['output_files'])
74+
present = df.loc[dids, :].copy()
75+
return present
76+
6377
def uploadData(self, outputs, version):
6478
"""
6579
Function to optionally overload to upload and register data
@@ -75,10 +89,7 @@ def uploadData(self, outputs, version):
7589
return versions
7690

7791
def cleanUp(self):
78-
"""
79-
Function to optionally overload to cleanup files after running task
80-
:return:
81-
"""
92+
"""Function to optionally overload to clean up files after running task."""
8293
pass
8394

8495

@@ -104,16 +115,47 @@ def __init__(self, session_path, signatures, one=None):
104115
"""
105116
super().__init__(session_path, signatures, one=one)
106117

107-
def uploadData(self, outputs, version, **kwargs):
118+
def uploadData(self, outputs, version, clobber=False, **kwargs):
108119
"""
109-
Function to upload and register data of completed task
110-
:param outputs: output files from task to register
111-
:param version: ibllib version
112-
:return: output info of registered datasets
120+
Upload and/or register output data.
121+
122+
This is typically called by :meth:`ibllib.pipes.tasks.Task.register_datasets`.
123+
124+
Parameters
125+
----------
126+
outputs : list of pathlib.Path
127+
A set of ALF paths to register to Alyx.
128+
version : str, list of str
129+
The version of ibllib used to generate these output files.
130+
clobber : bool
131+
If True, re-upload outputs that have already been passed to this method.
132+
kwargs
133+
Optional keyword arguments for one.registration.RegistrationClient.register_files.
134+
135+
Returns
136+
-------
137+
list of dicts, dict
138+
A list of newly created Alyx dataset records or the registration data if dry.
113139
"""
114140
versions = super().uploadData(outputs, version)
115141
data_repo = get_local_data_repository(self.one.alyx)
116-
return register_dataset(outputs, one=self.one, versions=versions, repository=data_repo, **kwargs)
142+
# If clobber = False, do not re-upload the outputs that have already been processed
143+
if not isinstance(outputs, list):
144+
outputs = [outputs]
145+
to_upload = list(filter(None if clobber else lambda x: x not in self.processed, outputs))
146+
records = register_dataset(to_upload, one=self.one, versions=versions, repository=data_repo, **kwargs) or []
147+
if kwargs.get('dry', False):
148+
return records
149+
# Store processed outputs
150+
self.processed.update({k: v for k, v in zip(to_upload, records) if v})
151+
return [self.processed[x] for x in outputs if x in self.processed]
152+
153+
def cleanUp(self):
154+
"""Empties and returns the processed dataset mep."""
155+
super().cleanUp()
156+
processed = self.processed
157+
self.processed = {}
158+
return processed
117159

118160

119161
class ServerGlobusDataHandler(DataHandler):

ibllib/pipes/behavior_tasks.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88
from one.api import ONE
99

1010
from ibllib.oneibl.registration import get_lab
11+
from ibllib.oneibl.data_handlers import ServerDataHandler
1112
from ibllib.pipes import base_tasks
1213
from ibllib.io.raw_data_loaders import load_settings, load_bpod_fronts
1314
from ibllib.qc.task_extractors import TaskQCExtractor
14-
from ibllib.qc.task_metrics import HabituationQC, TaskQC
15+
from ibllib.qc.task_metrics import HabituationQC, TaskQC, update_dataset_qc
1516
from ibllib.io.extractors.ephys_passive import PassiveChoiceWorld
1617
from ibllib.io.extractors.bpod_trials import get_bpod_extractor
1718
from ibllib.io.extractors.ephys_fpga import FpgaTrials, FpgaTrialsHabituation, get_sync_and_chn_map
@@ -72,9 +73,7 @@ def signature(self):
7273
return signature
7374

7475
def _run(self, update=True, save=True):
75-
"""
76-
Extracts an iblrig training session
77-
"""
76+
"""Extracts an iblrig training session."""
7877
trials, output_files = self.extract_behaviour(save=save)
7978

8079
if trials is None:
@@ -296,7 +295,7 @@ def signature(self):
296295
}
297296
return signature
298297

299-
def _run(self, update=True, save=True):
298+
def _run(self, update=True, save=True, **kwargs):
300299
"""Extracts an iblrig training session."""
301300
trials, output_files = self.extract_behaviour(save=save)
302301
if trials is None:
@@ -305,7 +304,16 @@ def _run(self, update=True, save=True):
305304
return output_files
306305

307306
# Run the task QC
308-
self.run_qc(trials)
307+
qc = self.run_qc(trials, update=update, **kwargs)
308+
if update and not self.one.offline:
309+
on_server = self.location == 'server' and isinstance(self.data_handler, ServerDataHandler)
310+
if not on_server:
311+
_logger.warning('Updating dataset QC only supported on local servers')
312+
else:
313+
labs = get_lab(self.session_path, self.one.alyx)
314+
# registered_dsets = self.register_datasets(labs=labs)
315+
datasets = self.data_handler.uploadData(output_files, self.version, labs=labs)
316+
update_dataset_qc(qc, datasets, self.one)
309317

310318
return output_files
311319

@@ -467,14 +475,11 @@ def run_qc(self, trials_data=None, update=False, plot_qc=False, QC=None):
467475
return qc
468476

469477
def _run(self, update=True, plot_qc=True, save=True):
470-
dsets, out_files = self.extract_behaviour(save=save)
471-
472-
if not self.one or self.one.offline:
473-
return out_files
478+
output_files = super()._run(update=update, save=save, plot_qc=plot_qc)
479+
if update and not self.one.offline:
480+
self._behaviour_criterion(update=update)
474481

475-
self._behaviour_criterion(update=update)
476-
self.run_qc(dsets, update=update, plot_qc=plot_qc)
477-
return out_files
482+
return output_files
478483

479484

480485
class ChoiceWorldTrialsTimeline(ChoiceWorldTrialsNidq):

ibllib/pipes/dynamic_pipeline.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,11 @@ def make_pipeline(session_path, **pkwargs):
215215
for sync_option in ('nidq', 'bpod'):
216216
if sync_option in extractor.lower() and not sync == sync_option:
217217
raise ValueError(f'Extractor "{extractor}" and sync "{sync}" do not match')
218+
# TODO Assert sync_label correct here (currently unused)
218219
# Look for the extractor in the behavior extractors module
219220
if hasattr(btasks, extractor):
220221
task = getattr(btasks, extractor)
221-
# This may happen that the extractor is tied to a specific sync task: look for TrialsChoiceWorldBpod for # example
222+
# This may happen that the extractor is tied to a specific sync task: look for TrialsChoiceWorldBpod for example
222223
elif hasattr(btasks, extractor + sync.capitalize()):
223224
task = getattr(btasks, extractor + sync.capitalize())
224225
else:
@@ -229,6 +230,8 @@ def make_pipeline(session_path, **pkwargs):
229230
else:
230231
raise NotImplementedError(
231232
f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects')
233+
_logger.debug('%s (protocol #%i, task #%i) = %s.%s',
234+
protocol, i, j, task.__module__, task.__name__)
232235
# Rename the class to something more informative
233236
task_name = f'{task.__name__}_{i:02}'
234237
# For now we assume that the second task in the list is always the trials extractor, which is dependent

ibllib/pipes/tasks.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -95,15 +95,15 @@
9595

9696

9797
class Task(abc.ABC):
98-
log = '' # place holder to keep the log of the task for registration
98+
log = '' # placeholder to keep the log of the task for registration
9999
cpu = 1 # CPU resource
100100
gpu = 0 # GPU resources: as of now, either 0 or 1
101101
io_charge = 5 # integer percentage
102102
priority = 30 # integer percentage, 100 means highest priority
103103
ram = 4 # RAM needed to run (GB)
104104
one = None # one instance (optional)
105105
level = 0 # level in the pipeline hierarchy: level 0 means there is no parent task
106-
outputs = None # place holder for a list of Path containing output files
106+
outputs = None # placeholder for a list of Path containing output files
107107
time_elapsed_secs = None
108108
time_out_secs = 3600 * 2 # time-out after which a task is considered dead
109109
version = ibllib.__version__
@@ -245,16 +245,21 @@ def run(self, **kwargs):
245245
self.tearDown()
246246
return self.status
247247

248-
def register_datasets(self, one=None, **kwargs):
248+
def register_datasets(self, **kwargs):
249249
"""
250-
Register output datasets form the task to Alyx
251-
:param one:
252-
:param jobid:
253-
:param kwargs: directly passed to the register_dataset function
254-
:return:
250+
Register output datasets from the task to Alyx.
251+
252+
Parameters
253+
----------
254+
kwargs
255+
Directly passed to the `DataHandler.uploadData` method.
256+
257+
Returns
258+
-------
259+
list
260+
The output of the `DataHandler.uploadData` method, e.g. a list of registered datasets.
255261
"""
256262
_ = self.register_images()
257-
258263
return self.data_handler.uploadData(self.outputs, self.version, **kwargs)
259264

260265
def register_images(self, **kwargs):
@@ -737,7 +742,7 @@ def run_alyx_task(tdict=None, session_path=None, one=None, job_deck=None,
737742
# otherwise register data and set (provisional) status to Complete
738743
else:
739744
try:
740-
kwargs = dict(one=one, max_md5_size=max_md5_size)
745+
kwargs = dict(max_md5_size=max_md5_size)
741746
if location == 'server':
742747
# Explicitly pass lab as lab cannot be inferred from path (which the registration client tries to do).
743748
# To avoid making extra REST requests we can also set labs=None if using ONE v1.20.1.

0 commit comments

Comments
 (0)